diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index f330d2c45..9ef49fba3 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -5,7 +5,7 @@ run-name: Run the integration test suite from tests/integration in replay mode
 on:
   push:
     branches: [ main ]
-  pull_request_target:
+  pull_request:
     branches: [ main ]
     types: [opened, synchronize, reopened]
     paths:
@@ -34,7 +34,7 @@ on:
 
 concurrency:
   # Skip concurrency for pushes to main - each commit should be tested independently
-  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.event.pull_request.number }}
+  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
   cancel-in-progress: true
 
 jobs:
diff --git a/.github/workflows/integration-vector-io-tests.yml b/.github/workflows/integration-vector-io-tests.yml
index f4d28e407..99a44c147 100644
--- a/.github/workflows/integration-vector-io-tests.yml
+++ b/.github/workflows/integration-vector-io-tests.yml
@@ -14,9 +14,11 @@ on:
       - 'pyproject.toml'
       - 'requirements.txt'
       - '.github/workflows/integration-vector-io-tests.yml' # This workflow
+  schedule:
+    - cron: '0 0 * * *' # (test on python 3.13) Daily at 12 AM UTC
 
 concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
+  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
   cancel-in-progress: true
 
 jobs:
@@ -25,7 +27,7 @@ jobs:
     strategy:
       matrix:
         vector-io-provider: ["inline::faiss", "inline::sqlite-vec", "inline::milvus", "remote::chromadb", "remote::pgvector", "remote::weaviate", "remote::qdrant"]
-        python-version: ["3.12", "3.13"]
+        python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
       fail-fast: false # we want to run all tests regardless of failure
 
     steps:
diff --git a/.github/workflows/record-integration-tests.yml b/.github/workflows/record-integration-tests.yml
index 12957db27..b31709a4f 100644
--- a/.github/workflows/record-integration-tests.yml
+++ b/.github/workflows/record-integration-tests.yml
@@ -3,7 +3,7 @@ name: Integration Tests (Record)
 run-name: Run the integration test suite from tests/integration
 
 on:
-  pull_request:
+  pull_request_target:
     branches: [ main ]
     types: [opened, synchronize, labeled]
     paths:
@@ -23,7 +23,7 @@ on:
         default: 'ollama'
 
 concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
   cancel-in-progress: true
 
 jobs:
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 30843173c..4309f289a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -2,6 +2,7 @@ exclude: 'build/'
 
 default_language_version:
   python: python3.12
+  node: "22"
 
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
@@ -145,6 +146,20 @@ repos:
         pass_filenames: false
         require_serial: true
         files: ^.github/workflows/.*$
+      - id: ui-prettier
+        name: Format UI code with Prettier
+        entry: bash -c 'cd llama_stack/ui && npm run format'
+        language: system
+        files: ^llama_stack/ui/.*\.(ts|tsx)$
+        pass_filenames: false
+        require_serial: true
+      - id: ui-eslint
+        name: Lint UI code with ESLint
+        entry: bash -c 'cd llama_stack/ui && npm run lint -- --fix --quiet'
+        language: system
+        files: ^llama_stack/ui/.*\.(ts|tsx)$
+        pass_filenames: false
+        require_serial: true
 
 ci:
   autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
diff --git a/README.md b/README.md
index 8db4580a2..4df4a5372 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,5 @@
 # Llama Stack
 
-meta-llama%2Fllama-stack | Trendshift
-
------
 [![PyPI version](https://img.shields.io/pypi/v/llama_stack.svg)](https://pypi.org/project/llama_stack/)
 [![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-stack)](https://pypi.org/project/llama-stack/)
 [![License](https://img.shields.io/pypi/l/llama_stack.svg)](https://github.com/meta-llama/llama-stack/blob/main/LICENSE)
diff --git a/docs/source/distributions/k8s-benchmark/openai-mock-server.py b/docs/source/distributions/k8s-benchmark/openai-mock-server.py
old mode 100644
new mode 100755
diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py
index 0f12a0865..30196c429 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agents.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agents.py
@@ -48,8 +48,8 @@ from llama_stack.providers.utils.responses.responses_store import ResponsesStore
 
 from .agent_instance import ChatAgent
 from .config import MetaReferenceAgentsImplConfig
-from .openai_responses import OpenAIResponsesImpl
 from .persistence import AgentInfo
+from .responses.openai_responses import OpenAIResponsesImpl
 
 logger = logging.getLogger()
diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
deleted file mode 100644
index 6aca4d68e..000000000
--- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
+++ /dev/null
@@ -1,1154 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
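The one-line import change in `agents.py` above is the visible surface of this PR's main refactor: the 1154-line `openai_responses.py` (deleted next) is rebuilt as a `responses` subpackage. A sketch of the resulting layout, inferred from the imports that appear later in this diff — `tool_executor.py`, `types.py`, and `utils.py` are referenced by the new code but their bodies are not included here:

```python
# Inferred layout of the new subpackage (from imports in this diff):
#
#   llama_stack/providers/inline/agents/meta_reference/responses/
#     __init__.py          - license header only
#     openai_responses.py  - OpenAIResponsesImpl: request handling and storage
#     streaming.py         - StreamingResponseOrchestrator: the event-emission loop
#     tool_executor.py     - ToolExecutor (referenced, body not shown)
#     types.py             - ChatCompletionContext, ChatCompletionResult (referenced)
#     utils.py             - conversion helpers (referenced)

# Callers only see the new import path:
from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import (
    OpenAIResponsesImpl,
)
```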
- -import asyncio -import json -import time -import uuid -from collections.abc import AsyncIterator -from typing import Any - -from openai.types.chat import ChatCompletionToolParam -from pydantic import BaseModel - -from llama_stack.apis.agents import Order -from llama_stack.apis.agents.openai_responses import ( - AllowedToolsFilter, - ListOpenAIResponseInputItem, - ListOpenAIResponseObject, - OpenAIDeleteResponseObject, - OpenAIResponseContentPartOutputText, - OpenAIResponseInput, - OpenAIResponseInputFunctionToolCallOutput, - OpenAIResponseInputMessageContent, - OpenAIResponseInputMessageContentImage, - OpenAIResponseInputMessageContentText, - OpenAIResponseInputTool, - OpenAIResponseInputToolFileSearch, - OpenAIResponseInputToolMCP, - OpenAIResponseMessage, - OpenAIResponseObject, - OpenAIResponseObjectStream, - OpenAIResponseObjectStreamResponseCompleted, - OpenAIResponseObjectStreamResponseContentPartAdded, - OpenAIResponseObjectStreamResponseContentPartDone, - OpenAIResponseObjectStreamResponseCreated, - OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta, - OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone, - OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta, - OpenAIResponseObjectStreamResponseMcpCallArgumentsDone, - OpenAIResponseObjectStreamResponseMcpCallCompleted, - OpenAIResponseObjectStreamResponseMcpCallFailed, - OpenAIResponseObjectStreamResponseMcpCallInProgress, - OpenAIResponseObjectStreamResponseOutputItemAdded, - OpenAIResponseObjectStreamResponseOutputItemDone, - OpenAIResponseObjectStreamResponseOutputTextDelta, - OpenAIResponseObjectStreamResponseWebSearchCallCompleted, - OpenAIResponseObjectStreamResponseWebSearchCallInProgress, - OpenAIResponseObjectStreamResponseWebSearchCallSearching, - OpenAIResponseOutput, - OpenAIResponseOutputMessageContent, - OpenAIResponseOutputMessageContentOutputText, - OpenAIResponseOutputMessageFileSearchToolCall, - OpenAIResponseOutputMessageFileSearchToolCallResults, - OpenAIResponseOutputMessageFunctionToolCall, - OpenAIResponseOutputMessageMCPListTools, - OpenAIResponseOutputMessageWebSearchToolCall, - OpenAIResponseText, - OpenAIResponseTextFormat, - WebSearchToolTypes, -) -from llama_stack.apis.common.content_types import TextContentItem -from llama_stack.apis.inference import ( - Inference, - OpenAIAssistantMessageParam, - OpenAIChatCompletion, - OpenAIChatCompletionContentPartImageParam, - OpenAIChatCompletionContentPartParam, - OpenAIChatCompletionContentPartTextParam, - OpenAIChatCompletionToolCall, - OpenAIChatCompletionToolCallFunction, - OpenAIChoice, - OpenAIDeveloperMessageParam, - OpenAIImageURL, - OpenAIJSONSchema, - OpenAIMessageParam, - OpenAIResponseFormatJSONObject, - OpenAIResponseFormatJSONSchema, - OpenAIResponseFormatParam, - OpenAIResponseFormatText, - OpenAISystemMessageParam, - OpenAIToolMessageParam, - OpenAIUserMessageParam, -) -from llama_stack.apis.tools import ToolGroups, ToolInvocationResult, ToolRuntime -from llama_stack.apis.vector_io import VectorIO -from llama_stack.log import get_logger -from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition -from llama_stack.providers.utils.inference.openai_compat import ( - convert_tooldef_to_openai_tool, -) -from llama_stack.providers.utils.responses.responses_store import ResponsesStore - -logger = get_logger(name=__name__, category="openai_responses") - -OPENAI_RESPONSES_PREFIX = "openai_responses:" - - -class ToolExecutionResult(BaseModel): - """Result of streaming tool execution.""" - - stream_event: 
OpenAIResponseObjectStream | None = None - sequence_number: int - final_output_message: OpenAIResponseOutput | None = None - final_input_message: OpenAIMessageParam | None = None - - -async def _convert_response_content_to_chat_content( - content: (str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent]), -) -> str | list[OpenAIChatCompletionContentPartParam]: - """ - Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts. - - The content schemas of each API look similar, but are not exactly the same. - """ - if isinstance(content, str): - return content - - converted_parts = [] - for content_part in content: - if isinstance(content_part, OpenAIResponseInputMessageContentText): - converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text)) - elif isinstance(content_part, OpenAIResponseOutputMessageContentOutputText): - converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text)) - elif isinstance(content_part, OpenAIResponseInputMessageContentImage): - if content_part.image_url: - image_url = OpenAIImageURL(url=content_part.image_url, detail=content_part.detail) - converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url)) - elif isinstance(content_part, str): - converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part)) - else: - raise ValueError( - f"Llama Stack OpenAI Responses does not yet support content type '{type(content_part)}' in this context" - ) - return converted_parts - - -async def _convert_response_input_to_chat_messages( - input: str | list[OpenAIResponseInput], -) -> list[OpenAIMessageParam]: - """ - Convert the input from an OpenAI Response API request into OpenAI Chat Completion messages. - """ - messages: list[OpenAIMessageParam] = [] - if isinstance(input, list): - for input_item in input: - if isinstance(input_item, OpenAIResponseInputFunctionToolCallOutput): - messages.append( - OpenAIToolMessageParam( - content=input_item.output, - tool_call_id=input_item.call_id, - ) - ) - elif isinstance(input_item, OpenAIResponseOutputMessageFunctionToolCall): - tool_call = OpenAIChatCompletionToolCall( - index=0, - id=input_item.call_id, - function=OpenAIChatCompletionToolCallFunction( - name=input_item.name, - arguments=input_item.arguments, - ), - ) - messages.append(OpenAIAssistantMessageParam(tool_calls=[tool_call])) - else: - content = await _convert_response_content_to_chat_content(input_item.content) - message_type = await _get_message_type_by_role(input_item.role) - if message_type is None: - raise ValueError( - f"Llama Stack OpenAI Responses does not yet support message role '{input_item.role}' in this context" - ) - messages.append(message_type(content=content)) - else: - messages.append(OpenAIUserMessageParam(content=input)) - return messages - - -async def _convert_chat_choice_to_response_message( - choice: OpenAIChoice, -) -> OpenAIResponseMessage: - """ - Convert an OpenAI Chat Completion choice into an OpenAI Response output message. 
- """ - output_content = "" - if isinstance(choice.message.content, str): - output_content = choice.message.content - elif isinstance(choice.message.content, OpenAIChatCompletionContentPartTextParam): - output_content = choice.message.content.text - else: - raise ValueError( - f"Llama Stack OpenAI Responses does not yet support output content type: {type(choice.message.content)}" - ) - - return OpenAIResponseMessage( - id=f"msg_{uuid.uuid4()}", - content=[OpenAIResponseOutputMessageContentOutputText(text=output_content)], - status="completed", - role="assistant", - ) - - -async def _convert_response_text_to_chat_response_format( - text: OpenAIResponseText, -) -> OpenAIResponseFormatParam: - """ - Convert an OpenAI Response text parameter into an OpenAI Chat Completion response format. - """ - if not text.format or text.format["type"] == "text": - return OpenAIResponseFormatText(type="text") - if text.format["type"] == "json_object": - return OpenAIResponseFormatJSONObject() - if text.format["type"] == "json_schema": - return OpenAIResponseFormatJSONSchema( - json_schema=OpenAIJSONSchema(name=text.format["name"], schema=text.format["schema"]) - ) - raise ValueError(f"Unsupported text format: {text.format}") - - -async def _get_message_type_by_role(role: str): - role_to_type = { - "user": OpenAIUserMessageParam, - "system": OpenAISystemMessageParam, - "assistant": OpenAIAssistantMessageParam, - "developer": OpenAIDeveloperMessageParam, - } - return role_to_type.get(role) - - -class OpenAIResponsePreviousResponseWithInputItems(BaseModel): - input_items: ListOpenAIResponseInputItem - response: OpenAIResponseObject - - -class ChatCompletionContext(BaseModel): - model: str - messages: list[OpenAIMessageParam] - response_tools: list[OpenAIResponseInputTool] | None = None - chat_tools: list[ChatCompletionToolParam] | None = None - mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] - temperature: float | None - response_format: OpenAIResponseFormatParam - - -class OpenAIResponsesImpl: - def __init__( - self, - inference_api: Inference, - tool_groups_api: ToolGroups, - tool_runtime_api: ToolRuntime, - responses_store: ResponsesStore, - vector_io_api: VectorIO, # VectorIO - ): - self.inference_api = inference_api - self.tool_groups_api = tool_groups_api - self.tool_runtime_api = tool_runtime_api - self.responses_store = responses_store - self.vector_io_api = vector_io_api - - async def _prepend_previous_response( - self, - input: str | list[OpenAIResponseInput], - previous_response_id: str | None = None, - ): - if previous_response_id: - previous_response_with_input = await self.responses_store.get_response_object(previous_response_id) - - # previous response input items - new_input_items = previous_response_with_input.input - - # previous response output items - new_input_items.extend(previous_response_with_input.output) - - # new input items from the current request - if isinstance(input, str): - new_input_items.append(OpenAIResponseMessage(content=input, role="user")) - else: - new_input_items.extend(input) - - input = new_input_items - - return input - - async def _prepend_instructions(self, messages, instructions): - if instructions: - messages.insert(0, OpenAISystemMessageParam(content=instructions)) - - async def get_openai_response( - self, - response_id: str, - ) -> OpenAIResponseObject: - response_with_input = await self.responses_store.get_response_object(response_id) - return OpenAIResponseObject(**{k: v for k, v in response_with_input.model_dump().items() if k != "input"}) - - 
async def list_openai_responses( - self, - after: str | None = None, - limit: int | None = 50, - model: str | None = None, - order: Order | None = Order.desc, - ) -> ListOpenAIResponseObject: - return await self.responses_store.list_responses(after, limit, model, order) - - async def list_openai_response_input_items( - self, - response_id: str, - after: str | None = None, - before: str | None = None, - include: list[str] | None = None, - limit: int | None = 20, - order: Order | None = Order.desc, - ) -> ListOpenAIResponseInputItem: - """List input items for a given OpenAI response. - - :param response_id: The ID of the response to retrieve input items for. - :param after: An item ID to list items after, used for pagination. - :param before: An item ID to list items before, used for pagination. - :param include: Additional fields to include in the response. - :param limit: A limit on the number of objects to be returned. - :param order: The order to return the input items in. - :returns: An ListOpenAIResponseInputItem. - """ - return await self.responses_store.list_response_input_items(response_id, after, before, include, limit, order) - - async def _store_response( - self, - response: OpenAIResponseObject, - input: str | list[OpenAIResponseInput], - ) -> None: - new_input_id = f"msg_{uuid.uuid4()}" - if isinstance(input, str): - # synthesize a message from the input string - input_content = OpenAIResponseInputMessageContentText(text=input) - input_content_item = OpenAIResponseMessage( - role="user", - content=[input_content], - id=new_input_id, - ) - input_items_data = [input_content_item] - else: - # we already have a list of messages - input_items_data = [] - for input_item in input: - if isinstance(input_item, OpenAIResponseMessage): - # These may or may not already have an id, so dump to dict, check for id, and add if missing - input_item_dict = input_item.model_dump() - if "id" not in input_item_dict: - input_item_dict["id"] = new_input_id - input_items_data.append(OpenAIResponseMessage(**input_item_dict)) - else: - input_items_data.append(input_item) - - await self.responses_store.store_response_object( - response_object=response, - input=input_items_data, - ) - - async def create_openai_response( - self, - input: str | list[OpenAIResponseInput], - model: str, - instructions: str | None = None, - previous_response_id: str | None = None, - store: bool | None = True, - stream: bool | None = False, - temperature: float | None = None, - text: OpenAIResponseText | None = None, - tools: list[OpenAIResponseInputTool] | None = None, - include: list[str] | None = None, - max_infer_iters: int | None = 10, - ): - stream = bool(stream) - text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text - - stream_gen = self._create_streaming_response( - input=input, - model=model, - instructions=instructions, - previous_response_id=previous_response_id, - store=store, - temperature=temperature, - text=text, - tools=tools, - max_infer_iters=max_infer_iters, - ) - - if stream: - return stream_gen - else: - response = None - async for stream_chunk in stream_gen: - if stream_chunk.type == "response.completed": - if response is not None: - raise ValueError("The response stream completed multiple times! Earlier response: {response}") - response = stream_chunk.response - # don't leave the generator half complete! 
- - if response is None: - raise ValueError("The response stream never completed") - return response - - async def _create_streaming_response( - self, - input: str | list[OpenAIResponseInput], - model: str, - instructions: str | None = None, - previous_response_id: str | None = None, - store: bool | None = True, - temperature: float | None = None, - text: OpenAIResponseText | None = None, - tools: list[OpenAIResponseInputTool] | None = None, - max_infer_iters: int | None = 10, - ) -> AsyncIterator[OpenAIResponseObjectStream]: - output_messages: list[OpenAIResponseOutput] = [] - - # Input preprocessing - input = await self._prepend_previous_response(input, previous_response_id) - messages = await _convert_response_input_to_chat_messages(input) - await self._prepend_instructions(messages, instructions) - - # Structured outputs - response_format = await _convert_response_text_to_chat_response_format(text) - - # Tool setup, TODO: refactor this slightly since this can also yield events - chat_tools, mcp_tool_to_server, mcp_list_message = ( - await self._convert_response_tools_to_chat_tools(tools) if tools else (None, {}, None) - ) - if mcp_list_message: - output_messages.append(mcp_list_message) - - ctx = ChatCompletionContext( - model=model, - messages=messages, - response_tools=tools, - chat_tools=chat_tools, - mcp_tool_to_server=mcp_tool_to_server, - temperature=temperature, - response_format=response_format, - ) - - # Create initial response and emit response.created immediately - response_id = f"resp-{uuid.uuid4()}" - created_at = int(time.time()) - - initial_response = OpenAIResponseObject( - created_at=created_at, - id=response_id, - model=model, - object="response", - status="in_progress", - output=output_messages.copy(), - text=text, - ) - - yield OpenAIResponseObjectStreamResponseCreated(response=initial_response) - - n_iter = 0 - messages = ctx.messages.copy() - - while True: - completion_result = await self.inference_api.openai_chat_completion( - model=ctx.model, - messages=messages, - tools=ctx.chat_tools, - stream=True, - temperature=ctx.temperature, - response_format=ctx.response_format, - ) - - # Process streaming chunks and build complete response - chat_response_id = "" - chat_response_content = [] - chat_response_tool_calls: dict[int, OpenAIChatCompletionToolCall] = {} - chunk_created = 0 - chunk_model = "" - chunk_finish_reason = "" - sequence_number = 0 - - # Create a placeholder message item for delta events - message_item_id = f"msg_{uuid.uuid4()}" - # Track tool call items for streaming events - tool_call_item_ids: dict[int, str] = {} - # Track content parts for streaming events - content_part_emitted = False - - async for chunk in completion_result: - chat_response_id = chunk.id - chunk_created = chunk.created - chunk_model = chunk.model - for chunk_choice in chunk.choices: - # Emit incremental text content as delta events - if chunk_choice.delta.content: - # Emit content_part.added event for first text chunk - if not content_part_emitted: - content_part_emitted = True - sequence_number += 1 - yield OpenAIResponseObjectStreamResponseContentPartAdded( - response_id=response_id, - item_id=message_item_id, - part=OpenAIResponseContentPartOutputText( - text="", # Will be filled incrementally via text deltas - ), - sequence_number=sequence_number, - ) - sequence_number += 1 - yield OpenAIResponseObjectStreamResponseOutputTextDelta( - content_index=0, - delta=chunk_choice.delta.content, - item_id=message_item_id, - output_index=0, - sequence_number=sequence_number, - ) - - # 
Collect content for final response - chat_response_content.append(chunk_choice.delta.content or "") - if chunk_choice.finish_reason: - chunk_finish_reason = chunk_choice.finish_reason - - # Aggregate tool call arguments across chunks - if chunk_choice.delta.tool_calls: - for tool_call in chunk_choice.delta.tool_calls: - response_tool_call = chat_response_tool_calls.get(tool_call.index, None) - # Create new tool call entry if this is the first chunk for this index - is_new_tool_call = response_tool_call is None - if is_new_tool_call: - tool_call_dict: dict[str, Any] = tool_call.model_dump() - tool_call_dict.pop("type", None) - response_tool_call = OpenAIChatCompletionToolCall(**tool_call_dict) - chat_response_tool_calls[tool_call.index] = response_tool_call - - # Create item ID for this tool call for streaming events - tool_call_item_id = f"fc_{uuid.uuid4()}" - tool_call_item_ids[tool_call.index] = tool_call_item_id - - # Emit output_item.added event for the new function call - sequence_number += 1 - function_call_item = OpenAIResponseOutputMessageFunctionToolCall( - arguments="", # Will be filled incrementally via delta events - call_id=tool_call.id or "", - name=tool_call.function.name if tool_call.function else "", - id=tool_call_item_id, - status="in_progress", - ) - yield OpenAIResponseObjectStreamResponseOutputItemAdded( - response_id=response_id, - item=function_call_item, - output_index=len(output_messages), - sequence_number=sequence_number, - ) - - # Stream tool call arguments as they arrive (differentiate between MCP and function calls) - if tool_call.function and tool_call.function.arguments: - tool_call_item_id = tool_call_item_ids[tool_call.index] - sequence_number += 1 - - # Check if this is an MCP tool call - is_mcp_tool = ( - ctx.mcp_tool_to_server - and tool_call.function.name - and tool_call.function.name in ctx.mcp_tool_to_server - ) - if is_mcp_tool: - # Emit MCP-specific argument delta event - yield OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta( - delta=tool_call.function.arguments, - item_id=tool_call_item_id, - output_index=len(output_messages), - sequence_number=sequence_number, - ) - else: - # Emit function call argument delta event - yield OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta( - delta=tool_call.function.arguments, - item_id=tool_call_item_id, - output_index=len(output_messages), - sequence_number=sequence_number, - ) - - # Accumulate arguments for final response (only for subsequent chunks) - if not is_new_tool_call: - response_tool_call.function.arguments = ( - response_tool_call.function.arguments or "" - ) + tool_call.function.arguments - - # Emit arguments.done events for completed tool calls (differentiate between MCP and function calls) - for tool_call_index in sorted(chat_response_tool_calls.keys()): - tool_call_item_id = tool_call_item_ids[tool_call_index] - final_arguments = chat_response_tool_calls[tool_call_index].function.arguments or "" - tool_call_name = chat_response_tool_calls[tool_call_index].function.name - - # Check if this is an MCP tool call - is_mcp_tool = ctx.mcp_tool_to_server and tool_call_name and tool_call_name in ctx.mcp_tool_to_server - sequence_number += 1 - if is_mcp_tool: - # Emit MCP-specific argument done event - yield OpenAIResponseObjectStreamResponseMcpCallArgumentsDone( - arguments=final_arguments, - item_id=tool_call_item_id, - output_index=len(output_messages), - sequence_number=sequence_number, - ) - else: - # Emit function call argument done event - yield 
OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone( - arguments=final_arguments, - item_id=tool_call_item_id, - output_index=len(output_messages), - sequence_number=sequence_number, - ) - - # Convert collected chunks to complete response - if chat_response_tool_calls: - tool_calls = [chat_response_tool_calls[i] for i in sorted(chat_response_tool_calls.keys())] - else: - tool_calls = None - - # Emit content_part.done event if text content was streamed (before content gets cleared) - if content_part_emitted: - final_text = "".join(chat_response_content) - sequence_number += 1 - yield OpenAIResponseObjectStreamResponseContentPartDone( - response_id=response_id, - item_id=message_item_id, - part=OpenAIResponseContentPartOutputText( - text=final_text, - ), - sequence_number=sequence_number, - ) - - # Clear content when there are tool calls (OpenAI spec behavior) - if chat_response_tool_calls: - chat_response_content = [] - - assistant_message = OpenAIAssistantMessageParam( - content="".join(chat_response_content), - tool_calls=tool_calls, - ) - current_response = OpenAIChatCompletion( - id=chat_response_id, - choices=[ - OpenAIChoice( - message=assistant_message, - finish_reason=chunk_finish_reason, - index=0, - ) - ], - created=chunk_created, - model=chunk_model, - ) - - function_tool_calls = [] - non_function_tool_calls = [] - - next_turn_messages = messages.copy() - for choice in current_response.choices: - next_turn_messages.append(choice.message) - - if choice.message.tool_calls and tools: - for tool_call in choice.message.tool_calls: - if _is_function_tool_call(tool_call, tools): - function_tool_calls.append(tool_call) - else: - non_function_tool_calls.append(tool_call) - else: - output_messages.append(await _convert_chat_choice_to_response_message(choice)) - - # execute non-function tool calls - for tool_call in non_function_tool_calls: - # Find the item_id for this tool call - matching_item_id = None - for index, item_id in tool_call_item_ids.items(): - response_tool_call = chat_response_tool_calls.get(index) - if response_tool_call and response_tool_call.id == tool_call.id: - matching_item_id = item_id - break - - # Use a fallback item_id if not found - if not matching_item_id: - matching_item_id = f"tc_{uuid.uuid4()}" - - # Execute tool call with streaming - tool_call_log = None - tool_response_message = None - async for result in self._execute_tool_call( - tool_call, ctx, sequence_number, response_id, len(output_messages), matching_item_id - ): - if result.stream_event: - # Forward streaming events - sequence_number = result.sequence_number - yield result.stream_event - - if result.final_output_message is not None: - tool_call_log = result.final_output_message - tool_response_message = result.final_input_message - sequence_number = result.sequence_number - - if tool_call_log: - output_messages.append(tool_call_log) - - # Emit output_item.done event for completed non-function tool call - if matching_item_id: - sequence_number += 1 - yield OpenAIResponseObjectStreamResponseOutputItemDone( - response_id=response_id, - item=tool_call_log, - output_index=len(output_messages) - 1, - sequence_number=sequence_number, - ) - - if tool_response_message: - next_turn_messages.append(tool_response_message) - - for tool_call in function_tool_calls: - # Find the item_id for this tool call from our tracking dictionary - matching_item_id = None - for index, item_id in tool_call_item_ids.items(): - response_tool_call = chat_response_tool_calls.get(index) - if response_tool_call and 
response_tool_call.id == tool_call.id: - matching_item_id = item_id - break - - # Use existing item_id or create new one if not found - final_item_id = matching_item_id or f"fc_{uuid.uuid4()}" - - function_call_item = OpenAIResponseOutputMessageFunctionToolCall( - arguments=tool_call.function.arguments or "", - call_id=tool_call.id, - name=tool_call.function.name or "", - id=final_item_id, - status="completed", - ) - output_messages.append(function_call_item) - - # Emit output_item.done event for completed function call - sequence_number += 1 - yield OpenAIResponseObjectStreamResponseOutputItemDone( - response_id=response_id, - item=function_call_item, - output_index=len(output_messages) - 1, - sequence_number=sequence_number, - ) - - if not function_tool_calls and not non_function_tool_calls: - break - - if function_tool_calls: - logger.info("Exiting inference loop since there is a function (client-side) tool call") - break - - n_iter += 1 - if n_iter >= max_infer_iters: - logger.info(f"Exiting inference loop since iteration count({n_iter}) exceeds {max_infer_iters=}") - break - - messages = next_turn_messages - - # Create final response - final_response = OpenAIResponseObject( - created_at=created_at, - id=response_id, - model=model, - object="response", - status="completed", - text=text, - output=output_messages, - ) - - # Emit response.completed - yield OpenAIResponseObjectStreamResponseCompleted(response=final_response) - - if store: - await self._store_response( - response=final_response, - input=input, - ) - - async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject: - return await self.responses_store.delete_response_object(response_id) - - async def _convert_response_tools_to_chat_tools( - self, tools: list[OpenAIResponseInputTool] - ) -> tuple[ - list[ChatCompletionToolParam], - dict[str, OpenAIResponseInputToolMCP], - OpenAIResponseOutput | None, - ]: - from llama_stack.apis.agents.openai_responses import ( - MCPListToolsTool, - ) - from llama_stack.apis.tools import Tool - - mcp_tool_to_server = {} - - def make_openai_tool(tool_name: str, tool: Tool) -> ChatCompletionToolParam: - tool_def = ToolDefinition( - tool_name=tool_name, - description=tool.description, - parameters={ - param.name: ToolParamDefinition( - param_type=param.parameter_type, - description=param.description, - required=param.required, - default=param.default, - ) - for param in tool.parameters - }, - ) - return convert_tooldef_to_openai_tool(tool_def) - - mcp_list_message = None - chat_tools: list[ChatCompletionToolParam] = [] - for input_tool in tools: - # TODO: Handle other tool types - if input_tool.type == "function": - chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump())) - elif input_tool.type in WebSearchToolTypes: - tool_name = "web_search" - tool = await self.tool_groups_api.get_tool(tool_name) - if not tool: - raise ValueError(f"Tool {tool_name} not found") - chat_tools.append(make_openai_tool(tool_name, tool)) - elif input_tool.type == "file_search": - tool_name = "knowledge_search" - tool = await self.tool_groups_api.get_tool(tool_name) - if not tool: - raise ValueError(f"Tool {tool_name} not found") - chat_tools.append(make_openai_tool(tool_name, tool)) - elif input_tool.type == "mcp": - from llama_stack.providers.utils.tools.mcp import list_mcp_tools - - always_allowed = None - never_allowed = None - if input_tool.allowed_tools: - if isinstance(input_tool.allowed_tools, list): - always_allowed = input_tool.allowed_tools - elif 
isinstance(input_tool.allowed_tools, AllowedToolsFilter): - always_allowed = input_tool.allowed_tools.always - never_allowed = input_tool.allowed_tools.never - - tool_defs = await list_mcp_tools( - endpoint=input_tool.server_url, - headers=input_tool.headers or {}, - ) - - mcp_list_message = OpenAIResponseOutputMessageMCPListTools( - id=f"mcp_list_{uuid.uuid4()}", - status="completed", - server_label=input_tool.server_label, - tools=[], - ) - for t in tool_defs.data: - if never_allowed and t.name in never_allowed: - continue - if not always_allowed or t.name in always_allowed: - chat_tools.append(make_openai_tool(t.name, t)) - if t.name in mcp_tool_to_server: - raise ValueError(f"Duplicate tool name {t.name} found for server {input_tool.server_label}") - mcp_tool_to_server[t.name] = input_tool - mcp_list_message.tools.append( - MCPListToolsTool( - name=t.name, - description=t.description, - input_schema={ - "type": "object", - "properties": { - p.name: { - "type": p.parameter_type, - "description": p.description, - } - for p in t.parameters - }, - "required": [p.name for p in t.parameters if p.required], - }, - ) - ) - else: - raise ValueError(f"Llama Stack OpenAI Responses does not yet support tool type: {input_tool.type}") - return chat_tools, mcp_tool_to_server, mcp_list_message - - async def _execute_knowledge_search_via_vector_store( - self, - query: str, - response_file_search_tool: OpenAIResponseInputToolFileSearch, - ) -> ToolInvocationResult: - """Execute knowledge search using vector_stores.search API with filters support.""" - search_results = [] - - # Create search tasks for all vector stores - async def search_single_store(vector_store_id): - try: - search_response = await self.vector_io_api.openai_search_vector_store( - vector_store_id=vector_store_id, - query=query, - filters=response_file_search_tool.filters, - max_num_results=response_file_search_tool.max_num_results, - ranking_options=response_file_search_tool.ranking_options, - rewrite_query=False, - ) - return search_response.data - except Exception as e: - logger.warning(f"Failed to search vector store {vector_store_id}: {e}") - return [] - - # Run all searches in parallel using gather - search_tasks = [search_single_store(vid) for vid in response_file_search_tool.vector_store_ids] - all_results = await asyncio.gather(*search_tasks) - - # Flatten results - for results in all_results: - search_results.extend(results) - - # Convert search results to tool result format matching memory.py - # Format the results as interleaved content similar to memory.py - content_items = [] - content_items.append( - TextContentItem( - text=f"knowledge_search tool found {len(search_results)} chunks:\nBEGIN of knowledge_search tool results.\n" - ) - ) - - for i, result_item in enumerate(search_results): - chunk_text = result_item.content[0].text if result_item.content else "" - metadata_text = f"document_id: {result_item.file_id}, score: {result_item.score}" - if result_item.attributes: - metadata_text += f", attributes: {result_item.attributes}" - text_content = f"[{i + 1}] {metadata_text}\n{chunk_text}\n" - content_items.append(TextContentItem(text=text_content)) - - content_items.append(TextContentItem(text="END of knowledge_search tool results.\n")) - content_items.append( - TextContentItem( - text=f'The above results were retrieved to help answer the user\'s query: "{query}". 
Use them as supporting information only in answering this query.\n', - ) - ) - - return ToolInvocationResult( - content=content_items, - metadata={ - "document_ids": [r.file_id for r in search_results], - "chunks": [r.content[0].text if r.content else "" for r in search_results], - "scores": [r.score for r in search_results], - }, - ) - - async def _execute_tool_call( - self, - tool_call: OpenAIChatCompletionToolCall, - ctx: ChatCompletionContext, - sequence_number: int, - response_id: str, - output_index: int, - item_id: str, - ) -> AsyncIterator[ToolExecutionResult]: - from llama_stack.providers.utils.inference.prompt_adapter import ( - interleaved_content_as_str, - ) - - tool_call_id = tool_call.id - function = tool_call.function - tool_kwargs = json.loads(function.arguments) if function.arguments else {} - - if not function or not tool_call_id or not function.name: - yield ToolExecutionResult(sequence_number=sequence_number) - return - - # Emit in_progress event based on tool type (only for tools with specific streaming events) - progress_event = None - if ctx.mcp_tool_to_server and function.name in ctx.mcp_tool_to_server: - sequence_number += 1 - progress_event = OpenAIResponseObjectStreamResponseMcpCallInProgress( - item_id=item_id, - output_index=output_index, - sequence_number=sequence_number, - ) - elif function.name == "web_search": - sequence_number += 1 - progress_event = OpenAIResponseObjectStreamResponseWebSearchCallInProgress( - item_id=item_id, - output_index=output_index, - sequence_number=sequence_number, - ) - # Note: knowledge_search and other custom tools don't have specific streaming events in OpenAI spec - - if progress_event: - yield ToolExecutionResult(stream_event=progress_event, sequence_number=sequence_number) - - # For web search, emit searching event - if function.name == "web_search": - sequence_number += 1 - searching_event = OpenAIResponseObjectStreamResponseWebSearchCallSearching( - item_id=item_id, - output_index=output_index, - sequence_number=sequence_number, - ) - yield ToolExecutionResult(stream_event=searching_event, sequence_number=sequence_number) - - # Execute the actual tool call - error_exc = None - result = None - try: - if ctx.mcp_tool_to_server and function.name in ctx.mcp_tool_to_server: - from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool - - mcp_tool = ctx.mcp_tool_to_server[function.name] - result = await invoke_mcp_tool( - endpoint=mcp_tool.server_url, - headers=mcp_tool.headers or {}, - tool_name=function.name, - kwargs=tool_kwargs, - ) - elif function.name == "knowledge_search": - response_file_search_tool = next( - (t for t in ctx.response_tools if isinstance(t, OpenAIResponseInputToolFileSearch)), - None, - ) - if response_file_search_tool: - # Use vector_stores.search API instead of knowledge_search tool - # to support filters and ranking_options - query = tool_kwargs.get("query", "") - result = await self._execute_knowledge_search_via_vector_store( - query=query, - response_file_search_tool=response_file_search_tool, - ) - else: - result = await self.tool_runtime_api.invoke_tool( - tool_name=function.name, - kwargs=tool_kwargs, - ) - except Exception as e: - error_exc = e - - # Emit completion or failure event based on result (only for tools with specific streaming events) - has_error = error_exc or (result and ((result.error_code and result.error_code > 0) or result.error_message)) - completion_event = None - - if ctx.mcp_tool_to_server and function.name in ctx.mcp_tool_to_server: - sequence_number += 1 - if 
has_error: - completion_event = OpenAIResponseObjectStreamResponseMcpCallFailed( - sequence_number=sequence_number, - ) - else: - completion_event = OpenAIResponseObjectStreamResponseMcpCallCompleted( - sequence_number=sequence_number, - ) - elif function.name == "web_search": - sequence_number += 1 - completion_event = OpenAIResponseObjectStreamResponseWebSearchCallCompleted( - item_id=item_id, - output_index=output_index, - sequence_number=sequence_number, - ) - # Note: knowledge_search and other custom tools don't have specific completion events in OpenAI spec - - if completion_event: - yield ToolExecutionResult(stream_event=completion_event, sequence_number=sequence_number) - - # Build the result message and input message - if function.name in ctx.mcp_tool_to_server: - from llama_stack.apis.agents.openai_responses import ( - OpenAIResponseOutputMessageMCPCall, - ) - - message = OpenAIResponseOutputMessageMCPCall( - id=tool_call_id, - arguments=function.arguments, - name=function.name, - server_label=ctx.mcp_tool_to_server[function.name].server_label, - ) - if error_exc: - message.error = str(error_exc) - elif (result and result.error_code and result.error_code > 0) or (result and result.error_message): - message.error = f"Error (code {result.error_code}): {result.error_message}" - elif result and result.content: - message.output = interleaved_content_as_str(result.content) - else: - if function.name == "web_search": - message = OpenAIResponseOutputMessageWebSearchToolCall( - id=tool_call_id, - status="completed", - ) - if has_error: - message.status = "failed" - elif function.name == "knowledge_search": - message = OpenAIResponseOutputMessageFileSearchToolCall( - id=tool_call_id, - queries=[tool_kwargs.get("query", "")], - status="completed", - ) - if result and "document_ids" in result.metadata: - message.results = [] - for i, doc_id in enumerate(result.metadata["document_ids"]): - text = result.metadata["chunks"][i] if "chunks" in result.metadata else None - score = result.metadata["scores"][i] if "scores" in result.metadata else None - message.results.append( - OpenAIResponseOutputMessageFileSearchToolCallResults( - file_id=doc_id, - filename=doc_id, - text=text, - score=score, - attributes={}, - ) - ) - if has_error: - message.status = "failed" - else: - raise ValueError(f"Unknown tool {function.name} called") - - input_message = None - if result and result.content: - if isinstance(result.content, str): - content = result.content - elif isinstance(result.content, list): - from llama_stack.apis.common.content_types import ( - ImageContentItem, - TextContentItem, - ) - - content = [] - for item in result.content: - if isinstance(item, TextContentItem): - part = OpenAIChatCompletionContentPartTextParam(text=item.text) - elif isinstance(item, ImageContentItem): - if item.image.data: - url = f"data:image;base64,{item.image.data}" - else: - url = item.image.url - part = OpenAIChatCompletionContentPartImageParam(image_url=OpenAIImageURL(url=url)) - else: - raise ValueError(f"Unknown result content type: {type(item)}") - content.append(part) - else: - raise ValueError(f"Unknown result content type: {type(result.content)}") - input_message = OpenAIToolMessageParam(content=content, tool_call_id=tool_call_id) - else: - text = str(error_exc) if error_exc else "Tool execution failed" - input_message = OpenAIToolMessageParam(content=text, tool_call_id=tool_call_id) - - # Yield the final result - yield ToolExecutionResult( - sequence_number=sequence_number, final_output_message=message, 
final_input_message=input_message - ) - - -def _is_function_tool_call( - tool_call: OpenAIChatCompletionToolCall, - tools: list[OpenAIResponseInputTool], -) -> bool: - if not tool_call.function: - return False - for t in tools: - if t.type == "function" and t.name == tool_call.function.name: - return True - return False diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/__init__.py b/llama_stack/providers/inline/agents/meta_reference/responses/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/llama_stack/providers/inline/agents/meta_reference/responses/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py new file mode 100644 index 000000000..e528a4005 --- /dev/null +++ b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -0,0 +1,271 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import time +import uuid +from collections.abc import AsyncIterator + +from pydantic import BaseModel + +from llama_stack.apis.agents import Order +from llama_stack.apis.agents.openai_responses import ( + ListOpenAIResponseInputItem, + ListOpenAIResponseObject, + OpenAIDeleteResponseObject, + OpenAIResponseInput, + OpenAIResponseInputMessageContentText, + OpenAIResponseInputTool, + OpenAIResponseMessage, + OpenAIResponseObject, + OpenAIResponseObjectStream, + OpenAIResponseText, + OpenAIResponseTextFormat, +) +from llama_stack.apis.inference import ( + Inference, + OpenAISystemMessageParam, +) +from llama_stack.apis.tools import ToolGroups, ToolRuntime +from llama_stack.apis.vector_io import VectorIO +from llama_stack.log import get_logger +from llama_stack.providers.utils.responses.responses_store import ResponsesStore + +from .streaming import StreamingResponseOrchestrator +from .tool_executor import ToolExecutor +from .types import ChatCompletionContext +from .utils import ( + convert_response_input_to_chat_messages, + convert_response_text_to_chat_response_format, +) + +logger = get_logger(name=__name__, category="responses") + + +class OpenAIResponsePreviousResponseWithInputItems(BaseModel): + input_items: ListOpenAIResponseInputItem + response: OpenAIResponseObject + + +class OpenAIResponsesImpl: + def __init__( + self, + inference_api: Inference, + tool_groups_api: ToolGroups, + tool_runtime_api: ToolRuntime, + responses_store: ResponsesStore, + vector_io_api: VectorIO, # VectorIO + ): + self.inference_api = inference_api + self.tool_groups_api = tool_groups_api + self.tool_runtime_api = tool_runtime_api + self.responses_store = responses_store + self.vector_io_api = vector_io_api + self.tool_executor = ToolExecutor( + tool_groups_api=tool_groups_api, + tool_runtime_api=tool_runtime_api, + vector_io_api=vector_io_api, + ) + + async def _prepend_previous_response( + self, + input: str | list[OpenAIResponseInput], + previous_response_id: str | None = None, + ): + if previous_response_id: + previous_response_with_input = await self.responses_store.get_response_object(previous_response_id) + + # previous response 
input items + new_input_items = previous_response_with_input.input + + # previous response output items + new_input_items.extend(previous_response_with_input.output) + + # new input items from the current request + if isinstance(input, str): + new_input_items.append(OpenAIResponseMessage(content=input, role="user")) + else: + new_input_items.extend(input) + + input = new_input_items + + return input + + async def _prepend_instructions(self, messages, instructions): + if instructions: + messages.insert(0, OpenAISystemMessageParam(content=instructions)) + + async def get_openai_response( + self, + response_id: str, + ) -> OpenAIResponseObject: + response_with_input = await self.responses_store.get_response_object(response_id) + return OpenAIResponseObject(**{k: v for k, v in response_with_input.model_dump().items() if k != "input"}) + + async def list_openai_responses( + self, + after: str | None = None, + limit: int | None = 50, + model: str | None = None, + order: Order | None = Order.desc, + ) -> ListOpenAIResponseObject: + return await self.responses_store.list_responses(after, limit, model, order) + + async def list_openai_response_input_items( + self, + response_id: str, + after: str | None = None, + before: str | None = None, + include: list[str] | None = None, + limit: int | None = 20, + order: Order | None = Order.desc, + ) -> ListOpenAIResponseInputItem: + """List input items for a given OpenAI response. + + :param response_id: The ID of the response to retrieve input items for. + :param after: An item ID to list items after, used for pagination. + :param before: An item ID to list items before, used for pagination. + :param include: Additional fields to include in the response. + :param limit: A limit on the number of objects to be returned. + :param order: The order to return the input items in. + :returns: An ListOpenAIResponseInputItem. 
+ """ + return await self.responses_store.list_response_input_items(response_id, after, before, include, limit, order) + + async def _store_response( + self, + response: OpenAIResponseObject, + input: str | list[OpenAIResponseInput], + ) -> None: + new_input_id = f"msg_{uuid.uuid4()}" + if isinstance(input, str): + # synthesize a message from the input string + input_content = OpenAIResponseInputMessageContentText(text=input) + input_content_item = OpenAIResponseMessage( + role="user", + content=[input_content], + id=new_input_id, + ) + input_items_data = [input_content_item] + else: + # we already have a list of messages + input_items_data = [] + for input_item in input: + if isinstance(input_item, OpenAIResponseMessage): + # These may or may not already have an id, so dump to dict, check for id, and add if missing + input_item_dict = input_item.model_dump() + if "id" not in input_item_dict: + input_item_dict["id"] = new_input_id + input_items_data.append(OpenAIResponseMessage(**input_item_dict)) + else: + input_items_data.append(input_item) + + await self.responses_store.store_response_object( + response_object=response, + input=input_items_data, + ) + + async def create_openai_response( + self, + input: str | list[OpenAIResponseInput], + model: str, + instructions: str | None = None, + previous_response_id: str | None = None, + store: bool | None = True, + stream: bool | None = False, + temperature: float | None = None, + text: OpenAIResponseText | None = None, + tools: list[OpenAIResponseInputTool] | None = None, + include: list[str] | None = None, + max_infer_iters: int | None = 10, + ): + stream = bool(stream) + text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text + + stream_gen = self._create_streaming_response( + input=input, + model=model, + instructions=instructions, + previous_response_id=previous_response_id, + store=store, + temperature=temperature, + text=text, + tools=tools, + max_infer_iters=max_infer_iters, + ) + + if stream: + return stream_gen + else: + response = None + async for stream_chunk in stream_gen: + if stream_chunk.type == "response.completed": + if response is not None: + raise ValueError("The response stream completed multiple times! Earlier response: {response}") + response = stream_chunk.response + # don't leave the generator half complete! 
+ + if response is None: + raise ValueError("The response stream never completed") + return response + + async def _create_streaming_response( + self, + input: str | list[OpenAIResponseInput], + model: str, + instructions: str | None = None, + previous_response_id: str | None = None, + store: bool | None = True, + temperature: float | None = None, + text: OpenAIResponseText | None = None, + tools: list[OpenAIResponseInputTool] | None = None, + max_infer_iters: int | None = 10, + ) -> AsyncIterator[OpenAIResponseObjectStream]: + # Input preprocessing + input = await self._prepend_previous_response(input, previous_response_id) + messages = await convert_response_input_to_chat_messages(input) + await self._prepend_instructions(messages, instructions) + + # Structured outputs + response_format = await convert_response_text_to_chat_response_format(text) + + ctx = ChatCompletionContext( + model=model, + messages=messages, + response_tools=tools, + temperature=temperature, + response_format=response_format, + ) + + # Create orchestrator and delegate streaming logic + response_id = f"resp-{uuid.uuid4()}" + created_at = int(time.time()) + + orchestrator = StreamingResponseOrchestrator( + inference_api=self.inference_api, + ctx=ctx, + response_id=response_id, + created_at=created_at, + text=text, + max_infer_iters=max_infer_iters, + tool_executor=self.tool_executor, + ) + + # Stream the response + final_response = None + async for stream_chunk in orchestrator.create_response(): + if stream_chunk.type == "response.completed": + final_response = stream_chunk.response + yield stream_chunk + + # Store the response if requested + if store and final_response: + await self._store_response( + response=final_response, + input=input, + ) + + async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject: + return await self.responses_store.delete_response_object(response_id) diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py new file mode 100644 index 000000000..0879e978a --- /dev/null +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -0,0 +1,634 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
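Note how `create_openai_response` serves both modes from a single streaming generator: streaming callers receive the generator itself, while non-streaming callers drain it to completion and keep only the `response.completed` payload, so end-of-stream side effects (such as storing the response) still run. A reduced sketch of that drain loop, with event objects simplified to `type`/`response` attributes:

```python
from collections.abc import AsyncIterator
from typing import Any


async def drain_for_final_response(stream: AsyncIterator[Any]) -> Any:
    """Consume every event so the generator runs to completion, keeping only
    the terminal response. Event shapes here are simplified stand-ins."""
    final = None
    async for event in stream:
        if event.type == "response.completed":
            if final is not None:
                raise ValueError(f"stream completed twice; earlier response: {final}")
            final = event.response
        # keep iterating even after completion: don't leave the generator
        # half-finished, since cleanup and storage may follow the last event
    if final is None:
        raise ValueError("stream ended without a response.completed event")
    return final
```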
+ +import uuid +from collections.abc import AsyncIterator +from typing import Any + +from llama_stack.apis.agents.openai_responses import ( + AllowedToolsFilter, + MCPListToolsTool, + OpenAIResponseContentPartOutputText, + OpenAIResponseInputTool, + OpenAIResponseInputToolMCP, + OpenAIResponseObject, + OpenAIResponseObjectStream, + OpenAIResponseObjectStreamResponseCompleted, + OpenAIResponseObjectStreamResponseContentPartAdded, + OpenAIResponseObjectStreamResponseContentPartDone, + OpenAIResponseObjectStreamResponseCreated, + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta, + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone, + OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta, + OpenAIResponseObjectStreamResponseMcpCallArgumentsDone, + OpenAIResponseObjectStreamResponseMcpListToolsCompleted, + OpenAIResponseObjectStreamResponseMcpListToolsInProgress, + OpenAIResponseObjectStreamResponseOutputItemAdded, + OpenAIResponseObjectStreamResponseOutputItemDone, + OpenAIResponseObjectStreamResponseOutputTextDelta, + OpenAIResponseOutput, + OpenAIResponseOutputMessageFunctionToolCall, + OpenAIResponseOutputMessageMCPListTools, + OpenAIResponseText, + WebSearchToolTypes, +) +from llama_stack.apis.inference import ( + Inference, + OpenAIAssistantMessageParam, + OpenAIChatCompletion, + OpenAIChatCompletionToolCall, + OpenAIChoice, +) +from llama_stack.log import get_logger + +from .types import ChatCompletionContext, ChatCompletionResult +from .utils import convert_chat_choice_to_response_message, is_function_tool_call + +logger = get_logger(name=__name__, category="responses") + + +class StreamingResponseOrchestrator: + def __init__( + self, + inference_api: Inference, + ctx: ChatCompletionContext, + response_id: str, + created_at: int, + text: OpenAIResponseText, + max_infer_iters: int, + tool_executor, # Will be the tool execution logic from the main class + ): + self.inference_api = inference_api + self.ctx = ctx + self.response_id = response_id + self.created_at = created_at + self.text = text + self.max_infer_iters = max_infer_iters + self.tool_executor = tool_executor + self.sequence_number = 0 + # Store MCP tool mapping that gets built during tool processing + self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = {} + + async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]: + # Initialize output messages + output_messages: list[OpenAIResponseOutput] = [] + # Create initial response and emit response.created immediately + initial_response = OpenAIResponseObject( + created_at=self.created_at, + id=self.response_id, + model=self.ctx.model, + object="response", + status="in_progress", + output=output_messages.copy(), + text=self.text, + ) + + yield OpenAIResponseObjectStreamResponseCreated(response=initial_response) + + # Process all tools (including MCP tools) and emit streaming events + if self.ctx.response_tools: + async for stream_event in self._process_tools(self.ctx.response_tools, output_messages): + yield stream_event + + n_iter = 0 + messages = self.ctx.messages.copy() + + while True: + completion_result = await self.inference_api.openai_chat_completion( + model=self.ctx.model, + messages=messages, + tools=self.ctx.chat_tools, + stream=True, + temperature=self.ctx.temperature, + response_format=self.ctx.response_format, + ) + + # Process streaming chunks and build complete response + completion_result_data = None + async for stream_event_or_result in self._process_streaming_chunks(completion_result, output_messages): + if 
isinstance(stream_event_or_result, ChatCompletionResult): + completion_result_data = stream_event_or_result + else: + yield stream_event_or_result + if not completion_result_data: + raise ValueError("Streaming chunk processor failed to return completion data") + current_response = self._build_chat_completion(completion_result_data) + + function_tool_calls, non_function_tool_calls, next_turn_messages = self._separate_tool_calls( + current_response, messages + ) + + # Handle choices with no tool calls + for choice in current_response.choices: + if not (choice.message.tool_calls and self.ctx.response_tools): + output_messages.append(await convert_chat_choice_to_response_message(choice)) + + # Execute tool calls and coordinate results + async for stream_event in self._coordinate_tool_execution( + function_tool_calls, + non_function_tool_calls, + completion_result_data, + output_messages, + next_turn_messages, + ): + yield stream_event + + if not function_tool_calls and not non_function_tool_calls: + break + + if function_tool_calls: + logger.info("Exiting inference loop since there is a function (client-side) tool call") + break + + n_iter += 1 + if n_iter >= self.max_infer_iters: + logger.info(f"Exiting inference loop since iteration count({n_iter}) exceeds {self.max_infer_iters=}") + break + + messages = next_turn_messages + + # Create final response + final_response = OpenAIResponseObject( + created_at=self.created_at, + id=self.response_id, + model=self.ctx.model, + object="response", + status="completed", + text=self.text, + output=output_messages, + ) + + # Emit response.completed + yield OpenAIResponseObjectStreamResponseCompleted(response=final_response) + + def _separate_tool_calls(self, current_response, messages) -> tuple[list, list, list]: + """Separate tool calls into function and non-function categories.""" + function_tool_calls = [] + non_function_tool_calls = [] + next_turn_messages = messages.copy() + + for choice in current_response.choices: + next_turn_messages.append(choice.message) + + if choice.message.tool_calls and self.ctx.response_tools: + for tool_call in choice.message.tool_calls: + if is_function_tool_call(tool_call, self.ctx.response_tools): + function_tool_calls.append(tool_call) + else: + non_function_tool_calls.append(tool_call) + + return function_tool_calls, non_function_tool_calls, next_turn_messages + + async def _process_streaming_chunks( + self, completion_result, output_messages: list[OpenAIResponseOutput] + ) -> AsyncIterator[OpenAIResponseObjectStream | ChatCompletionResult]: + """Process streaming chunks and emit events, returning completion data.""" + # Initialize result tracking + chat_response_id = "" + chat_response_content = [] + chat_response_tool_calls: dict[int, OpenAIChatCompletionToolCall] = {} + chunk_created = 0 + chunk_model = "" + chunk_finish_reason = "" + + # Create a placeholder message item for delta events + message_item_id = f"msg_{uuid.uuid4()}" + # Track tool call items for streaming events + tool_call_item_ids: dict[int, str] = {} + # Track content parts for streaming events + content_part_emitted = False + + async for chunk in completion_result: + chat_response_id = chunk.id + chunk_created = chunk.created + chunk_model = chunk.model + for chunk_choice in chunk.choices: + # Emit incremental text content as delta events + if chunk_choice.delta.content: + # Emit content_part.added event for first text chunk + if not content_part_emitted: + content_part_emitted = True + self.sequence_number += 1 + yield 
OpenAIResponseObjectStreamResponseContentPartAdded( + response_id=self.response_id, + item_id=message_item_id, + part=OpenAIResponseContentPartOutputText( + text="", # Will be filled incrementally via text deltas + ), + sequence_number=self.sequence_number, + ) + self.sequence_number += 1 + yield OpenAIResponseObjectStreamResponseOutputTextDelta( + content_index=0, + delta=chunk_choice.delta.content, + item_id=message_item_id, + output_index=0, + sequence_number=self.sequence_number, + ) + + # Collect content for final response + chat_response_content.append(chunk_choice.delta.content or "") + if chunk_choice.finish_reason: + chunk_finish_reason = chunk_choice.finish_reason + + # Aggregate tool call arguments across chunks + if chunk_choice.delta.tool_calls: + for tool_call in chunk_choice.delta.tool_calls: + response_tool_call = chat_response_tool_calls.get(tool_call.index, None) + # Create new tool call entry if this is the first chunk for this index + is_new_tool_call = response_tool_call is None + if is_new_tool_call: + tool_call_dict: dict[str, Any] = tool_call.model_dump() + tool_call_dict.pop("type", None) + response_tool_call = OpenAIChatCompletionToolCall(**tool_call_dict) + chat_response_tool_calls[tool_call.index] = response_tool_call + + # Create item ID for this tool call for streaming events + tool_call_item_id = f"fc_{uuid.uuid4()}" + tool_call_item_ids[tool_call.index] = tool_call_item_id + + # Emit output_item.added event for the new function call + self.sequence_number += 1 + function_call_item = OpenAIResponseOutputMessageFunctionToolCall( + arguments="", # Will be filled incrementally via delta events + call_id=tool_call.id or "", + name=tool_call.function.name if tool_call.function else "", + id=tool_call_item_id, + status="in_progress", + ) + yield OpenAIResponseObjectStreamResponseOutputItemAdded( + response_id=self.response_id, + item=function_call_item, + output_index=len(output_messages), + sequence_number=self.sequence_number, + ) + + # Stream tool call arguments as they arrive (differentiate between MCP and function calls) + if tool_call.function and tool_call.function.arguments: + tool_call_item_id = tool_call_item_ids[tool_call.index] + self.sequence_number += 1 + + # Check if this is an MCP tool call + is_mcp_tool = tool_call.function.name and tool_call.function.name in self.mcp_tool_to_server + if is_mcp_tool: + # Emit MCP-specific argument delta event + yield OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta( + delta=tool_call.function.arguments, + item_id=tool_call_item_id, + output_index=len(output_messages), + sequence_number=self.sequence_number, + ) + else: + # Emit function call argument delta event + yield OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta( + delta=tool_call.function.arguments, + item_id=tool_call_item_id, + output_index=len(output_messages), + sequence_number=self.sequence_number, + ) + + # Accumulate arguments for final response (only for subsequent chunks) + if not is_new_tool_call: + response_tool_call.function.arguments = ( + response_tool_call.function.arguments or "" + ) + tool_call.function.arguments + + # Emit arguments.done events for completed tool calls (differentiate between MCP and function calls) + for tool_call_index in sorted(chat_response_tool_calls.keys()): + tool_call_item_id = tool_call_item_ids[tool_call_index] + final_arguments = chat_response_tool_calls[tool_call_index].function.arguments or "" + tool_call_name = chat_response_tool_calls[tool_call_index].function.name + + # Check if this is an 
MCP tool call + is_mcp_tool = tool_call_name and tool_call_name in self.mcp_tool_to_server + self.sequence_number += 1 + done_event_cls = ( + OpenAIResponseObjectStreamResponseMcpCallArgumentsDone + if is_mcp_tool + else OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone + ) + yield done_event_cls( + arguments=final_arguments, + item_id=tool_call_item_id, + output_index=len(output_messages), + sequence_number=self.sequence_number, + ) + + # Emit content_part.done event if text content was streamed (before content gets cleared) + if content_part_emitted: + final_text = "".join(chat_response_content) + self.sequence_number += 1 + yield OpenAIResponseObjectStreamResponseContentPartDone( + response_id=self.response_id, + item_id=message_item_id, + part=OpenAIResponseContentPartOutputText( + text=final_text, + ), + sequence_number=self.sequence_number, + ) + + # Clear content when there are tool calls (OpenAI spec behavior) + if chat_response_tool_calls: + chat_response_content = [] + + yield ChatCompletionResult( + response_id=chat_response_id, + content=chat_response_content, + tool_calls=chat_response_tool_calls, + created=chunk_created, + model=chunk_model, + finish_reason=chunk_finish_reason, + message_item_id=message_item_id, + tool_call_item_ids=tool_call_item_ids, + content_part_emitted=content_part_emitted, + ) + + def _build_chat_completion(self, result: ChatCompletionResult) -> OpenAIChatCompletion: + """Build OpenAIChatCompletion from ChatCompletionResult.""" + # Convert collected chunks to complete response + if result.tool_calls: + tool_calls = [result.tool_calls[i] for i in sorted(result.tool_calls.keys())] + else: + tool_calls = None + + assistant_message = OpenAIAssistantMessageParam( + content=result.content_text, + tool_calls=tool_calls, + ) + return OpenAIChatCompletion( + id=result.response_id, + choices=[ + OpenAIChoice( + message=assistant_message, + finish_reason=result.finish_reason, + index=0, + ) + ], + created=result.created, + model=result.model, + ) + + async def _coordinate_tool_execution( + self, + function_tool_calls: list, + non_function_tool_calls: list, + completion_result_data: ChatCompletionResult, + output_messages: list[OpenAIResponseOutput], + next_turn_messages: list, + ) -> AsyncIterator[OpenAIResponseObjectStream]: + """Coordinate execution of both function and non-function tool calls.""" + # Execute non-function tool calls + for tool_call in non_function_tool_calls: + # Find the item_id for this tool call + matching_item_id = None + for index, item_id in completion_result_data.tool_call_item_ids.items(): + response_tool_call = completion_result_data.tool_calls.get(index) + if response_tool_call and response_tool_call.id == tool_call.id: + matching_item_id = item_id + break + + # Use a fallback item_id if not found + if not matching_item_id: + matching_item_id = f"tc_{uuid.uuid4()}" + + # Execute tool call with streaming + tool_call_log = None + tool_response_message = None + async for result in self.tool_executor.execute_tool_call( + tool_call, + self.ctx, + self.sequence_number, + len(output_messages), + matching_item_id, + self.mcp_tool_to_server, + ): + if result.stream_event: + # Forward streaming events + self.sequence_number = result.sequence_number + yield result.stream_event + + if result.final_output_message is not None: + tool_call_log = result.final_output_message + tool_response_message = result.final_input_message + self.sequence_number = result.sequence_number + + if tool_call_log: + output_messages.append(tool_call_log) + + # 
Emit output_item.done event for completed non-function tool call + if matching_item_id: + self.sequence_number += 1 + yield OpenAIResponseObjectStreamResponseOutputItemDone( + response_id=self.response_id, + item=tool_call_log, + output_index=len(output_messages) - 1, + sequence_number=self.sequence_number, + ) + + if tool_response_message: + next_turn_messages.append(tool_response_message) + + # Execute function tool calls (client-side) + for tool_call in function_tool_calls: + # Find the item_id for this tool call from our tracking dictionary + matching_item_id = None + for index, item_id in completion_result_data.tool_call_item_ids.items(): + response_tool_call = completion_result_data.tool_calls.get(index) + if response_tool_call and response_tool_call.id == tool_call.id: + matching_item_id = item_id + break + + # Use existing item_id or create new one if not found + final_item_id = matching_item_id or f"fc_{uuid.uuid4()}" + + function_call_item = OpenAIResponseOutputMessageFunctionToolCall( + arguments=tool_call.function.arguments or "", + call_id=tool_call.id, + name=tool_call.function.name or "", + id=final_item_id, + status="completed", + ) + output_messages.append(function_call_item) + + # Emit output_item.done event for completed function call + self.sequence_number += 1 + yield OpenAIResponseObjectStreamResponseOutputItemDone( + response_id=self.response_id, + item=function_call_item, + output_index=len(output_messages) - 1, + sequence_number=self.sequence_number, + ) + + async def _process_tools( + self, tools: list[OpenAIResponseInputTool], output_messages: list[OpenAIResponseOutput] + ) -> AsyncIterator[OpenAIResponseObjectStream]: + """Process all tools and emit appropriate streaming events.""" + from openai.types.chat import ChatCompletionToolParam + + from llama_stack.apis.tools import Tool + from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition + from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool + + def make_openai_tool(tool_name: str, tool: Tool) -> ChatCompletionToolParam: + tool_def = ToolDefinition( + tool_name=tool_name, + description=tool.description, + parameters={ + param.name: ToolParamDefinition( + param_type=param.parameter_type, + description=param.description, + required=param.required, + default=param.default, + ) + for param in tool.parameters + }, + ) + return convert_tooldef_to_openai_tool(tool_def) + + # Initialize chat_tools if not already set + if self.ctx.chat_tools is None: + self.ctx.chat_tools = [] + + for input_tool in tools: + if input_tool.type == "function": + self.ctx.chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump())) + elif input_tool.type in WebSearchToolTypes: + tool_name = "web_search" + # Need to access tool_groups_api from tool_executor + tool = await self.tool_executor.tool_groups_api.get_tool(tool_name) + if not tool: + raise ValueError(f"Tool {tool_name} not found") + self.ctx.chat_tools.append(make_openai_tool(tool_name, tool)) + elif input_tool.type == "file_search": + tool_name = "knowledge_search" + tool = await self.tool_executor.tool_groups_api.get_tool(tool_name) + if not tool: + raise ValueError(f"Tool {tool_name} not found") + self.ctx.chat_tools.append(make_openai_tool(tool_name, tool)) + elif input_tool.type == "mcp": + async for stream_event in self._process_mcp_tool(input_tool, output_messages): + yield stream_event + else: + raise ValueError(f"Llama Stack OpenAI Responses does not yet support tool type: 
{input_tool.type}") + + async def _process_mcp_tool( + self, mcp_tool: OpenAIResponseInputToolMCP, output_messages: list[OpenAIResponseOutput] + ) -> AsyncIterator[OpenAIResponseObjectStream]: + """Process an MCP tool configuration and emit appropriate streaming events.""" + from llama_stack.providers.utils.tools.mcp import list_mcp_tools + + # Emit mcp_list_tools.in_progress + self.sequence_number += 1 + yield OpenAIResponseObjectStreamResponseMcpListToolsInProgress( + sequence_number=self.sequence_number, + ) + + try: + # Parse allowed/never allowed tools + always_allowed = None + never_allowed = None + if mcp_tool.allowed_tools: + if isinstance(mcp_tool.allowed_tools, list): + always_allowed = mcp_tool.allowed_tools + elif isinstance(mcp_tool.allowed_tools, AllowedToolsFilter): + always_allowed = mcp_tool.allowed_tools.always + never_allowed = mcp_tool.allowed_tools.never + + # Call list_mcp_tools + tool_defs = await list_mcp_tools( + endpoint=mcp_tool.server_url, + headers=mcp_tool.headers or {}, + ) + + # Create the MCP list tools message + mcp_list_message = OpenAIResponseOutputMessageMCPListTools( + id=f"mcp_list_{uuid.uuid4()}", + server_label=mcp_tool.server_label, + tools=[], + ) + + # Process tools and update context + for t in tool_defs.data: + if never_allowed and t.name in never_allowed: + continue + if not always_allowed or t.name in always_allowed: + # Add to chat tools for inference + from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition + from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool + + tool_def = ToolDefinition( + tool_name=t.name, + description=t.description, + parameters={ + param.name: ToolParamDefinition( + param_type=param.parameter_type, + description=param.description, + required=param.required, + default=param.default, + ) + for param in t.parameters + }, + ) + openai_tool = convert_tooldef_to_openai_tool(tool_def) + if self.ctx.chat_tools is None: + self.ctx.chat_tools = [] + self.ctx.chat_tools.append(openai_tool) + + # Add to MCP tool mapping + if t.name in self.mcp_tool_to_server: + raise ValueError(f"Duplicate tool name {t.name} found for server {mcp_tool.server_label}") + self.mcp_tool_to_server[t.name] = mcp_tool + + # Add to MCP list message + mcp_list_message.tools.append( + MCPListToolsTool( + name=t.name, + description=t.description, + input_schema={ + "type": "object", + "properties": { + p.name: { + "type": p.parameter_type, + "description": p.description, + } + for p in t.parameters + }, + "required": [p.name for p in t.parameters if p.required], + }, + ) + ) + + # Add the MCP list message to output + output_messages.append(mcp_list_message) + + # Emit output_item.added for the MCP list tools message + self.sequence_number += 1 + yield OpenAIResponseObjectStreamResponseOutputItemAdded( + response_id=self.response_id, + item=mcp_list_message, + output_index=len(output_messages) - 1, + sequence_number=self.sequence_number, + ) + + # Emit mcp_list_tools.completed + self.sequence_number += 1 + yield OpenAIResponseObjectStreamResponseMcpListToolsCompleted( + sequence_number=self.sequence_number, + ) + + # Emit output_item.done for the MCP list tools message + self.sequence_number += 1 + yield OpenAIResponseObjectStreamResponseOutputItemDone( + response_id=self.response_id, + item=mcp_list_message, + output_index=len(output_messages) - 1, + sequence_number=self.sequence_number, + ) + + except Exception as e: + # TODO: Emit mcp_list_tools.failed event if needed + 
logger.exception(f"Failed to list MCP tools from {mcp_tool.server_url}: {e}") + raise diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py b/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py new file mode 100644 index 000000000..5b98b4f51 --- /dev/null +++ b/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py @@ -0,0 +1,379 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import asyncio +import json +from collections.abc import AsyncIterator + +from llama_stack.apis.agents.openai_responses import ( + OpenAIResponseInputToolFileSearch, + OpenAIResponseInputToolMCP, + OpenAIResponseObjectStreamResponseMcpCallCompleted, + OpenAIResponseObjectStreamResponseMcpCallFailed, + OpenAIResponseObjectStreamResponseMcpCallInProgress, + OpenAIResponseObjectStreamResponseWebSearchCallCompleted, + OpenAIResponseObjectStreamResponseWebSearchCallInProgress, + OpenAIResponseObjectStreamResponseWebSearchCallSearching, + OpenAIResponseOutputMessageFileSearchToolCall, + OpenAIResponseOutputMessageFileSearchToolCallResults, + OpenAIResponseOutputMessageWebSearchToolCall, +) +from llama_stack.apis.common.content_types import ( + ImageContentItem, + TextContentItem, +) +from llama_stack.apis.inference import ( + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionToolCall, + OpenAIImageURL, + OpenAIToolMessageParam, +) +from llama_stack.apis.tools import ToolGroups, ToolInvocationResult, ToolRuntime +from llama_stack.apis.vector_io import VectorIO +from llama_stack.log import get_logger + +from .types import ChatCompletionContext, ToolExecutionResult + +logger = get_logger(name=__name__, category="responses") + + +class ToolExecutor: + def __init__( + self, + tool_groups_api: ToolGroups, + tool_runtime_api: ToolRuntime, + vector_io_api: VectorIO, + ): + self.tool_groups_api = tool_groups_api + self.tool_runtime_api = tool_runtime_api + self.vector_io_api = vector_io_api + + async def execute_tool_call( + self, + tool_call: OpenAIChatCompletionToolCall, + ctx: ChatCompletionContext, + sequence_number: int, + output_index: int, + item_id: str, + mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None, + ) -> AsyncIterator[ToolExecutionResult]: + tool_call_id = tool_call.id + function = tool_call.function + tool_kwargs = json.loads(function.arguments) if function.arguments else {} + + if not function or not tool_call_id or not function.name: + yield ToolExecutionResult(sequence_number=sequence_number) + return + + # Emit progress events for tool execution start + async for event_result in self._emit_progress_events( + function.name, ctx, sequence_number, output_index, item_id, mcp_tool_to_server + ): + sequence_number = event_result.sequence_number + yield event_result + + # Execute the actual tool call + error_exc, result = await self._execute_tool(function.name, tool_kwargs, ctx, mcp_tool_to_server) + + # Emit completion events for tool execution + has_error = error_exc or (result and ((result.error_code and result.error_code > 0) or result.error_message)) + async for event_result in self._emit_completion_events( + function.name, ctx, sequence_number, output_index, item_id, has_error, mcp_tool_to_server + ): + sequence_number = event_result.sequence_number + yield event_result + + # Build 
result messages from tool execution + output_message, input_message = await self._build_result_messages( + function, tool_call_id, tool_kwargs, ctx, error_exc, result, has_error, mcp_tool_to_server + ) + + # Yield the final result + yield ToolExecutionResult( + sequence_number=sequence_number, final_output_message=output_message, final_input_message=input_message + ) + + async def _execute_knowledge_search_via_vector_store( + self, + query: str, + response_file_search_tool: OpenAIResponseInputToolFileSearch, + ) -> ToolInvocationResult: + """Execute knowledge search using vector_stores.search API with filters support.""" + search_results = [] + + # Create search tasks for all vector stores + async def search_single_store(vector_store_id): + try: + search_response = await self.vector_io_api.openai_search_vector_store( + vector_store_id=vector_store_id, + query=query, + filters=response_file_search_tool.filters, + max_num_results=response_file_search_tool.max_num_results, + ranking_options=response_file_search_tool.ranking_options, + rewrite_query=False, + ) + return search_response.data + except Exception as e: + logger.warning(f"Failed to search vector store {vector_store_id}: {e}") + return [] + + # Run all searches in parallel using gather + search_tasks = [search_single_store(vid) for vid in response_file_search_tool.vector_store_ids] + all_results = await asyncio.gather(*search_tasks) + + # Flatten results + for results in all_results: + search_results.extend(results) + + # Convert search results to tool result format matching memory.py + # Format the results as interleaved content similar to memory.py + content_items = [] + content_items.append( + TextContentItem( + text=f"knowledge_search tool found {len(search_results)} chunks:\nBEGIN of knowledge_search tool results.\n" + ) + ) + + for i, result_item in enumerate(search_results): + chunk_text = result_item.content[0].text if result_item.content else "" + metadata_text = f"document_id: {result_item.file_id}, score: {result_item.score}" + if result_item.attributes: + metadata_text += f", attributes: {result_item.attributes}" + text_content = f"[{i + 1}] {metadata_text}\n{chunk_text}\n" + content_items.append(TextContentItem(text=text_content)) + + content_items.append(TextContentItem(text="END of knowledge_search tool results.\n")) + content_items.append( + TextContentItem( + text=f'The above results were retrieved to help answer the user\'s query: "{query}". 
Use them as supporting information only in answering this query.\n', + ) + ) + + return ToolInvocationResult( + content=content_items, + metadata={ + "document_ids": [r.file_id for r in search_results], + "chunks": [r.content[0].text if r.content else "" for r in search_results], + "scores": [r.score for r in search_results], + }, + ) + + async def _emit_progress_events( + self, + function_name: str, + ctx: ChatCompletionContext, + sequence_number: int, + output_index: int, + item_id: str, + mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None, + ) -> AsyncIterator[ToolExecutionResult]: + """Emit progress events for tool execution start.""" + # Emit in_progress event based on tool type (only for tools with specific streaming events) + progress_event = None + if mcp_tool_to_server and function_name in mcp_tool_to_server: + sequence_number += 1 + progress_event = OpenAIResponseObjectStreamResponseMcpCallInProgress( + item_id=item_id, + output_index=output_index, + sequence_number=sequence_number, + ) + elif function_name == "web_search": + sequence_number += 1 + progress_event = OpenAIResponseObjectStreamResponseWebSearchCallInProgress( + item_id=item_id, + output_index=output_index, + sequence_number=sequence_number, + ) + # Note: knowledge_search and other custom tools don't have specific streaming events in OpenAI spec + + if progress_event: + yield ToolExecutionResult(stream_event=progress_event, sequence_number=sequence_number) + + # For web search, emit searching event + if function_name == "web_search": + sequence_number += 1 + searching_event = OpenAIResponseObjectStreamResponseWebSearchCallSearching( + item_id=item_id, + output_index=output_index, + sequence_number=sequence_number, + ) + yield ToolExecutionResult(stream_event=searching_event, sequence_number=sequence_number) + + async def _execute_tool( + self, + function_name: str, + tool_kwargs: dict, + ctx: ChatCompletionContext, + mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None, + ) -> tuple[Exception | None, Any]: + """Execute the tool and return error exception and result.""" + error_exc = None + result = None + + try: + if mcp_tool_to_server and function_name in mcp_tool_to_server: + from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool + + mcp_tool = mcp_tool_to_server[function_name] + result = await invoke_mcp_tool( + endpoint=mcp_tool.server_url, + headers=mcp_tool.headers or {}, + tool_name=function_name, + kwargs=tool_kwargs, + ) + elif function_name == "knowledge_search": + response_file_search_tool = next( + (t for t in ctx.response_tools if isinstance(t, OpenAIResponseInputToolFileSearch)), + None, + ) + if response_file_search_tool: + # Use vector_stores.search API instead of knowledge_search tool + # to support filters and ranking_options + query = tool_kwargs.get("query", "") + result = await self._execute_knowledge_search_via_vector_store( + query=query, + response_file_search_tool=response_file_search_tool, + ) + else: + result = await self.tool_runtime_api.invoke_tool( + tool_name=function_name, + kwargs=tool_kwargs, + ) + except Exception as e: + error_exc = e + + return error_exc, result + + async def _emit_completion_events( + self, + function_name: str, + ctx: ChatCompletionContext, + sequence_number: int, + output_index: int, + item_id: str, + has_error: bool, + mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None, + ) -> AsyncIterator[ToolExecutionResult]: + """Emit completion or failure events for tool execution.""" + 
completion_event = None + + if mcp_tool_to_server and function_name in mcp_tool_to_server: + sequence_number += 1 + if has_error: + completion_event = OpenAIResponseObjectStreamResponseMcpCallFailed( + sequence_number=sequence_number, + ) + else: + completion_event = OpenAIResponseObjectStreamResponseMcpCallCompleted( + sequence_number=sequence_number, + ) + elif function_name == "web_search": + sequence_number += 1 + completion_event = OpenAIResponseObjectStreamResponseWebSearchCallCompleted( + item_id=item_id, + output_index=output_index, + sequence_number=sequence_number, + ) + # Note: knowledge_search and other custom tools don't have specific completion events in OpenAI spec + + if completion_event: + yield ToolExecutionResult(stream_event=completion_event, sequence_number=sequence_number) + + async def _build_result_messages( + self, + function, + tool_call_id: str, + tool_kwargs: dict, + ctx: ChatCompletionContext, + error_exc: Exception | None, + result: Any, + has_error: bool, + mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None, + ) -> tuple[Any, Any]: + """Build output and input messages from tool execution results.""" + from llama_stack.providers.utils.inference.prompt_adapter import ( + interleaved_content_as_str, + ) + + # Build output message + if mcp_tool_to_server and function.name in mcp_tool_to_server: + from llama_stack.apis.agents.openai_responses import ( + OpenAIResponseOutputMessageMCPCall, + ) + + message = OpenAIResponseOutputMessageMCPCall( + id=tool_call_id, + arguments=function.arguments, + name=function.name, + server_label=mcp_tool_to_server[function.name].server_label, + ) + if error_exc: + message.error = str(error_exc) + elif (result and result.error_code and result.error_code > 0) or (result and result.error_message): + message.error = f"Error (code {result.error_code}): {result.error_message}" + elif result and result.content: + message.output = interleaved_content_as_str(result.content) + else: + if function.name == "web_search": + message = OpenAIResponseOutputMessageWebSearchToolCall( + id=tool_call_id, + status="completed", + ) + if has_error: + message.status = "failed" + elif function.name == "knowledge_search": + message = OpenAIResponseOutputMessageFileSearchToolCall( + id=tool_call_id, + queries=[tool_kwargs.get("query", "")], + status="completed", + ) + if result and result.metadata and "document_ids" in result.metadata: + message.results = [] + for i, doc_id in enumerate(result.metadata["document_ids"]): + text = result.metadata["chunks"][i] if "chunks" in result.metadata else None + score = result.metadata["scores"][i] if "scores" in result.metadata else None + message.results.append( + OpenAIResponseOutputMessageFileSearchToolCallResults( + file_id=doc_id, + filename=doc_id, + text=text, + score=score, + attributes={}, + ) + ) + if has_error: + message.status = "failed" + else: + raise ValueError(f"Unknown tool {function.name} called") + + # Build input message + input_message = None + if result and result.content: + if isinstance(result.content, str): + content = result.content + elif isinstance(result.content, list): + content = [] + for item in result.content: + if isinstance(item, TextContentItem): + part = OpenAIChatCompletionContentPartTextParam(text=item.text) + elif isinstance(item, ImageContentItem): + if item.image.data: + url = f"data:image;base64,{item.image.data}" + else: + url = item.image.url + part = OpenAIChatCompletionContentPartImageParam(image_url=OpenAIImageURL(url=url)) + else: + raise ValueError(f"Unknown result 
content type: {type(item)}") + content.append(part) + else: + raise ValueError(f"Unknown result content type: {type(result.content)}") + input_message = OpenAIToolMessageParam(content=content, tool_call_id=tool_call_id) + else: + text = str(error_exc) if error_exc else "Tool execution failed" + input_message = OpenAIToolMessageParam(content=text, tool_call_id=tool_call_id) + + return message, input_message diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/types.py b/llama_stack/providers/inline/agents/meta_reference/responses/types.py new file mode 100644 index 000000000..89086c262 --- /dev/null +++ b/llama_stack/providers/inline/agents/meta_reference/responses/types.py @@ -0,0 +1,60 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from dataclasses import dataclass + +from openai.types.chat import ChatCompletionToolParam +from pydantic import BaseModel + +from llama_stack.apis.agents.openai_responses import ( + OpenAIResponseInputTool, + OpenAIResponseObjectStream, + OpenAIResponseOutput, +) +from llama_stack.apis.inference import OpenAIChatCompletionToolCall, OpenAIMessageParam, OpenAIResponseFormatParam + + +class ToolExecutionResult(BaseModel): + """Result of streaming tool execution.""" + + stream_event: OpenAIResponseObjectStream | None = None + sequence_number: int + final_output_message: OpenAIResponseOutput | None = None + final_input_message: OpenAIMessageParam | None = None + + +@dataclass +class ChatCompletionResult: + """Result of processing streaming chat completion chunks.""" + + response_id: str + content: list[str] + tool_calls: dict[int, OpenAIChatCompletionToolCall] + created: int + model: str + finish_reason: str + message_item_id: str # For streaming events + tool_call_item_ids: dict[int, str] # For streaming events + content_part_emitted: bool # Tracking state + + @property + def content_text(self) -> str: + """Get joined content as string.""" + return "".join(self.content) + + @property + def has_tool_calls(self) -> bool: + """Check if there are any tool calls.""" + return bool(self.tool_calls) + + +class ChatCompletionContext(BaseModel): + model: str + messages: list[OpenAIMessageParam] + response_tools: list[OpenAIResponseInputTool] | None = None + chat_tools: list[ChatCompletionToolParam] | None = None + temperature: float | None + response_format: OpenAIResponseFormatParam diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/utils.py b/llama_stack/providers/inline/agents/meta_reference/responses/utils.py new file mode 100644 index 000000000..1507a55c8 --- /dev/null +++ b/llama_stack/providers/inline/agents/meta_reference/responses/utils.py @@ -0,0 +1,169 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
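+ +"""Conversion helpers shared by the responses implementation: translate OpenAI Responses API inputs, content parts, and text formats into their Chat Completion equivalents, and check whether a tool call targets a client-side function tool."""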
+ +import uuid + +from llama_stack.apis.agents.openai_responses import ( + OpenAIResponseInput, + OpenAIResponseInputFunctionToolCallOutput, + OpenAIResponseInputMessageContent, + OpenAIResponseInputMessageContentImage, + OpenAIResponseInputMessageContentText, + OpenAIResponseInputTool, + OpenAIResponseMessage, + OpenAIResponseOutputMessageContent, + OpenAIResponseOutputMessageContentOutputText, + OpenAIResponseOutputMessageFunctionToolCall, + OpenAIResponseText, +) +from llama_stack.apis.inference import ( + OpenAIAssistantMessageParam, + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartParam, + OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionToolCall, + OpenAIChatCompletionToolCallFunction, + OpenAIChoice, + OpenAIDeveloperMessageParam, + OpenAIImageURL, + OpenAIJSONSchema, + OpenAIMessageParam, + OpenAIResponseFormatJSONObject, + OpenAIResponseFormatJSONSchema, + OpenAIResponseFormatParam, + OpenAIResponseFormatText, + OpenAISystemMessageParam, + OpenAIToolMessageParam, + OpenAIUserMessageParam, +) + + +async def convert_chat_choice_to_response_message(choice: OpenAIChoice) -> OpenAIResponseMessage: + """Convert an OpenAI Chat Completion choice into an OpenAI Response output message.""" + output_content = "" + if isinstance(choice.message.content, str): + output_content = choice.message.content + elif isinstance(choice.message.content, OpenAIChatCompletionContentPartTextParam): + output_content = choice.message.content.text + else: + raise ValueError( + f"Llama Stack OpenAI Responses does not yet support output content type: {type(choice.message.content)}" + ) + + return OpenAIResponseMessage( + id=f"msg_{uuid.uuid4()}", + content=[OpenAIResponseOutputMessageContentOutputText(text=output_content)], + status="completed", + role="assistant", + ) + + +async def convert_response_content_to_chat_content( + content: (str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent]), +) -> str | list[OpenAIChatCompletionContentPartParam]: + """ + Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts. + + The content schemas of each API look similar, but are not exactly the same. + """ + if isinstance(content, str): + return content + + converted_parts = [] + for content_part in content: + if isinstance(content_part, OpenAIResponseInputMessageContentText): + converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text)) + elif isinstance(content_part, OpenAIResponseOutputMessageContentOutputText): + converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text)) + elif isinstance(content_part, OpenAIResponseInputMessageContentImage): + if content_part.image_url: + image_url = OpenAIImageURL(url=content_part.image_url, detail=content_part.detail) + converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url)) + elif isinstance(content_part, str): + converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part)) + else: + raise ValueError( + f"Llama Stack OpenAI Responses does not yet support content type '{type(content_part)}' in this context" + ) + return converted_parts + + +async def convert_response_input_to_chat_messages( + input: str | list[OpenAIResponseInput], +) -> list[OpenAIMessageParam]: + """ + Convert the input from an OpenAI Response API request into OpenAI Chat Completion messages. 
+ """ + messages: list[OpenAIMessageParam] = [] + if isinstance(input, list): + for input_item in input: + if isinstance(input_item, OpenAIResponseInputFunctionToolCallOutput): + messages.append( + OpenAIToolMessageParam( + content=input_item.output, + tool_call_id=input_item.call_id, + ) + ) + elif isinstance(input_item, OpenAIResponseOutputMessageFunctionToolCall): + tool_call = OpenAIChatCompletionToolCall( + index=0, + id=input_item.call_id, + function=OpenAIChatCompletionToolCallFunction( + name=input_item.name, + arguments=input_item.arguments, + ), + ) + messages.append(OpenAIAssistantMessageParam(tool_calls=[tool_call])) + else: + content = await convert_response_content_to_chat_content(input_item.content) + message_type = await get_message_type_by_role(input_item.role) + if message_type is None: + raise ValueError( + f"Llama Stack OpenAI Responses does not yet support message role '{input_item.role}' in this context" + ) + messages.append(message_type(content=content)) + else: + messages.append(OpenAIUserMessageParam(content=input)) + return messages + + +async def convert_response_text_to_chat_response_format( + text: OpenAIResponseText, +) -> OpenAIResponseFormatParam: + """ + Convert an OpenAI Response text parameter into an OpenAI Chat Completion response format. + """ + if not text.format or text.format["type"] == "text": + return OpenAIResponseFormatText(type="text") + if text.format["type"] == "json_object": + return OpenAIResponseFormatJSONObject() + if text.format["type"] == "json_schema": + return OpenAIResponseFormatJSONSchema( + json_schema=OpenAIJSONSchema(name=text.format["name"], schema=text.format["schema"]) + ) + raise ValueError(f"Unsupported text format: {text.format}") + + +async def get_message_type_by_role(role: str): + role_to_type = { + "user": OpenAIUserMessageParam, + "system": OpenAISystemMessageParam, + "assistant": OpenAIAssistantMessageParam, + "developer": OpenAIDeveloperMessageParam, + } + return role_to_type.get(role) + + +def is_function_tool_call( + tool_call: OpenAIChatCompletionToolCall, + tools: list[OpenAIResponseInputTool], +) -> bool: + if not tool_call.function: + return False + for t in tools: + if t.type == "function" and t.name == tool_call.function.name: + return True + return False diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index 9a77c8cc4..6297cc2ed 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -31,15 +31,15 @@ from openai.types.chat import ( from openai.types.chat import ( ChatCompletionContentPartTextParam as OpenAIChatCompletionContentPartTextParam, ) +from openai.types.chat import ( + ChatCompletionMessageFunctionToolCall as OpenAIChatCompletionMessageFunctionToolCall, +) from openai.types.chat import ( ChatCompletionMessageParam as OpenAIChatCompletionMessage, ) from openai.types.chat import ( ChatCompletionMessageToolCall, ) -from openai.types.chat import ( - ChatCompletionMessageToolCallParam as OpenAIChatCompletionMessageToolCall, -) from openai.types.chat import ( ChatCompletionSystemMessageParam as OpenAIChatCompletionSystemMessage, ) @@ -633,7 +633,7 @@ async def convert_message_to_openai_dict_new( ) elif isinstance(message, CompletionMessage): tool_calls = [ - OpenAIChatCompletionMessageToolCall( + OpenAIChatCompletionMessageFunctionToolCall( id=tool.call_id, function=OpenAIFunction( name=(tool.tool_name if not isinstance(tool.tool_name, BuiltinTool) else 
tool.tool_name.value), @@ -903,7 +903,7 @@ def _convert_openai_request_response_format( def _convert_openai_tool_calls( - tool_calls: list[OpenAIChatCompletionMessageToolCall], + tool_calls: list[OpenAIChatCompletionMessageFunctionToolCall], ) -> list[ToolCall]: """ Convert an OpenAI ChatCompletionMessageToolCall list into a list of ToolCall. diff --git a/llama_stack/ui/.nvmrc b/llama_stack/ui/.nvmrc new file mode 100644 index 000000000..1384ff6a1 --- /dev/null +++ b/llama_stack/ui/.nvmrc @@ -0,0 +1 @@ +22.5.1 diff --git a/llama_stack/ui/.prettierignore b/llama_stack/ui/.prettierignore index 1b8ac8894..b737ae6ed 100644 --- a/llama_stack/ui/.prettierignore +++ b/llama_stack/ui/.prettierignore @@ -1,3 +1,12 @@ # Ignore artifacts: build coverage +.next +node_modules +dist +*.lock +*.log + +# Generated files +*.min.js +*.min.css diff --git a/llama_stack/ui/.prettierrc b/llama_stack/ui/.prettierrc index 0967ef424..059475a24 100644 --- a/llama_stack/ui/.prettierrc +++ b/llama_stack/ui/.prettierrc @@ -1 +1,10 @@ -{} +{ + "semi": true, + "trailingComma": "es5", + "singleQuote": false, + "printWidth": 80, + "tabWidth": 2, + "useTabs": false, + "bracketSpacing": true, + "arrowParens": "avoid" +} diff --git a/llama_stack/ui/app/api/v1/[...path]/route.ts b/llama_stack/ui/app/api/v1/[...path]/route.ts index 1959f9099..51c1f8004 100644 --- a/llama_stack/ui/app/api/v1/[...path]/route.ts +++ b/llama_stack/ui/app/api/v1/[...path]/route.ts @@ -47,7 +47,7 @@ async function proxyRequest(request: NextRequest, method: string) { const responseText = await response.text(); console.log( - `Response from FastAPI: ${response.status} ${response.statusText}`, + `Response from FastAPI: ${response.status} ${response.statusText}` ); // Create response with same status and headers @@ -74,7 +74,7 @@ async function proxyRequest(request: NextRequest, method: string) { backend_url: BACKEND_URL, timestamp: new Date().toISOString(), }, - { status: 500 }, + { status: 500 } ); } } diff --git a/llama_stack/ui/app/auth/signin/page.tsx b/llama_stack/ui/app/auth/signin/page.tsx index c9510fd6b..0ccb4a397 100644 --- a/llama_stack/ui/app/auth/signin/page.tsx +++ b/llama_stack/ui/app/auth/signin/page.tsx @@ -51,9 +51,9 @@ export default function SignInPage() { onClick={() => { console.log("Signing in with GitHub..."); signIn("github", { callbackUrl: "/auth/signin" }).catch( - (error) => { + error => { console.error("Sign in error:", error); - }, + } ); }} className="w-full" diff --git a/llama_stack/ui/app/chat-playground/page.tsx b/llama_stack/ui/app/chat-playground/page.tsx index d8094af85..b8651aca0 100644 --- a/llama_stack/ui/app/chat-playground/page.tsx +++ b/llama_stack/ui/app/chat-playground/page.tsx @@ -29,14 +29,13 @@ export default function ChatPlaygroundPage() { const isModelsLoading = modelsLoading ?? 
true; - useEffect(() => { const fetchModels = async () => { try { setModelsLoading(true); setModelsError(null); const modelList = await client.models.list(); - const llmModels = modelList.filter(model => model.model_type === 'llm'); + const llmModels = modelList.filter(model => model.model_type === "llm"); setModels(llmModels); if (llmModels.length > 0) { setSelectedModel(llmModels[0].identifier); @@ -53,103 +52,122 @@ export default function ChatPlaygroundPage() { }, [client]); const extractTextContent = (content: unknown): string => { - if (typeof content === 'string') { + if (typeof content === "string") { return content; } if (Array.isArray(content)) { return content - .filter(item => item && typeof item === 'object' && 'type' in item && item.type === 'text') - .map(item => (item && typeof item === 'object' && 'text' in item) ? String(item.text) : '') - .join(''); + .filter( + item => + item && + typeof item === "object" && + "type" in item && + item.type === "text" + ) + .map(item => + item && typeof item === "object" && "text" in item + ? String(item.text) + : "" + ) + .join(""); } - if (content && typeof content === 'object' && 'type' in content && content.type === 'text' && 'text' in content) { - return String(content.text) || ''; + if ( + content && + typeof content === "object" && + "type" in content && + content.type === "text" && + "text" in content + ) { + return String(content.text) || ""; } - return ''; + return ""; }; const handleInputChange = (e: React.ChangeEvent) => { setInput(e.target.value); }; -const handleSubmit = async (event?: { preventDefault?: () => void }) => { - event?.preventDefault?.(); - if (!input.trim()) return; + const handleSubmit = async (event?: { preventDefault?: () => void }) => { + event?.preventDefault?.(); + if (!input.trim()) return; - // Add user message to chat - const userMessage: Message = { - id: Date.now().toString(), - role: "user", - content: input.trim(), - createdAt: new Date(), - }; - - setMessages(prev => [...prev, userMessage]); - setInput(""); - - // Use the helper function with the content - await handleSubmitWithContent(userMessage.content); -}; - -const handleSubmitWithContent = async (content: string) => { - setIsGenerating(true); - setError(null); - - try { - const messageParams: CompletionCreateParams["messages"] = [ - ...messages.map(msg => { - const msgContent = typeof msg.content === 'string' ? 
msg.content : extractTextContent(msg.content); - if (msg.role === "user") { - return { role: "user" as const, content: msgContent }; - } else if (msg.role === "assistant") { - return { role: "assistant" as const, content: msgContent }; - } else { - return { role: "system" as const, content: msgContent }; - } - }), - { role: "user" as const, content } - ]; - - const response = await client.chat.completions.create({ - model: selectedModel, - messages: messageParams, - stream: true, - }); - - const assistantMessage: Message = { - id: (Date.now() + 1).toString(), - role: "assistant", - content: "", + // Add user message to chat + const userMessage: Message = { + id: Date.now().toString(), + role: "user", + content: input.trim(), createdAt: new Date(), }; - setMessages(prev => [...prev, assistantMessage]); - let fullContent = ""; - for await (const chunk of response) { - if (chunk.choices && chunk.choices[0]?.delta?.content) { - const deltaContent = chunk.choices[0].delta.content; - fullContent += deltaContent; + setMessages(prev => [...prev, userMessage]); + setInput(""); - flushSync(() => { - setMessages(prev => { - const newMessages = [...prev]; - const lastMessage = newMessages[newMessages.length - 1]; - if (lastMessage.role === "assistant") { - lastMessage.content = fullContent; - } - return newMessages; + // Use the helper function with the content + await handleSubmitWithContent(userMessage.content); + }; + + const handleSubmitWithContent = async (content: string) => { + setIsGenerating(true); + setError(null); + + try { + const messageParams: CompletionCreateParams["messages"] = [ + ...messages.map(msg => { + const msgContent = + typeof msg.content === "string" + ? msg.content + : extractTextContent(msg.content); + if (msg.role === "user") { + return { role: "user" as const, content: msgContent }; + } else if (msg.role === "assistant") { + return { role: "assistant" as const, content: msgContent }; + } else { + return { role: "system" as const, content: msgContent }; + } + }), + { role: "user" as const, content }, + ]; + + const response = await client.chat.completions.create({ + model: selectedModel, + messages: messageParams, + stream: true, + }); + + const assistantMessage: Message = { + id: (Date.now() + 1).toString(), + role: "assistant", + content: "", + createdAt: new Date(), + }; + + setMessages(prev => [...prev, assistantMessage]); + let fullContent = ""; + for await (const chunk of response) { + if (chunk.choices && chunk.choices[0]?.delta?.content) { + const deltaContent = chunk.choices[0].delta.content; + fullContent += deltaContent; + + flushSync(() => { + setMessages(prev => { + const newMessages = [...prev]; + const lastMessage = newMessages[newMessages.length - 1]; + if (lastMessage.role === "assistant") { + lastMessage.content = fullContent; + } + return newMessages; + }); }); - }); + } } + } catch (err) { + console.error("Error sending message:", err); + setError("Failed to send message. Please try again."); + setMessages(prev => prev.slice(0, -1)); + } finally { + setIsGenerating(false); } - } catch (err) { - console.error("Error sending message:", err); - setError("Failed to send message. 
Please try again."); - setMessages(prev => prev.slice(0, -1)); - } finally { - setIsGenerating(false); - } -}; + }; const suggestions = [ "Write a Python function that prints 'Hello, World!'", "Explain step-by-step how to solve this math problem: If x² + 6x + 9 = 25, what is x?", @@ -163,7 +181,7 @@ const handleSubmitWithContent = async (content: string) => { content: message.content, createdAt: new Date(), }; - setMessages(prev => [...prev, newMessage]) + setMessages(prev => [...prev, newMessage]); handleSubmitWithContent(newMessage.content); }; @@ -177,12 +195,20 @@ const handleSubmitWithContent = async (content: string) => {

Chat Playground (Completions)

- - + - {models.map((model) => ( + {models.map(model => ( {model.identifier} diff --git a/llama_stack/ui/app/logs/chat-completions/[id]/page.tsx b/llama_stack/ui/app/logs/chat-completions/[id]/page.tsx index 82aa3496e..e11924f4c 100644 --- a/llama_stack/ui/app/logs/chat-completions/[id]/page.tsx +++ b/llama_stack/ui/app/logs/chat-completions/[id]/page.tsx @@ -33,12 +33,12 @@ export default function ChatCompletionDetailPage() { } catch (err) { console.error( `Error fetching chat completion detail for ID ${id}:`, - err, + err ); setError( err instanceof Error ? err - : new Error("Failed to fetch completion detail"), + : new Error("Failed to fetch completion detail") ); } finally { setIsLoading(false); diff --git a/llama_stack/ui/app/logs/responses/[id]/page.tsx b/llama_stack/ui/app/logs/responses/[id]/page.tsx index 7f4252856..922d35531 100644 --- a/llama_stack/ui/app/logs/responses/[id]/page.tsx +++ b/llama_stack/ui/app/logs/responses/[id]/page.tsx @@ -13,10 +13,10 @@ export default function ResponseDetailPage() { const client = useAuthClient(); const [responseDetail, setResponseDetail] = useState( - null, + null ); const [inputItems, setInputItems] = useState( - null, + null ); const [isLoading, setIsLoading] = useState(true); const [isLoadingInputItems, setIsLoadingInputItems] = useState(true); @@ -25,7 +25,7 @@ export default function ResponseDetailPage() { // Helper function to convert ResponseObject to OpenAIResponse const convertResponseObject = ( - responseData: ResponseObject, + responseData: ResponseObject ): OpenAIResponse => { return { id: responseData.id, @@ -73,12 +73,12 @@ export default function ResponseDetailPage() { } else { console.error( `Error fetching response detail for ID ${id}:`, - responseResult.reason, + responseResult.reason ); setError( responseResult.reason instanceof Error ? responseResult.reason - : new Error("Failed to fetch response detail"), + : new Error("Failed to fetch response detail") ); } @@ -90,18 +90,18 @@ export default function ResponseDetailPage() { } else { console.error( `Error fetching input items for response ID ${id}:`, - inputItemsResult.reason, + inputItemsResult.reason ); setInputItemsError( inputItemsResult.reason instanceof Error ? inputItemsResult.reason - : new Error("Failed to fetch input items"), + : new Error("Failed to fetch input items") ); } } catch (err) { console.error(`Unexpected error fetching data for ID ${id}:`, err); setError( - err instanceof Error ? err : new Error("Unexpected error occurred"), + err instanceof Error ? 
err : new Error("Unexpected error occurred") ); } finally { setIsLoading(false); diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx b/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx index 6896b992a..d58de3085 100644 --- a/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx +++ b/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx @@ -18,7 +18,10 @@ import { PropertiesCard, PropertyItem, } from "@/components/layout/detail-layout"; -import { PageBreadcrumb, BreadcrumbSegment } from "@/components/layout/page-breadcrumb"; +import { + PageBreadcrumb, + BreadcrumbSegment, +} from "@/components/layout/page-breadcrumb"; export default function ContentDetailPage() { const params = useParams(); @@ -28,13 +31,13 @@ export default function ContentDetailPage() { const contentId = params.contentId as string; const client = useAuthClient(); - const getTextFromContent = (content: any): string => { - if (typeof content === 'string') { + const getTextFromContent = (content: unknown): string => { + if (typeof content === "string") { return content; - } else if (content && content.type === 'text') { + } else if (content && content.type === "text") { return content.text; } - return ''; + return ""; }; const [store, setStore] = useState(null); @@ -44,7 +47,9 @@ export default function ContentDetailPage() { const [error, setError] = useState(null); const [isEditing, setIsEditing] = useState(false); const [editedContent, setEditedContent] = useState(""); - const [editedMetadata, setEditedMetadata] = useState>({}); + const [editedMetadata, setEditedMetadata] = useState>( + {} + ); const [isEditingEmbedding, setIsEditingEmbedding] = useState(false); const [editedEmbedding, setEditedEmbedding] = useState([]); @@ -64,8 +69,13 @@ export default function ContentDetailPage() { setFile(fileResponse as VectorStoreFile); const contentsAPI = new ContentsAPI(client); - const contentsResponse = await contentsAPI.listContents(vectorStoreId, fileId); - const targetContent = contentsResponse.data.find(c => c.id === contentId); + const contentsResponse = await contentsAPI.listContents( + vectorStoreId, + fileId + ); + const targetContent = contentsResponse.data.find( + c => c.id === contentId + ); if (targetContent) { setContent(targetContent); @@ -76,7 +86,9 @@ export default function ContentDetailPage() { throw new Error(`Content ${contentId} not found`); } } catch (err) { - setError(err instanceof Error ? err : new Error("Failed to load content.")); + setError( + err instanceof Error ? 
err : new Error("Failed to load content.") + ); } finally { setIsLoading(false); } @@ -88,7 +100,8 @@ export default function ContentDetailPage() { if (!content) return; try { - const updates: { content?: string; metadata?: Record } = {}; + const updates: { content?: string; metadata?: Record } = + {}; if (editedContent !== getTextFromContent(content.content)) { updates.content = editedContent; @@ -100,25 +113,32 @@ export default function ContentDetailPage() { if (Object.keys(updates).length > 0) { const contentsAPI = new ContentsAPI(client); - const updatedContent = await contentsAPI.updateContent(vectorStoreId, fileId, contentId, updates); + const updatedContent = await contentsAPI.updateContent( + vectorStoreId, + fileId, + contentId, + updates + ); setContent(updatedContent); } setIsEditing(false); } catch (err) { - console.error('Failed to update content:', err); + console.error("Failed to update content:", err); } }; const handleDelete = async () => { - if (!confirm('Are you sure you want to delete this content?')) return; + if (!confirm("Are you sure you want to delete this content?")) return; try { const contentsAPI = new ContentsAPI(client); await contentsAPI.deleteContent(vectorStoreId, fileId, contentId); - router.push(`/logs/vector-stores/${vectorStoreId}/files/${fileId}/contents`); + router.push( + `/logs/vector-stores/${vectorStoreId}/files/${fileId}/contents` + ); } catch (err) { - console.error('Failed to delete content:', err); + console.error("Failed to delete content:", err); } }; @@ -134,10 +154,19 @@ export default function ContentDetailPage() { const breadcrumbSegments: BreadcrumbSegment[] = [ { label: "Vector Stores", href: "/logs/vector-stores" }, - { label: store?.name || vectorStoreId, href: `/logs/vector-stores/${vectorStoreId}` }, + { + label: store?.name || vectorStoreId, + href: `/logs/vector-stores/${vectorStoreId}`, + }, { label: "Files", href: `/logs/vector-stores/${vectorStoreId}` }, - { label: fileId, href: `/logs/vector-stores/${vectorStoreId}/files/${fileId}` }, - { label: "Contents", href: `/logs/vector-stores/${vectorStoreId}/files/${fileId}/contents` }, + { + label: fileId, + href: `/logs/vector-stores/${vectorStoreId}/files/${fileId}`, + }, + { + label: "Contents", + href: `/logs/vector-stores/${vectorStoreId}/files/${fileId}/contents`, + }, { label: contentId }, ]; @@ -186,7 +215,7 @@ export default function ContentDetailPage() { {isEditing ? (