From 386c806c704220bb5e0448f2559b6bdd46f427ce Mon Sep 17 00:00:00 2001 From: ehhuang Date: Mon, 3 Mar 2025 14:48:32 -0800 Subject: [PATCH] test: introduce recordable mocks for Agent tests (#1268) Summary: Agent tests shouldn't need to run inference and tool calls repeatedly. This PR introduces a way to record inference/tool calls and reuse them in subsequent test runs, which makes the tests more reliable and saves costs. Test Plan: Run when no recorded calls have been created yet (fails): ``` LLAMA_STACK_CONFIG=fireworks pytest -s -v tests/client-sdk/agents/test_agents.py --safety-shield meta-llama/Llama-Guard-3-8B ``` Run with `--record-responses` to record calls: ``` LLAMA_STACK_CONFIG=fireworks pytest -s -v tests/client-sdk/agents/test_agents.py --safety-shield meta-llama/Llama-Guard-3-8B --record-responses ``` Run without `--record-responses` again (succeeds): ``` LLAMA_STACK_CONFIG=fireworks pytest -s -v tests/client-sdk/agents/test_agents.py --safety-shield meta-llama/Llama-Guard-3-8B ``` --- tests/client-sdk/agents/test_agents.py | 58 +- tests/client-sdk/conftest.py | 66 + tests/client-sdk/fixtures/recordable_mock.py | 208 + .../recorded_responses/chat_completion.json | 6324 +++++++++++++++++ .../recorded_responses/chat_completion.pickle | Bin 0 -> 168310 bytes .../recorded_responses/invoke_tool.json | 266 + .../recorded_responses/invoke_tool.pickle | Bin 0 -> 35821 bytes 7 files changed, 6893 insertions(+), 29 deletions(-) create mode 100644 tests/client-sdk/fixtures/recordable_mock.py create mode 100644 tests/client-sdk/fixtures/recorded_responses/chat_completion.json create mode 100644 tests/client-sdk/fixtures/recorded_responses/chat_completion.pickle create mode 100644 tests/client-sdk/fixtures/recorded_responses/invoke_tool.json create mode 100644 tests/client-sdk/fixtures/recorded_responses/invoke_tool.pickle diff --git a/tests/client-sdk/agents/test_agents.py b/tests/client-sdk/agents/test_agents.py index 4d808da23..ca97eb692 100644 --- a/tests/client-sdk/agents/test_agents.py +++ b/tests/client-sdk/agents/test_agents.py @@ -41,8 +41,8 @@ def get_boiling_point(liquid_name: str, celcius: bool = True) -> int: @pytest.fixture(scope="session") -def agent_config(llama_stack_client, text_model_id): - available_shields = [shield.identifier for shield in llama_stack_client.shields.list()] +def agent_config(llama_stack_client_with_mocked_inference, text_model_id): + available_shields = [shield.identifier for shield in llama_stack_client_with_mocked_inference.shields.list()] available_shields = available_shields[:1] agent_config = AgentConfig( model=text_model_id, @@ -62,8 +62,8 @@ def agent_config(llama_stack_client, text_model_id): return agent_config -def test_agent_simple(llama_stack_client, agent_config): - agent = Agent(llama_stack_client, agent_config) +def test_agent_simple(llama_stack_client_with_mocked_inference, agent_config): + agent = Agent(llama_stack_client_with_mocked_inference, agent_config) session_id = agent.create_session(f"test-session-{uuid4()}") simple_hello = agent.create_turn( @@ -100,7 +100,7 @@ def test_agent_simple(llama_stack_client, agent_config): assert "I can't" in logs_str -def test_tool_config(llama_stack_client, agent_config): +def test_tool_config(llama_stack_client_with_mocked_inference, agent_config): common_params = dict( model="meta-llama/Llama-3.2-3B-Instruct", instructions="You are a helpful assistant", @@ -156,14 +156,14 @@ def test_tool_config(llama_stack_client, agent_config): Server__AgentConfig(**agent_config) -def 
test_builtin_tool_web_search(llama_stack_client, agent_config): +def test_builtin_tool_web_search(llama_stack_client_with_mocked_inference, agent_config): agent_config = { **agent_config, "toolgroups": [ "builtin::websearch", ], } - agent = Agent(llama_stack_client, agent_config) + agent = Agent(llama_stack_client_with_mocked_inference, agent_config) session_id = agent.create_session(f"test-session-{uuid4()}") response = agent.create_turn( @@ -186,14 +186,14 @@ def test_builtin_tool_web_search(llama_stack_client, agent_config): assert "No Violation" in logs_str -def test_builtin_tool_code_execution(llama_stack_client, agent_config): +def test_builtin_tool_code_execution(llama_stack_client_with_mocked_inference, agent_config): agent_config = { **agent_config, "toolgroups": [ "builtin::code_interpreter", ], } - agent = Agent(llama_stack_client, agent_config) + agent = Agent(llama_stack_client_with_mocked_inference, agent_config) session_id = agent.create_session(f"test-session-{uuid4()}") response = agent.create_turn( @@ -215,7 +215,7 @@ def test_builtin_tool_code_execution(llama_stack_client, agent_config): # This test must be run in an environment where `bwrap` is available. If you are running against a # server, this means the _server_ must have `bwrap` available. If you are using library client, then # you must have `bwrap` available in test's environment. -def test_code_interpreter_for_attachments(llama_stack_client, agent_config): +def test_code_interpreter_for_attachments(llama_stack_client_with_mocked_inference, agent_config): agent_config = { **agent_config, "toolgroups": [ @@ -223,7 +223,7 @@ def test_code_interpreter_for_attachments(llama_stack_client, agent_config): ], } - codex_agent = Agent(llama_stack_client, agent_config) + codex_agent = Agent(llama_stack_client_with_mocked_inference, agent_config) session_id = codex_agent.create_session(f"test-session-{uuid4()}") inflation_doc = AgentDocument( content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv", @@ -251,7 +251,7 @@ def test_code_interpreter_for_attachments(llama_stack_client, agent_config): assert "Tool:code_interpreter" in logs_str -def test_custom_tool(llama_stack_client, agent_config): +def test_custom_tool(llama_stack_client_with_mocked_inference, agent_config): client_tool = get_boiling_point agent_config = { **agent_config, @@ -259,7 +259,7 @@ def test_custom_tool(llama_stack_client, agent_config): "client_tools": [client_tool.get_tool_definition()], } - agent = Agent(llama_stack_client, agent_config, client_tools=(client_tool,)) + agent = Agent(llama_stack_client_with_mocked_inference, agent_config, client_tools=(client_tool,)) session_id = agent.create_session(f"test-session-{uuid4()}") response = agent.create_turn( @@ -278,7 +278,7 @@ def test_custom_tool(llama_stack_client, agent_config): assert "get_boiling_point" in logs_str -def test_custom_tool_infinite_loop(llama_stack_client, agent_config): +def test_custom_tool_infinite_loop(llama_stack_client_with_mocked_inference, agent_config): client_tool = get_boiling_point agent_config = { **agent_config, @@ -287,7 +287,7 @@ def test_custom_tool_infinite_loop(llama_stack_client, agent_config): "max_infer_iters": 5, } - agent = Agent(llama_stack_client, agent_config, client_tools=(client_tool,)) + agent = Agent(llama_stack_client_with_mocked_inference, agent_config, client_tools=(client_tool,)) session_id = agent.create_session(f"test-session-{uuid4()}") response = agent.create_turn( @@ -305,7 +305,7 @@ def 
test_custom_tool_infinite_loop(llama_stack_client, agent_config): assert num_tool_calls <= 5 -def test_tool_choice(llama_stack_client, agent_config): +def test_tool_choice(llama_stack_client_with_mocked_inference, agent_config): def run_agent(tool_choice): client_tool = get_boiling_point @@ -315,7 +315,7 @@ def test_tool_choice(llama_stack_client, agent_config): "client_tools": [client_tool.get_tool_definition()], } - agent = Agent(llama_stack_client, test_agent_config, client_tools=(client_tool,)) + agent = Agent(llama_stack_client_with_mocked_inference, test_agent_config, client_tools=(client_tool,)) session_id = agent.create_session(f"test-session-{uuid4()}") response = agent.create_turn( @@ -342,7 +342,7 @@ def test_tool_choice(llama_stack_client, agent_config): @pytest.mark.parametrize("rag_tool_name", ["builtin::rag/knowledge_search", "builtin::rag"]) -def test_rag_agent(llama_stack_client, agent_config, rag_tool_name): +def test_rag_agent(llama_stack_client_with_mocked_inference, agent_config, rag_tool_name): urls = ["chat.rst", "llama3.rst", "memory_optimizations.rst", "lora_finetune.rst"] documents = [ Document( @@ -354,12 +354,12 @@ def test_rag_agent(llama_stack_client, agent_config, rag_tool_name): for i, url in enumerate(urls) ] vector_db_id = f"test-vector-db-{uuid4()}" - llama_stack_client.vector_dbs.register( + llama_stack_client_with_mocked_inference.vector_dbs.register( vector_db_id=vector_db_id, embedding_model="all-MiniLM-L6-v2", embedding_dimension=384, ) - llama_stack_client.tool_runtime.rag_tool.insert( + llama_stack_client_with_mocked_inference.tool_runtime.rag_tool.insert( documents=documents, vector_db_id=vector_db_id, # small chunks help to get specific info out of the docs @@ -376,7 +376,7 @@ def test_rag_agent(llama_stack_client, agent_config, rag_tool_name): ) ], } - rag_agent = Agent(llama_stack_client, agent_config) + rag_agent = Agent(llama_stack_client_with_mocked_inference, agent_config) session_id = rag_agent.create_session(f"test-session-{uuid4()}") user_prompts = [ ( @@ -401,7 +401,7 @@ def test_rag_agent(llama_stack_client, agent_config, rag_tool_name): assert expected_kw in response.output_message.content.lower() -def test_rag_agent_with_attachments(llama_stack_client, agent_config): +def test_rag_agent_with_attachments(llama_stack_client_with_mocked_inference, agent_config): urls = ["chat.rst", "llama3.rst", "memory_optimizations.rst", "lora_finetune.rst"] documents = [ Document( @@ -423,7 +423,7 @@ def test_rag_agent_with_attachments(llama_stack_client, agent_config): ) ], } - rag_agent = Agent(llama_stack_client, agent_config) + rag_agent = Agent(llama_stack_client_with_mocked_inference, agent_config) session_id = rag_agent.create_session(f"test-session-{uuid4()}") user_prompts = [ ( @@ -462,7 +462,7 @@ def test_rag_agent_with_attachments(llama_stack_client, agent_config): assert "lora" in response.output_message.content.lower() -def test_rag_and_code_agent(llama_stack_client, agent_config): +def test_rag_and_code_agent(llama_stack_client_with_mocked_inference, agent_config): documents = [] documents.append( Document( @@ -484,12 +484,12 @@ def test_rag_and_code_agent(llama_stack_client, agent_config): ) ) vector_db_id = f"test-vector-db-{uuid4()}" - llama_stack_client.vector_dbs.register( + llama_stack_client_with_mocked_inference.vector_dbs.register( vector_db_id=vector_db_id, embedding_model="all-MiniLM-L6-v2", embedding_dimension=384, ) - llama_stack_client.tool_runtime.rag_tool.insert( + 
llama_stack_client_with_mocked_inference.tool_runtime.rag_tool.insert( documents=documents, vector_db_id=vector_db_id, chunk_size_in_tokens=128, @@ -504,7 +504,7 @@ def test_rag_and_code_agent(llama_stack_client, agent_config): "builtin::code_interpreter", ], } - agent = Agent(llama_stack_client, agent_config) + agent = Agent(llama_stack_client_with_mocked_inference, agent_config) inflation_doc = Document( document_id="test_csv", content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv", @@ -546,7 +546,7 @@ def test_rag_and_code_agent(llama_stack_client, agent_config): assert expected_kw in response.output_message.content.lower() -def test_create_turn_response(llama_stack_client, agent_config): +def test_create_turn_response(llama_stack_client_with_mocked_inference, agent_config): client_tool = get_boiling_point agent_config = { **agent_config, @@ -555,7 +555,7 @@ def test_create_turn_response(llama_stack_client, agent_config): "client_tools": [client_tool.get_tool_definition()], } - agent = Agent(llama_stack_client, agent_config, client_tools=(client_tool,)) + agent = Agent(llama_stack_client_with_mocked_inference, agent_config, client_tools=(client_tool,)) session_id = agent.create_session(f"test-session-{uuid4()}") response = agent.create_turn( diff --git a/tests/client-sdk/conftest.py b/tests/client-sdk/conftest.py index 3ecf45086..270dcb8af 100644 --- a/tests/client-sdk/conftest.py +++ b/tests/client-sdk/conftest.py @@ -3,13 +3,18 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import copy +import logging import os +from pathlib import Path import pytest +from fixtures.recordable_mock import RecordableMock from llama_stack_client import LlamaStackClient from report import Report from llama_stack import LlamaStackAsLibraryClient +from llama_stack.apis.datatypes import Api from llama_stack.providers.tests.env import get_env_or_fail @@ -66,6 +71,12 @@ def pytest_addoption(parser): default=384, help="Output dimensionality of the embedding model to use for testing", ) + parser.addoption( + "--record-responses", + action="store_true", + default=False, + help="Record new API responses instead of using cached ones.", + ) @pytest.fixture(scope="session") @@ -101,6 +112,61 @@ def llama_stack_client(provider_data, text_model_id): return client +@pytest.fixture(scope="session") +def llama_stack_client_with_mocked_inference(llama_stack_client, request): + """ + Returns a client with mocked inference APIs and tool runtime APIs that use recorded responses by default. + + If --record-responses is passed, it will call the real APIs and record the responses. 
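+
+    A minimal usage sketch (the test function below is hypothetical, but it mirrors the real tests in
+    test_agents.py; `Agent` is imported as in that module):
+
+        def test_example(llama_stack_client_with_mocked_inference, agent_config):
+            agent = Agent(llama_stack_client_with_mocked_inference, agent_config)
+            session_id = agent.create_session("example-session")
+            agent.create_turn(
+                messages=[{"role": "user", "content": "Give me a sentence that contains the word: hello"}],
+                session_id=session_id,
+            )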
+ """ + if not isinstance(llama_stack_client, LlamaStackAsLibraryClient): + logging.warning( + "llama_stack_client_with_mocked_inference is not supported for this client, returning original client without mocking" + ) + return llama_stack_client + + record_responses = request.config.getoption("--record-responses") + cache_dir = Path(__file__).parent / "fixtures" / "recorded_responses" + + # Create a shallow copy of the client to avoid modifying the original + client = copy.copy(llama_stack_client) + + # Get the inference API used by the agents implementation + agents_impl = client.async_client.impls[Api.agents] + original_inference = agents_impl.inference_api + + # Create a new inference object with the same attributes + inference_mock = copy.copy(original_inference) + + # Replace the methods with recordable mocks + inference_mock.chat_completion = RecordableMock( + original_inference.chat_completion, cache_dir, "chat_completion", record=record_responses + ) + inference_mock.completion = RecordableMock( + original_inference.completion, cache_dir, "text_completion", record=record_responses + ) + inference_mock.embeddings = RecordableMock( + original_inference.embeddings, cache_dir, "embeddings", record=record_responses + ) + + # Replace the inference API in the agents implementation + agents_impl.inference_api = inference_mock + + original_tool_runtime_api = agents_impl.tool_runtime_api + tool_runtime_mock = copy.copy(original_tool_runtime_api) + + # Replace the methods with recordable mocks + tool_runtime_mock.invoke_tool = RecordableMock( + original_tool_runtime_api.invoke_tool, cache_dir, "invoke_tool", record=record_responses + ) + agents_impl.tool_runtime_api = tool_runtime_mock + + # Also update the client.inference for consistency + client.inference = inference_mock + + return client + + @pytest.fixture(scope="session") def inference_provider_type(llama_stack_client): providers = llama_stack_client.providers.list() diff --git a/tests/client-sdk/fixtures/recordable_mock.py b/tests/client-sdk/fixtures/recordable_mock.py new file mode 100644 index 000000000..d8704a0d5 --- /dev/null +++ b/tests/client-sdk/fixtures/recordable_mock.py @@ -0,0 +1,208 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. +import json +import os +import pickle +import re +from pathlib import Path + + +class RecordableMock: + """A mock that can record and replay API responses.""" + + def __init__(self, real_func, cache_dir, func_name, record=False): + self.real_func = real_func + self.pickle_path = Path(cache_dir) / f"{func_name}.pickle" + self.json_path = Path(cache_dir) / f"{func_name}.json" + self.record = record + self.cache = {} + + # Load existing cache if available and not recording + if self.pickle_path.exists(): + try: + with open(self.pickle_path, "rb") as f: + self.cache = pickle.load(f) + except Exception as e: + print(f"Error loading cache from {self.pickle_path}: {e}") + + async def __call__(self, *args, **kwargs): + """ + Returns a coroutine that when awaited returns the result or an async generator, + matching the behavior of the original function. 
+ """ + # Create a cache key from the arguments + key = self._create_cache_key(args, kwargs) + + if self.record: + # In record mode, always call the real function + real_result = self.real_func(*args, **kwargs) + + # If it's a coroutine, we need to create a wrapper coroutine + if hasattr(real_result, "__await__"): + # Define a coroutine function that will record the result + async def record_coroutine(): + try: + # Await the real coroutine + result = await real_result + + # Check if the result is an async generator + if hasattr(result, "__aiter__"): + # It's an async generator, so we need to record its chunks + chunks = [] + + # Create and return a new async generator that records chunks + async def recording_generator(): + nonlocal chunks + async for chunk in result: + chunks.append(chunk) + yield chunk + # After all chunks are yielded, save to cache + self.cache[key] = {"type": "generator", "chunks": chunks} + self._save_cache() + + return recording_generator() + else: + # It's a regular result, save it to cache + self.cache[key] = {"type": "value", "value": result} + self._save_cache() + return result + except Exception as e: + print(f"Error in recording mode: {e}") + raise + + return await record_coroutine() + else: + # It's already an async generator, so we need to record its chunks + async def record_generator(): + chunks = [] + async for chunk in real_result: + chunks.append(chunk) + yield chunk + # After all chunks are yielded, save to cache + self.cache[key] = {"type": "generator", "chunks": chunks} + self._save_cache() + + return record_generator() + elif key not in self.cache: + # In replay mode, if the key is not in the cache, throw an error + raise KeyError( + f"No cached response found for key: {key}\nRun with --record-responses to record this response." 
+ ) + else: + # In replay mode with a cached response + cached_data = self.cache[key] + + # Check if it's a value or chunks + if cached_data.get("type") == "value": + # It's a regular value + return cached_data["value"] + else: + # It's chunks from an async generator + async def replay_generator(): + for chunk in cached_data["chunks"]: + yield chunk + + return replay_generator() + + def _create_cache_key(self, args, kwargs): + """Create a hashable key from the function arguments, ignoring auto-generated IDs.""" + # Convert args and kwargs to a string representation directly + args_str = str(args) + kwargs_str = str(sorted([(k, kwargs[k]) for k in kwargs])) + + # Combine into a single key + key = f"{args_str}_{kwargs_str}" + + # Post-process the key with regex to replace IDs with placeholders + # Replace UUIDs and similar patterns + key = re.sub(r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", "", key) + + # Replace temporary file paths created by tempfile.mkdtemp() + key = re.sub(r"/var/folders/[^,'\"\s]+", "", key) + + return key + + def _save_cache(self): + """Save the cache to disk in both pickle and JSON formats.""" + os.makedirs(self.pickle_path.parent, exist_ok=True) + + # Save as pickle for exact object preservation + with open(self.pickle_path, "wb") as f: + pickle.dump(self.cache, f) + + # Also save as JSON for human readability and diffing + try: + # Create a simplified version of the cache for JSON + json_cache = {} + for key, value in self.cache.items(): + if value.get("type") == "generator": + # For generators, create a simplified representation of each chunk + chunks = [] + for chunk in value["chunks"]: + chunk_dict = self._object_to_json_safe_dict(chunk) + chunks.append(chunk_dict) + json_cache[key] = {"type": "generator", "chunks": chunks} + else: + # For values, create a simplified representation + val = value["value"] + val_dict = self._object_to_json_safe_dict(val) + json_cache[key] = {"type": "value", "value": val_dict} + + # Write the JSON file with pretty formatting + with open(self.json_path, "w") as f: + json.dump(json_cache, f, indent=2, sort_keys=True) + except Exception as e: + print(f"Error saving JSON cache: {e}") + + def _object_to_json_safe_dict(self, obj): + """Convert an object to a JSON-safe dictionary.""" + # Handle enum types + if hasattr(obj, "value") and hasattr(obj.__class__, "__members__"): + return {"__enum__": obj.__class__.__name__, "value": obj.value} + + # Handle Pydantic models + if hasattr(obj, "model_dump"): + return self._process_dict(obj.model_dump()) + elif hasattr(obj, "dict"): + return self._process_dict(obj.dict()) + + # Handle regular objects with __dict__ + try: + return self._process_dict(vars(obj)) + except Exception as e: + print(f"Error converting object to JSON-safe dict: {e}") + # If we can't get a dict, convert to string + return str(obj) + + def _process_dict(self, d): + """Process a dictionary to make all values JSON-safe.""" + if not isinstance(d, dict): + return d + + result = {} + for k, v in d.items(): + if isinstance(v, dict): + result[k] = self._process_dict(v) + elif isinstance(v, list): + result[k] = [ + self._process_dict(item) + if isinstance(item, dict) + else self._object_to_json_safe_dict(item) + if hasattr(item, "__dict__") + else item + for item in v + ] + elif hasattr(v, "value") and hasattr(v.__class__, "__members__"): + # Handle enum + result[k] = {"__enum__": v.__class__.__name__, "value": v.value} + elif hasattr(v, "__dict__"): + # Handle nested objects + result[k] = 
self._object_to_json_safe_dict(v) + else: + # Basic types + result[k] = v + + return result diff --git a/tests/client-sdk/fixtures/recorded_responses/chat_completion.json b/tests/client-sdk/fixtures/recorded_responses/chat_completion.json new file mode 100644 index 000000000..e84b9be24 --- /dev/null +++ b/tests/client-sdk/fixtures/recorded_responses/chat_completion.json @@ -0,0 +1,6324 @@ +{ + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant Always respond with tool calls no matter what. '), UserMessage(role='user', content='Get the boiling point of polyjuice with a tool call.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice', 'celcius': 'true'})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100'), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice', 'celcius': 'false'})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " boiling point of polyjuice is -100 degrees Fahrenheit.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant Always respond with tool calls no matter what. 
'), UserMessage(role='user', content='Get the boiling point of polyjuice with a tool call.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice', 'celcius': 'true'})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "{\"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "type\": \"function\", \"name\": \"get_boiling_point", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "\", \"parameters\": {\"liquid_name\":", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " \"polyjuice\", \"cel", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "cius\": \"false\"}}", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "celcius": "false", + "liquid_name": "polyjuice" + }, + "call_id": "dc0f86d3-2b7a-45b0-8e58-8f49c9942190", + "tool_name": "get_boiling_point" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', 
[SystemMessage(role='system', content='You are a helpful assistant Always respond with tool calls no matter what. '), UserMessage(role='user', content='Get the boiling point of polyjuice with a tool call.', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\": \"get_bo", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "iling_point\", \"parameters\": {\"liquid_name\": \"poly", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "juice\", \"celcius\": \"true\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "celcius": "true", + "liquid_name": "polyjuice" + }, + "call_id": "510ca34b-5ba9-4d5f-9ff3-c56de756fc95", + "tool_name": "get_boiling_point" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', 
[SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Call get_boiling_point and answer What is the boiling point of polyjuice?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice', 'celcius': 'true'})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " boiling point of polyjuice is -100\u00b0C.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Call get_boiling_point and answer What is the boiling point of polyjuice?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + 
"__enum__": "ToolCallParseStatus", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\": \"get_boiling_point", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "\", \"parameters\": {\"liquid_name\": \"", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "polyjuice\", \"celcius\": \"true\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "celcius": "true", + "liquid_name": "polyjuice" + }, + "call_id": "ac699f8a-43ca-4f0b-abd4-0597722b42ee", + "tool_name": "get_boiling_point" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Give me a sentence that contains the word: hello', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " customer smiled and said \"hello\" to", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + 
"delta": { + "text": " the friendly store clerk.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv file, can you describe it?', context=None), ToolResponseMessage(role='tool', call_id='', tool_name=, content=[TextContentItem(type='text', text='# User provided a file accessible to you at \"\"\\nYou can use code_interpreter to load and inspect it.')]), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\ndf = pd.read_csv(\"\")\\nprint(df.head())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\"), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\ndf = pd.read_csv(\"\")\\nprint(df.head())\\nprint(df.info())\\nprint(df.describe())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)}), ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " error message indicates that the `bwrap.core` module", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " is not found. 
This is likely", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " because the `bwrap` package is not installed. To fix this", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": ", you can install the `bwrap` package using pip:\n\n```\n", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "pip install bwrap\n```\n\nHowever, if you don't have", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " permission to install packages, you can use the `knowledge_search` function to", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " get information about the CSV file instead:\n\n```\n{\n", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " \"type\": \"function\",\n \"name\": \"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "knowledge_search\",\n \"parameters\": {\n ", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " \"query\": \"describe a csv file\"\n }\n}\n``", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "`\n\nThis will return a description of the CSV file.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv file, can you describe it?', context=None), ToolResponseMessage(role='tool', call_id='', tool_name=, content=[TextContentItem(type='text', text='# User provided a file accessible to you at \"\"\\nYou can use code_interpreter to load and inspect it.')]), CompletionMessage(role='assistant', content='', stop_reason=, 
tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\ndf = pd.read_csv(\"\")\\nprint(df.head())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)}), ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "import pandas as pd\ndf = pd.read", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "_csv(\"/var/folders/cz/v", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "yh7y1d11xg881", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "lsxsshnc5c0000gn/T/tmpn9tl", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "gts1/qYsQ3ZJLinflation.csv", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + 
"__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "\")\nprint(df.head())\nprint(df.info())\nprint(df.describe())", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "code": "import pandas as pd\ndf = pd.read_csv(\"/var/folders/cz/vyh7y1d11xg881lsxsshnc5c0000gn/T/tmpn9tlgts1/qYsQ3ZJLinflation.csv\")\nprint(df.head())\nprint(df.info())\nprint(df.describe())" + }, + "call_id": "6c3c4895-55a7-4083-b5d1-6ee42bcbe5fa", + "tool_name": { + "__enum__": "BuiltinTool", + "value": "code_interpreter" + } + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv file, can you describe it?', context=None), ToolResponseMessage(role='tool', call_id='', tool_name=, content=[TextContentItem(type='text', text='# User provided a file accessible to you at \"\"\\nYou can use code_interpreter to load and inspect it.')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)}), ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "import pandas as pd\ndf = pd.read_csv(\"/", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "var/folders/cz/vyh7", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "y1d11xg881lsxsshnc5c0000", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "gn/T/tmpn9tlgts1", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "/qYsQ3ZJLin", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "flation.csv\")\nprint(df.head())", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "code": "import pandas as pd\ndf = pd.read_csv(\"/var/folders/cz/vyh7y1d11xg881lsxsshnc5c0000gn/T/tmpn9tlgts1/qYsQ3ZJLinflation.csv\")\nprint(df.head())" + }, + "call_id": "e6c48b40-6504-4043-b3fa-644bd7fafd0f", + "tool_name": { + "__enum__": "BuiltinTool", + "value": "code_interpreter" + } + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": 
"ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n# Load data\\ndf = pd.read_csv(\"\")\\n# Rows\\nprint(\"Number of rows and columns in the data:\", df.shape)\\n# Columns\\nprint(\"Columns of the data are:\", len(df.columns))\\n# Column names\\nprint(\"Columns of the data are:\", df.columns)\\n# Column dtypes\\nprint(\"Datatype of the columns are:\", df.dtypes)'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\"), CompletionMessage(role='assistant', content='It seems that the file \"\" does not exist. \\n\\nTo describe the csv file, you need to provide the actual file path or the file itself. If you are running this code in a notebook, you can use the `upload` button to upload the file. If you are running this code in a script, you need to provide the file path.\\n\\nHere is an example of how you can describe the csv file if you have it in the same directory as your script:\\n\\n```python\\nimport pandas as pd\\n\\n# Load data\\ndf = pd.read_csv(\\'inflation.csv\\')\\n\\n# Print summary of the data\\nprint(df.head()) # Print the first few rows of the data\\nprint(df.info()) # Print information about the data\\nprint(df.describe()) # Print summary statistics about the data\\n```\\n\\nThis will print the first few rows of the data, information about the data, and summary statistics about the data.', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Plot average yearly inflation as a time series', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': \"import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load data\\ndf = pd.read_csv('inflation.csv')\\n\\n# Convert date column to datetime\\ndf['date'] = pd.to_datetime(df['date'])\\n\\n# Group by year and calculate average inflation\\naverage_inflation = df.groupby(df['date'].dt.year)['inflation'].mean()\\n\\n# Plot time series\\nplt.figure(figsize=(10,6))\\nplt.plot(average_inflation.index, average_inflation.values, marker='o')\\nplt.title('Average Yearly Inflation')\\nplt.xlabel('Year')\\nplt.ylabel('Average Inflation')\\nplt.grid(True)\\nplt.show()\"})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', 
description='The code to execute', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "This", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " code will create a time series plot of the average yearly inflation.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " The x-axis represents the year", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " and the y-axis represents the average inflation", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": ". The plot will show the trend of average yearly inflation over the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " years.\n\nPlease note that you need to replace 'inflation.csv", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "' with the actual path to your csv file. Also, this", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " code assumes that the csv file has a column named 'date'", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " and another column named 'inflation'. 
If", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " your csv file has different column names, you", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " need to adjust the code accordingly.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n# Load data\\ndf = pd.read_csv(\"\")\\n# Rows\\nprint(\"Number of rows and columns in the data:\", df.shape)\\n# Columns\\nprint(\"Columns of the data are:\", len(df.columns))\\n# Column names\\nprint(\"Columns of the data are:\", df.columns)\\n# Column dtypes\\nprint(\"Datatype of the columns are:\", df.dtypes)'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\"), CompletionMessage(role='assistant', content='It seems that the file \"\" does not exist. \\n\\nTo describe the csv file, you need to provide the actual file path or the file itself. If you are running this code in a notebook, you can use the `upload` button to upload the file. 
If you are running this code in a script, you need to provide the file path.\\n\\nHere is an example of how you can describe the csv file if you have it in the same directory as your script:\\n\\n```python\\nimport pandas as pd\\n\\n# Load data\\ndf = pd.read_csv(\\'inflation.csv\\')\\n\\n# Print summary of the data\\nprint(df.head()) # Print the first few rows of the data\\nprint(df.info()) # Print information about the data\\nprint(df.describe()) # Print summary statistics about the data\\n```\\n\\nThis will print the first few rows of the data, information about the data, and summary statistics about the data.', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Plot average yearly inflation as a time series', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " data\ndf = pd.read_csv('inflation.csv')\n\n#", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " Convert date column to datetime\ndf['date'] = pd.to", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "_datetime(df['date'])\n\n# Group by year and calculate average", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " inflation\naverage_inflation = 
df.groupby(df['date'].", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "dt.year)['inflation'].mean()\n\n# Plot", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " time series\nplt.figure(figsize=(10,6))\nplt", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": ".plot(average_inflation.index, average_inflation.values, marker='", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "o')\nplt.title('Average Yearly Inflation')\nplt.xlabel", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "('Year')\nplt.ylabel('", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "Average Inflation')\nplt.grid(True)\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "plt.show()", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "code": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load data\ndf = pd.read_csv('inflation.csv')\n\n# Convert date column to datetime\ndf['date'] = pd.to_datetime(df['date'])\n\n# Group by year and calculate average inflation\naverage_inflation = df.groupby(df['date'].dt.year)['inflation'].mean()\n\n# Plot time series\nplt.figure(figsize=(10,6))\nplt.plot(average_inflation.index, average_inflation.values, marker='o')\nplt.title('Average Yearly Inflation')\nplt.xlabel('Year')\nplt.ylabel('Average Inflation')\nplt.grid(True)\nplt.show()" + }, + "call_id": "81d7a873-376b-438e-916d-d5454e6ed09e", + 
"tool_name": { + "__enum__": "BuiltinTool", + "value": "code_interpreter" + } + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), ToolResponseMessage(role='tool', call_id='', tool_name=, content=[TextContentItem(type='text', text='# User provided a file accessible to you at \"\"\\nYou can use code_interpreter to load and inspect it.')]), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n# Load data\\ndf = pd.read_csv(\"\")\\n# Rows\\nprint(\"Number of rows and columns in the data:\", df.shape)\\n# Columns\\nprint(\"Columns of the data are:\", len(df.columns))\\n# Column names\\nprint(\"Columns of the data are:\", df.columns)\\n# Column dtypes\\nprint(\"Datatype of the columns are:\", df.dtypes)'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "It", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " seems that the file \"/var/folders/c", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "z/vyh7y1d11xg881lsx", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "sshnc5c0000gn/T/tmp", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + 
"text": "n9tlgts1/ciW", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "Y4iENinflation.csv\"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " does not exist. \n\nTo describe the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " csv file, you need to provide the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " actual file path or the file itself. If you are running this", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " code in a notebook, you can use the `upload", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "` button to upload the file. If", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " you are running this code in a script", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": ", you need to provide the file path.\n\nHere is an", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " example of how you can describe the csv file if you have it", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " in the same directory as your script:\n\n```python\nimport pandas", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " as pd\n\n# Load data\ndf = pd.read_csv('", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "inflation.csv')\n\n# Print summary of the data\nprint", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": 
"(df.head()) # Print the first", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " few rows of the data\nprint(df.info()) # Print", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " information about the data\nprint(df.describe()) #", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " Print summary statistics about the data\n```\n\n", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "This will print the first few rows of the data,", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " information about the data, and", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " summary statistics about the data.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), ToolResponseMessage(role='tool', call_id='', tool_name=, content=[TextContentItem(type='text', text='# User provided a file accessible to you at \"\"\\nYou can use code_interpreter to load and inspect it.')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": 
"ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "import pandas as pd\n# Load data", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "\ndf = pd.read_csv(\"/var/folders/cz", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "/vyh7y1d11xg881lsx", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "sshnc5c0000gn/T/tmpn", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "9tlgts1/ciWY4i", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "ENinflation.csv\")\n# Rows\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "print(\"Number of rows and columns", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " in the data:\", df.shape)\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "# Columns\nprint(\"Columns of the", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " data are:\", len(df.columns))\n#", + "type": "tool_call" + }, + "event_type": { + "__enum__": 
"ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " Column names\nprint(\"Columns of the", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " data are:\", df.columns)\n# Column dt", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "ypes\nprint(\"Datatype of the", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " columns are:\", df.dtypes)", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "code": "import pandas as pd\n# Load data\ndf = pd.read_csv(\"/var/folders/cz/vyh7y1d11xg881lsxsshnc5c0000gn/T/tmpn9tlgts1/ciWY4iENinflation.csv\")\n# Rows\nprint(\"Number of rows and columns in the data:\", df.shape)\n# Columns\nprint(\"Columns of the data are:\", len(df.columns))\n# Column names\nprint(\"Columns of the data are:\", df.columns)\n# Column dtypes\nprint(\"Datatype of the columns are:\", df.dtypes)" + }, + "call_id": "02669cd2-ac2f-481b-ab74-7b1671e15947", + "tool_name": { + "__enum__": "BuiltinTool", + "value": "code_interpreter" + } + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. 
Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:f4fd3\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. 
grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:8892b\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. 
code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')]), CompletionMessage(role='assistant', content='You can use the following function call to answer the user\\'s question:\\n\\n{\"type\": \"function\", \"name\": \"knowledge_search\", \"parameters\": {\"query\": \"How to fine-tune a Llama2 model with LoRA in torchtune\"}}', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Tell me how to use LoRA', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'How to use LoRA'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text=\"Result 1:\\nDocument_id:cbc88\\nContent: .. _lora_finetune_label:\\n\\n============================\\nFine-Tuning Llama2 with LoRA\\n============================\\n\\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. 
When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:8892b\\nContent: with training with LoRA quickly,\\njust specify any config with ``_lora`` in its name, e.g:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device\\n\\n\\nThere are two sets of parameters to customize LoRA to suit your needs. Firstly, the parameters which control\\nwhich linear layers LoRA should be applied to in the model:\\n\\n* ``lora_attn_modules: List[str]`` accepts a list of strings specifying which layers of the model to apply\\n LoRA to:\\n\\n * ``q_proj`` applies LoRA to the query projection layer.\\n * ``k_proj`` applies LoRA to the key projection layer.\\n * ``v_proj`` applies LoRA to the value projection layer.\\n * ``output_proj`` applies LoRA to the attention output projection layer.\\n\\n Whilst adding more layers to be fine-tuned may improve model accuracy,\\n this will come at the cost of increased memory usage and reduced training speed.\\n\\n* ``apply_lora_to_mlp: Bool`` applies LoRA to the MLP in each transformer layer.\\n* ``apply_lora_to_output: Bool`` applies LoRA to the model\\'s final output projection.\\n This is usually a projection to vocabulary space (e.g. in language models), but\\n other modelling tasks may have different projections - classifier models will project\\n to the number of classes, for example\\n\\n.. note::\\n\\n Models which use tied embeddings (such as Gemma and Qwen2 1.5B and 0.5B) for the\\n final output projection do not support ``apply_lora_to_output``.\\n\\nThese are all specified under the ``model`` flag or config entry, i.e:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\",\"output_proj\"]\\n\\n.. 
code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.llama3.lora_llama3_8b\\n apply_lora_to_mlp: True\\n model.lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\",\"output_proj\"]\\n\\nSecondly, parameters which control the scale of the impact of LoRA on the model:\\n\\n* ``lora_rank: int`` affects the scale of\\n'), TextContentItem(type='text', text='Result 4:\\nDocument_id:cbc88\\nContent: LoRA to Llama2 models\\n------------------------------\\n\\nWith torchtune, we can easily apply LoRA to Llama2 with a variety of different configurations.\\nLet\\'s take a look at how to construct Llama2 models in torchtune with and without LoRA.\\n\\n.. code-block:: python\\n\\n from torchtune.models.llama2 import llama2_7b, lora_llama2_7b\\n\\n # Build Llama2 without any LoRA layers\\n base_model = llama2_7b()\\n\\n # The default settings for lora_llama2_7b will match those for llama2_7b\\n # We just need to define which layers we want LoRA applied to.\\n # Within each self-attention, we can choose from [\"q_proj\", \"k_proj\", \"v_proj\", and \"output_proj\"].\\n # We can also set apply_lora_to_mlp=True or apply_lora_to_output=True to apply LoRA to other linear\\n # layers outside of the self-attention.\\n lora_model = lora_llama2_7b(lora_attn_modules=[\"q_proj\", \"v_proj\"])\\n\\n.. note::\\n\\n Calling :func:`lora_llama_2_7b ` alone will not handle the definition of which parameters are trainable.\\n See :ref:`below` for how to do this.\\n\\nLet\\'s inspect each of these models a bit more closely.\\n\\n.. code-block:: bash\\n\\n # Print the first layer\\'s self-attention in the usual Llama2 model\\n >>> print(base_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (pos_embeddings): RotaryPositionalEmbeddings()\\n )\\n\\n # Print the same for Llama2 with LoRA weights\\n >>> print(lora_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): LoRALinear(\\n (dropout): Dropout(p=0.0, inplace=False)\\n \\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:9dcb7\\nContent: ora_finetune_label>`.\\nFor more on QLoRA in torchtune, see our :ref:`QLoRA Tutorial `.\\n\\nLet\\'s take a look at how we can fine-tune Llama3-8B-Instruct with LoRA on a single device using torchtune. In this example, we will fine-tune\\nfor one epoch on a common instruct dataset for illustrative purposes. The basic command for a single-device LoRA fine-tune is\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device\\n\\n.. note::\\n To see a full list of recipes and their corresponding configs, simply run ``tune ls`` from the command line.\\n\\nWe can also add :ref:`command-line overrides ` as needed, e.g.\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n checkpointer.checkpoint_dir= \\\\\\n tokenizer.path=/tokenizer.model \\\\\\n checkpointer.output_dir=\\n\\nThis will load the Llama3-8B-Instruct checkpoint and tokenizer from ```` used in the :ref:`tune download ` command above,\\nthen save a final checkpoint in the same directory following the original format. For more details on the\\ncheckpoint formats supported in torchtune, see our :ref:`checkpointing deep-dive `.\\n\\n.. 
note::\\n To see the full set of configurable parameters for this (and other) configs we can use :ref:`tune cp ` to copy (and modify)\\n the default config. :ref:`tune cp ` can be used with recipe scripts too, in case you want to make more custom changes\\n that cannot be achieved by directly modifying existing configurable parameters. For more on :ref:`tune cp ` see the section on\\n :ref:`modifying configs ` in our \":ref:`finetune_llama_label`\" tutorial.\\n\\nOnce training is complete, the model checkpoints will be saved and their locations will be logged. For\\nLoRA fine-tuning, the final checkpoint will contain the merged weights, and a copy of just the (much smaller) LoRA weights\\nwill\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "To", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " use LoRA, you can follow these steps:\n\n1.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " Install the necessary packages, including torchtune and the L", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "lama2 model.\n2. Load the Llama2 model", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " and specify which layers to apply LoRA to.\n3. ", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " Define the LoRA parameters, such as the rank and alpha values", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": ".\n4. 
Train the model using the LoRA fine-t", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "uning recipe in torchtune.\n\nHere is", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " an example of how to use", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " LoRA with the Llama2 model", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": ":\n\n```python\nfrom torchtune.models.llama2 import", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " llama2_7b, lora_llama", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "2_7b\n\n# Build Llama", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "2 without any LoRA layers\nbase_model = llama2_", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "7b()\n\n# The default settings for lora_llama2", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "_7b will match those for llama2_7", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "b\n# We just need to define which layers we want", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " LoRA applied to.\n# Within each self", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "-attention, we can choose from [\"q_proj\", \"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "k_proj\", \"v_proj\", and \"output_proj", + "type": "text" + }, + "event_type": { + 
"__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "\"].\n# We can also set apply_lora_to_m", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "lp=True or apply_lora_to_output=True", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " to apply LoRA to other", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " linear\n# layers outside of the self-attention.\nl", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "ora_model = lora_llama", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "2_7b(lora_at", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "tn_modules=[\"q_proj\", \"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "v_proj\"])\n\n# Print the first", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " layer's self-attention in the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " usual Llama2 model\nprint", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "(base_model.layers[0].attn)\n# Print the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " same for Llama2 with LoRA", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " weights\nprint(lora_model.layers[0].attn", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { 
+ "delta": { + "text": ")\n```\n\nThis code will load the Llama2 model", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " and apply LoRA to the specified layers. The `l", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "ora_attn_modules` parameter is used to specify which layers", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " to apply LoRA to, and the `apply_lora", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "_to_mlp` and `apply_lora_to_output`", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " parameters can be used to apply LoRA", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " to other linear layers outside of the self", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "-attention.\n\nYou can also use the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " `tune run` command to fine-tune the model", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " using the LoRA fine-tuning recipe in torchtune.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " For example:\n\n```bash\ntune", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " run lora_finetune_single_device --config llama3", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "/8B_lora_single_device\n``", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": 
"`\n\nThis will run the LoRA fine-tuning recipe on the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " Llama3-8B-Instruct model using the default configuration", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": ". You can modify the configuration by adding command-line overrides, such", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " as:\n\n```bash\ntune run", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "out_of_tokens" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:f4fd3\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. 
note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. 
note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:8892b\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')]), CompletionMessage(role='assistant', content='You can use the following function call to answer the user\\'s question:\\n\\n{\"type\": \"function\", \"name\": \"knowledge_search\", \"parameters\": {\"query\": \"How to fine-tune a Llama2 model with LoRA in torchtune\"}}', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Tell me how to use LoRA', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "{\"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "type\": \"function\", \"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "name\": \"knowledge_search\", \"parameters\":", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " {\"query\": \"How to use LoRA\"}}", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "How to use LoRA" + }, + "call_id": "64448cc3-c11a-4bae-bdcc-e5b8d13b888f", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:f4fd3\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. 
code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. 
code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:8892b\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "You", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " can use the following function call to answer", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " the user's question:\n\n{\"type\": \"function\", \"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "name\": \"knowledge_search\", \"parameters\":", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " {\"query\": \"How to fine-tune a Llama2", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " model with LoRA in torchtune\"}}", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. Help me answer questions I will ask next.', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\":", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " \"knowledge_search\", \"parameters\": {\"", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "query\": \"Torchtune documentation\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "Torchtune documentation" + }, + "call_id": "376cc471-66f6-4560-9c7a-51958cde4927", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Instead of the standard multi-head attention, what attention type does Llama3-8B use?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Llama3-8B attention type'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:num-1\\nContent: 3 `_ is a new family of models released by Meta AI that improves upon the performance of the Llama2 family\\nof models across a `range of different benchmarks `_.\\nCurrently there are two different sizes of Meta Llama 3: 8B and 70B. 
In this tutorial we will focus on the 8B size model.\\nThere are a few main changes between Llama2-7B and Llama3-8B models:\\n\\n- Llama3-8B uses `grouped-query attention `_ instead of the standard multi-head attention from Llama2-7B\\n- Llama3-8B has a larger vocab size (128,256 instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-\\n'), TextContentItem(type='text', text=\"Result 2:\\nDocument_id:num-1\\nContent: instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-8B uses a larger intermediate dimension in its MLP layers than Llama2-7B\\n- Llama3-8B uses a higher base value to calculate theta in its `rotary positional embeddings `_\\n\\n|\\n\\nGetting access to Llama3-8B-Instruct\\n------------------------------------\\n\\nFor this tutorial, we will be using the instruction-tuned version of Llama3-8B. First, let's download the model from Hugging Face. You will need to follow the instructions\\non the `official Meta page `_ to gain access to the model.\\nNext, make sure you grab your Hugging Face token from `here `_.\\n\\n\\n.. code-block:: bash\\n\\n tune download meta-llama/Meta-Llama-3\\n\"), TextContentItem(type='text', text=\"Result 3:\\nDocument_id:num-0\\nContent: :`download Llama3 Instruct weights `\\n\\n\\nTemplate changes from Llama2 to Llama3\\n--------------------------------------\\n\\nThe Llama2 chat model requires a specific template when prompting the pre-trained\\nmodel. Since the chat model was pretrained with this prompt template, if you want to run\\ninference on the model, you'll need to use the same template for optimal performance\\non chat data. Otherwise, the model will just perform standard text completion, which\\nmay or may not align with your intended use case.\\n\\nFrom the `official Llama2 prompt\\ntemplate guide `_\\nfor the Llama2 chat model, we can see that special tags are added:\\n\\n.. code-block:: text\\n\\n [INST] <>\\n You are a helpful, respectful, and honest assistant.\\n <>\\n\\n Hi! I am a human. [/INST] Hello there! Nice to meet you! I'm Meta AI, your friendly AI assistant \\n\\nLlama3 Instruct `overhauled `\\n\"), TextContentItem(type='text', text='Result 4:\\nDocument_id:num-0\\nContent: \\'m Meta AI, your friendly AI assistant<|eot_id|>\\n\\nThe tags are entirely different, and they are actually encoded differently than in\\nLlama2. Let\\'s walk through tokenizing an example with the Llama2 template and the\\nLlama3 template to understand how.\\n\\n.. note::\\n The Llama3 Base model uses a `different prompt template\\n `_ than Llama3 Instruct\\n because it has not yet been instruct tuned and the extra special tokens are untrained. If you\\n are running inference on the Llama3 Base model without fine-tuning we recommend the base\\n template for optimal performance. Generally, for instruct and chat data, we recommend using\\n Llama3 Instruct with its prompt template. The rest of this tutorial assumes you are using\\n Llama3 Instruct.\\n\\n.. _prompt_template_vs_special_tokens:\\n\\nTokenizing prompt templates & special tokens\\n--------------------------------------------\\n\\nLet\\'s say I have a sample of a single user-assistant turn accompanied with a system\\nprompt:\\n\\n.. 
code-block:: python\\n\\n sample = [\\n {\\n \"role\": \"system\",\\n \"\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:num-3\\nContent: LoRA to Llama2 models\\n------------------------------\\n\\nWith torchtune, we can easily apply LoRA to Llama2 with a variety of different configurations.\\nLet\\'s take a look at how to construct Llama2 models in torchtune with and without LoRA.\\n\\n.. code-block:: python\\n\\n from torchtune.models.llama2 import llama2_7b, lora_llama2_7b\\n\\n # Build Llama2 without any LoRA layers\\n base_model = llama2_7b()\\n\\n # The default settings for lora_llama2_7b will match those for llama2_7b\\n # We just need to define which layers we want LoRA applied to.\\n # Within each self-attention, we can choose from [\"q_proj\", \"k_proj\", \"v_proj\", and \"output_proj\"].\\n # We can also set apply_lora_to_mlp=True or apply_lora_to_output=True to apply LoRA to other linear\\n # layers outside of the self-attention.\\n lora_model = lora_llama2_7b(lora_attn_modules=[\"q_proj\", \"v_proj\"])\\n\\n.. note::\\n\\n Calling :func:`lora_llama_2\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='insert_into_memory', description='Insert documents into memory', parameters={}), ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "L", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "lama3-8B uses grouped", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "-query attention instead of the standard multi-head", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " attention from Llama2-7B.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Instead of the standard multi-head attention, what attention type does Llama3-8B use?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Llama3-8B attention type'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:num-1\\nContent: 3 `_ is a new family of models released by Meta AI that improves upon the performance of the Llama2 family\\nof models across a `range of different benchmarks `_.\\nCurrently there are two different sizes of Meta Llama 3: 8B and 70B. 
In this tutorial we will focus on the 8B size model.\\nThere are a few main changes between Llama2-7B and Llama3-8B models:\\n\\n- Llama3-8B uses `grouped-query attention `_ instead of the standard multi-head attention from Llama2-7B\\n- Llama3-8B has a larger vocab size (128,256 instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-\\n'), TextContentItem(type='text', text=\"Result 2:\\nDocument_id:num-1\\nContent: instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-8B uses a larger intermediate dimension in its MLP layers than Llama2-7B\\n- Llama3-8B uses a higher base value to calculate theta in its `rotary positional embeddings `_\\n\\n|\\n\\nGetting access to Llama3-8B-Instruct\\n------------------------------------\\n\\nFor this tutorial, we will be using the instruction-tuned version of Llama3-8B. First, let's download the model from Hugging Face. You will need to follow the instructions\\non the `official Meta page `_ to gain access to the model.\\nNext, make sure you grab your Hugging Face token from `here `_.\\n\\n\\n.. code-block:: bash\\n\\n tune download meta-llama/Meta-Llama-3\\n\"), TextContentItem(type='text', text=\"Result 3:\\nDocument_id:num-0\\nContent: :`download Llama3 Instruct weights `\\n\\n\\nTemplate changes from Llama2 to Llama3\\n--------------------------------------\\n\\nThe Llama2 chat model requires a specific template when prompting the pre-trained\\nmodel. Since the chat model was pretrained with this prompt template, if you want to run\\ninference on the model, you'll need to use the same template for optimal performance\\non chat data. Otherwise, the model will just perform standard text completion, which\\nmay or may not align with your intended use case.\\n\\nFrom the `official Llama2 prompt\\ntemplate guide `_\\nfor the Llama2 chat model, we can see that special tags are added:\\n\\n.. code-block:: text\\n\\n [INST] <>\\n You are a helpful, respectful, and honest assistant.\\n <>\\n\\n Hi! I am a human. [/INST] Hello there! Nice to meet you! I'm Meta AI, your friendly AI assistant \\n\\nLlama3 Instruct `overhauled `\\n\"), TextContentItem(type='text', text='Result 4:\\nDocument_id:num-0\\nContent: \\'m Meta AI, your friendly AI assistant<|eot_id|>\\n\\nThe tags are entirely different, and they are actually encoded differently than in\\nLlama2. Let\\'s walk through tokenizing an example with the Llama2 template and the\\nLlama3 template to understand how.\\n\\n.. note::\\n The Llama3 Base model uses a `different prompt template\\n `_ than Llama3 Instruct\\n because it has not yet been instruct tuned and the extra special tokens are untrained. If you\\n are running inference on the Llama3 Base model without fine-tuning we recommend the base\\n template for optimal performance. Generally, for instruct and chat data, we recommend using\\n Llama3 Instruct with its prompt template. The rest of this tutorial assumes you are using\\n Llama3 Instruct.\\n\\n.. _prompt_template_vs_special_tokens:\\n\\nTokenizing prompt templates & special tokens\\n--------------------------------------------\\n\\nLet\\'s say I have a sample of a single user-assistant turn accompanied with a system\\nprompt:\\n\\n.. 
code-block:: python\\n\\n sample = [\\n {\\n \"role\": \"system\",\\n \"\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:num-3\\nContent: LoRA to Llama2 models\\n------------------------------\\n\\nWith torchtune, we can easily apply LoRA to Llama2 with a variety of different configurations.\\nLet\\'s take a look at how to construct Llama2 models in torchtune with and without LoRA.\\n\\n.. code-block:: python\\n\\n from torchtune.models.llama2 import llama2_7b, lora_llama2_7b\\n\\n # Build Llama2 without any LoRA layers\\n base_model = llama2_7b()\\n\\n # The default settings for lora_llama2_7b will match those for llama2_7b\\n # We just need to define which layers we want LoRA applied to.\\n # Within each self-attention, we can choose from [\"q_proj\", \"k_proj\", \"v_proj\", and \"output_proj\"].\\n # We can also set apply_lora_to_mlp=True or apply_lora_to_output=True to apply LoRA to other linear\\n # layers outside of the self-attention.\\n lora_model = lora_llama2_7b(lora_attn_modules=[\"q_proj\", \"v_proj\"])\\n\\n.. note::\\n\\n Calling :func:`lora_llama_2\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " attention type used by Llama3-8B is grouped-query", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " attention.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Instead of the standard multi-head attention, what attention type does Llama3-8B use?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', 
True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='insert_into_memory', description='Insert documents into memory', parameters={}), ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "{\n", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " \"type\": \"function\",\n ", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " \"name\": \"knowledge_search", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "\",\n", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " \"parameters\": {\n \"query\": \"Llama3-8B attention type\"\n }\n}", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "Llama3-8B attention type" + }, + "call_id": "0c3a0c26-e211-4486-8efa-21e069966368", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Instead of the standard multi-head attention, what attention type does Llama3-8B use?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for 
information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\":", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " \"knowledge_search\", \"parameters\": {\"query", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "\": \"Llama3-8B attention type\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "Llama3-8B attention type" + }, + "call_id": "72d29867-cd40-4697-b8af-663b276e0ef0", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Search the web and tell me who the current CEO of Meta is.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'query': 'current CEO of Meta'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content='{\"query\": \"current CEO of Meta\", \"top_k\": [{\"title\": \"Executives - Meta\", \"url\": \"https://about.meta.com/media-gallery/executives/\", \"content\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer Joel Kaplan, Chief Global Affairs Officer Susan Li, Chief Financial Officer Javier Olivan, Chief Operating Officer Chris Cox, Chief Product Officer Andrew 
\\\\u2018Boz\\\\u2019 Bosworth, Chief Technology Officer Jennifer Newstead, Chief Legal Officer Dave Wehner, Chief Strategy Officer Will Cathcart, Head of WhatsApp Naomi Gleit, Head of Product John Hegeman, Chief Revenue Officer Adam Mosseri, Head of Instagram Erin Egan, Chief Privacy Officer, Policy Michel Protti, Chief Privacy Officer, Product Alex Schultz, Chief Marketing Officer and VP of Analytics Tom Alison, Head of Facebook Nicola Mendelsohn, Head of Global Business Group Ahmad Al-Dahle, VP and Head of GenAI at Meta Joelle Pineau, Vice President of AI Research and Head of FAIR at Meta\", \"score\": 0.8190992, \"raw_content\": null}, {\"title\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer - Meta\", \"url\": \"https://about.meta.com/media-gallery/executives/mark-zuckerberg/\", \"content\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer | Meta Meta Quest Ray-Ban Meta Meta Horizon Meta AI Meta Verified Meta Pay Meta Horizon Workrooms Meta and you Learn about our community Shop Meta Meta Quest Meta Portal Meta Horizon Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004. In October 2021, Facebook rebranded to Meta to reflect all of its products and services across its family of apps and a focus on developing social experiences for the metaverse \\\\u2014 moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. Shop Ray-Ban Meta glassesRay-Ban StoriesPrivacy informationSupported countries \\\\u00a9 2025 Meta\", \"score\": 0.79099923, \"raw_content\": null}, {\"title\": \"Zuckerberg\\'s political pivot targets Apple, puts Meta staffers on edge\", \"url\": \"https://www.cnbc.com/2025/02/14/zuckerbergs-rightward-policy-shift-hits-meta-staffers-targets-apple.html\", \"content\": \"Meta CEO Mark Zuckerberg\\'s actions to curry favor with the president have rattled employees, but people familiar with his efforts say there\\'s a clear strategy.\", \"score\": 0.77179235, \"raw_content\": null}, {\"title\": \"Meet the Executive CSuite Team of Meta (Facebook) [2025]\", \"url\": \"https://digitaldefynd.com/IQ/meet-the-executive-csuite-team-of-meta-facebook/\", \"content\": \"Harvard University Executive Programs Free Harvard University Courses As a chief financial officer of Meta, Susan Li oversees the firm\\\\u2019s finance and facilities team to keep track of the company\\\\u2019s overall financial health. The chief operating officer of Meta, Javier Olivan, oversees the firm\\\\u2019s business team, infrastructure, and other products. Andrew Bosworth, called Boz, serves as chief technology officer at Meta and is responsible for leading the firm\\\\u2019s AR/VR organization, Reality Labs. Andrew has also served as engineering director to oversee events, mobile monetization, and feed ads and as VP of ads and business platforms to lead engineering, design, analytics, and product teams. 
Meta\\\\u2019s c-suite team comprises experienced and diverse executives, having extensive experience in technology, finance, legal, and all major industries.\", \"score\": 0.7602419, \"raw_content\": null}, {\"title\": \"Meta to spend up to $65 billion this year to power AI goals, Zuckerberg ...\", \"url\": \"https://www.reuters.com/technology/meta-invest-up-65-bln-capital-expenditure-this-year-2025-01-24/\", \"content\": \"Meta Platforms plans to spend as much as $65 billion this year to expand its AI infrastructure, CEO Mark Zuckerberg said on Friday, aiming to bolster the company\\'s position against rivals OpenAI\", \"score\": 0.73914057, \"raw_content\": null}]}')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Search the web for information', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " current CEO of Meta is Mark Zuckerberg.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Search the web and tell me who the current CEO of Meta is.', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Search the web for information', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + 
"logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "brave_search.call(query=\"current CEO of Meta\")", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "current CEO of Meta" + }, + "call_id": "b7d61df7-55ee-4edc-b961-7e14596edb7d", + "tool_name": { + "__enum__": "BuiltinTool", + "value": "brave_search" + } + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice'})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice='get_boiling_point', tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " function `get_boiling_point`", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " is not able to find the boiling point of polyjuice as", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": 
null + }, + { + "event": { + "delta": { + "text": " it is a fictional liquid from the Harry Potter series.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice'})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)}), ToolDefinition(tool_name=, description='Search the web for information', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " function `get_boiling_point` is not", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " able to find the boiling point of poly", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "juice as it is not a real liquid. 
Polyjuice", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " is a magical potion from the Harry Potter", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " series.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice'})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " function `get_boiling_point` is not", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " able to find the boiling point of polyjuice as it is", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " not a real liquid. 
Polyjuice is", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " a magical potion from the Harry Potter series.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice='get_boiling_point', tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\": \"get_bo", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "iling_point\", \"parameters\": {\"liquid", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "_name\": \"polyjuice\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "succeeded" + }, + "tool_call": { + "arguments": { 
+ "liquid_name": "polyjuice" + }, + "call_id": "490c45b2-2a13-4ee1-9e37-711fabdbcc88", + "tool_name": "get_boiling_point" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)}), ToolDefinition(tool_name=, description='Search the web for information', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\": \"get", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "_boiling_point\", \"parameters\": {\"liquid_name\": \"polyjuice", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": 
"ToolCallParseStatus", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "liquid_name": "polyjuice" + }, + "call_id": "b5f6f475-f1ed-4916-9959-405e72ca0c1d", + "tool_name": "get_boiling_point" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "I", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " couldn't find any information on the boiling point of Polyjuice", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": ". Polyjuice is a magical potion in", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " the Harry Potter series that allows the drinker to transform into", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " someone else. It's not a physical substance with a boiling point", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": ". 
If you have any other questions, I'd be happy to", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " help.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\": \"", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "get_boiling_point\", \"parameters\": {\"liquid_name\": \"", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "polyjuice\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "liquid_name": 
"polyjuice" + }, + "call_id": "3e1a2cdc-46c3-4f2f-9fca-874fdea1700c", + "tool_name": "get_boiling_point" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Write code and execute it to find the answer for: What is the 100th prime number?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'def is_prime(n):\\n if n <= 1:\\n return False\\n if n <= 3:\\n return True\\n if n % 2 == 0 or n % 3 == 0:\\n return False\\n i = 5\\n while i * i <= n:\\n if n % i == 0 or n % (i + 2) == 0:\\n return False\\n i += 6\\n return True\\n\\ndef get_nth_prime(n):\\n count = 0\\n num = 2\\n while True:\\n if is_prime(num):\\n count += 1\\n if count == n:\\n return num\\n num += 1\\n\\nprint(get_nth_prime(100))'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " 100th prime number is 541.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Write code and execute it to find the answer for: What is the 100th prime number?', context=None)])_[('response_format', None), ('sampling_params', 
SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "def is_prime(n):\n if n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " <= 1:\n return False", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "\n if n <= 3:\n return True\n ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " if n % 2 == 0", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " or n % 3 ==", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " 0:\n return False\n ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " i = 5\n while i * i <=", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": 
"ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " n:\n if n % i == ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "0 or n % (i + 2) ==", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " 0:\n return False\n i", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " += 6\n return", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " True\n\ndef get_nth_prime(n):\n count = 0", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "\n num = 2\n ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " while True:\n if is_prime(num", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "):\n count += 1\n if count == n:\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " return num\n num += 1\n\nprint", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "(get_nth_prime(100))", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": 
"succeeded" + }, + "tool_call": { + "arguments": { + "code": "def is_prime(n):\n if n <= 1:\n return False\n if n <= 3:\n return True\n if n % 2 == 0 or n % 3 == 0:\n return False\n i = 5\n while i * i <= n:\n if n % i == 0 or n % (i + 2) == 0:\n return False\n i += 6\n return True\n\ndef get_nth_prime(n):\n count = 0\n num = 2\n while True:\n if is_prime(num):\n count += 1\n if count == n:\n return num\n num += 1\n\nprint(get_nth_prime(100))" + }, + "call_id": "aff8c2d2-6609-4398-8773-1e4075964691", + "tool_name": { + "__enum__": "BuiltinTool", + "value": "code_interpreter" + } + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='when was Perplexity the company founded?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Perplexity company founding date'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 2:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')]), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Perplexity company founding date'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the 
founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 2:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)}), ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "Per", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "plexity the company was founded in 2022.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='when was Perplexity the company founded?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Perplexity company founding date'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an 
AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 2:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)}), ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "{\"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "type\": \"function\", \"name\": \"knowledge_search\",", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " \"parameters\": {\"query\":", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " \"Perplexity company founding date\"}}", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "Perplexity company founding date" + }, + "call_id": "e4a5ff1d-ac00-4e0a-b93b-17e19fa3bc55", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, 
+ "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='when was Perplexity the company founded?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)}), ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\": \"knowledge_search\",", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " \"parameters\": {\"query\": \"Perplexity company founding", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": " date\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "Perplexity company founding date" + }, + "call_id": "6add8292-f388-4ec5-8ec5-5071c9397492", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + 
"('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='when was the nba created?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'NBA creation date'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n'), TextContentItem(type='text', text='Result 2:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)}), ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " NBA was created on August 3, ", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "1949, with the merger of the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " Basketball Association of America (BAA) and the National Basketball", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": " League (NBL).", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + }, + "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='when was the nba created?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)}), ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { + "chunks": [ + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\": \"knowledge_search", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "in_progress" + }, + "tool_call": "\", \"parameters\": {\"query\": \"NBA creation date\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + }, + { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "NBA creation date" + }, + "call_id": "f50656dd-201d-44b0-8f9f-ca88b970b3fd", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + }, + { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "value": "end_of_turn" + } + }, + "metrics": null + } + ], + "type": "generator" + } +} diff --git a/tests/client-sdk/fixtures/recorded_responses/chat_completion.pickle b/tests/client-sdk/fixtures/recorded_responses/chat_completion.pickle new file mode 100644 index 0000000000000000000000000000000000000000..13803a2e0c77e083b58563cc29a47c7331282a1b GIT binary patch literal 168310 zcmeIbTaO&sl`bZ!yGQDhMwgL#Bz4b`LP@qVORCJu%(~Z9QZC-QsY6y%WVI+A7AG?@ zVrNElMMh*tL{>4ohsGHg5)4cMXMj`lG=4B(_+cId`M}nLf#a9+M+^go{bblb!QWbY z$0cu7NwUc(QqiO7u8hl$wfA0o?RDGV`uA`C^Z)qWFMo;td2(9%P3tRP`ckRb6QN#q z9KEM6-==?;S1Psg=GF2oHwgWK5f+!Uhxdm;D0+895a?Y|@;yg1iUD84UBh!j;f9Uk zZ@q!0`$E$-OE~?`z|r&|u!B%{!{XwS_HiKm(Iy7Cl5XGw`$%X#Y9K&^!Zm~zT6)M| z&}}!sFQFZKzPU|baJ=Z<$6;g7bH(DL#pc6O(HBAAa|6-rcz#bO`uXOPRw@R1ukYAy zx7pWyy%*rteRbt7Unrr=dMLWX#=h6T8{NCFE|$V!9~}?9ezT8PF(Q3|#|OS>R4dhL zwT5eS55Kn1-=6-s8G47p4I0%Y%@=(U+M(^a&AxDTCmc3vmFgm%16iRj^d6er_XmQn zgr4U#F|ZxGOPBHM8viKC8N15)@*gkx;_~;ajcJd|- z-08{NUE$JzdH(6iUl`WFJq%7C;r3L}kNdU}obH{xC62&ar`A_b-kaXs z4Za^WWPO9^U*MOT+zY(<<1^mer=I@D)04N*8$Ue#-RV92Z3-vUPfy;T=%?ZJdY)TR zD~LZ3V4U{F;21XLMPOZYpmre&Kf7NXEhbC5+Z8UM&*U{a*lkcalc5LC8 zK{F6E693_}WEKADQ%SAXdi62A+$X*5y6xeQMx6 z>z#kHig-G?^euV8dRvY?51IANJ?mT6chXnhvA%!uH8kJF`V4qr(AbaCPrP9j(6cx8 zEpeK(UQn%<_N>Je9gELJ$?Eh^U)GEP`rQ-07W8aKn40dITAR8y9R(yDxO#u;k7q+RRWP_Ac7yjxCR` 
z8v`H0F4V5w_(=0Q+8u1j8iXJuC<}&sce|Lo8|}W=MaOn|SC>1ptM{-kUme&^XuI@4 zrS0oS0uq?G0Q+eACvDWG2n^ru(}vb4j`0fg7Y#chYcaSFeqgl)%g_%RpBFzJ2!B}I zX6=C0mlQGlAnzwo)Ufxtq(+Q<3sP3&79i4~ZZVVMQ+Ui5pAKwam<`fK=&g=EaFmAi z1UeS)>DC+8o7P*{z0Vi>?@krAv*ble#y`(qLlXXPPX3an;F|7Wo@fUi-%Z~pO{Hnz z{;Bml>(a?LFl^8pv2}-o;1utFH+moA8;JYzI^@$=d3P7)m~8sJ^}Ff2)${h`A4a#y z-&wyq8DXpn^`XSm*(gjACxyjR>-W}wd-8P~+GlzTPUPxL+RqZW=TAP(!zA^Vaq_ij zXh&z|ov2pQC?dUqP!|hqukzFua!4MZFOyHU6^^Brb@I0EcLzNP+yKhq$y+>XrzhW?GHj)+NQqjpEM7AK$uu1#QQS`cyka%&jSrU!20MMUu_ZR*X!e(Js^H zLB$tEmoi!!#m@`8gbLf*RME4f708r0#Qle#7hnQAf*!r`SQvv427jQHS>-9GU}wdxeZ+19?8)(C*DT)-z^jj=mhSqV z3ExZnz^-fh;#fO47}Tq^&8yyL@~Xs*}v&@y1$E@>a4G16Ecz%UPX`+aRs z_j)!)OW3KqQD1)PSuS4b3QR!sg?nUK4@CTdT~qIAcf0_OA&hF&KDnawE<85c4X85O zjc(GwU9w<}xU);zUC*)c`;KkEV$<;LFpTGC@?ka1yN-CQ-G@uU2|tTSBQAoYAvH^6 z4F38qb$AyJoFUBa0LoWS+jVSM$qBhPbwjj0?@-&b4bRcR-QYeaK=+ep6h~bh1hxx% zSo^^D27PVU>S088o$__va-e(RYxL1*96k{4?kx=tF*Xs1PaUD%MFaW(&w_c;UlG_Q z852b1Ee(=Ds&?sSZtmW?7qvo5Coo`3V&POPo3*X#)>a+YeEpdGPvqFc9e3b3PnNXQ z@_o)V_S{RphuJBA7O(LySOomN9AO^jUkyYMYWMVE`6?DWU*dm#==t_%p1Oje=8L}u z-@@r)%3pVNFd|uV(X$Uc|IqimULY^iqzvISy^T47Od@Hgc>^D2U#~ZSSuxb^TV7v2 z%k4;7_|Lmo^;o?l&5unAj7aG;h{T^UBK{I)(f1f8ama56m#HW=n}TN8n%lFnp|jexRen2Q#F{;e3>j=^zR&8V$p4LUK4>pVnvKvCI{ZAy?rZp=QW* zB4=?AqkjZ*Nhr~|s`b1hdZ#Uh9!9@@U4#84;G+e#$GUH7b`S2KfE>~3_8j{VZs|dn z)JEkNI~%%Q15FUjq#wPF0B@A$Y2r<^8^JO}$6D8|?+O2b%4SL6(ja97uX zaw3B08k7+9Du`rP${@Ty==Z?|pvS;e!OBC&u`g7s`W8*f8jt?y0^gwZy;WbCV}&Oh z=s^*)1+fZ&hyM2MBQMn8-RufX9;7}As{R0f^JGI(cd*&;90{{4rfloS$Hx`JZ5zC) z6QRr1`f_b`Ia$s@8Ej~UG$CdBvN0?NmfZ=<7C5I&lUk0xQ&yCe!6c5TSYgi@R|jYX zl5b^fc~G}>gQkWiAFe;&2h;0EV4Y(-lrEcoEKlJl7aE+`P+v?9o`1(13K1+}x4Oki4GVMBNIRnfFpdt-z2vko`PM@v4+an6m8TtFhgr|Owk=7FcaXk zM2Wd}e_)3~+lNb>Rt`@|DO#J0+Cv)dM^om+#IlDxfw4byO`e!rzk&U6-sQqqS2!E*iN3^6qcCnnWrm#p7La{T`jt<#^<)=+~^co}~mVvg5 zS<(zv!{BRC`jw}2HPuDLag-{idA!{59*s)}+Md#sAQSj)EMu&Dkc_DcX~j@iX#Nj{ z=xdk~l2^kjFerQ#+eR`Tn^kUs^g2Z2r>VmTgZAVH0k+wuG zAYTWwKv*Kww8Zp#pwEv*Xax!h>xYRpNLoG^6`CS!8VFZ(6mneBn1~ zz6IA)2+eL2S7SYZUrmyl6Jxr2Z~52vuxUUevp>^mEiFOkkOJ|x-VS2QEPP#u4kU@; zO+~oaX#_+z8aCl@!gd0o?L{*P$w@Xk;%HtQzIgn{g*p~}geMW^x2BtD7*wNushdR? z;!%VmAEFarH8f5=(NbS#ubABh*?OeY$q`oD6SD`pK?^Bn1f$HWK(Y>T3AA?%aI#^5 z9FSx(+tWr8nL0}vh1<|y>y8I}kA^7>l2?YLzFp+ATq2pgiuCT555v4b*^ zP$t1uuGY%+)p0SyGVX3X$z-?jRLgFGFM0#$Hu&$%zTu0^__UwhnpkgBwhkE+!Vul` zZBrj&ZtNbf4^L|&_K7}^2oT;sA^&Jl^)5xSLyhbf2c{(Yx_fJE2Uyvvtyb4IrcdFc zC&fI_eZd0Viq?I zQprD(M`vH<=qxcr)5bLVu=Mb6`1Z#yO+;y>4I`NeT)?5$C?-oUKUYYWITS3Z7!>Wl zHsRPQVZcj>O8#RKAe$M3d|vTC`RS7%!qmm`MmQ9Uy5ufyFKGD3?r1KBO8;dJqMk3D zH$5>MQNtg{ERca z7Zzymf&0AahSt~y#GZVtiEL%pw|T&z>4K6v{hjHkU8e99WCLw6BkMBRz6ZTbDO^+P z1MxfkBWDquSBsYcU4|%8^V=W^lPSq{A}IZj(ljvXG0nsP+=&HfNt898q5ko`(&&PL zNw&yYZCR;m*1x8c^8Rpg8F8;oqi)vA>+996^6JXgW_fdCW2IaZtJMwUtF5kY)lRKH zTL0z=aV z8JqR2?Q+V7m2RK7Z{izt5!>6S(4JpxGMy>GHIL|cro~HirvEvA`AlWtnHh%vd6oq- zZ)V^5xtTA_`xAckf@KuGaPJS2hT?-sqgi(sDA90S>dbIJBLrfca1J^Iwj)onCb`JPIQap74=$5xfs1i_Y(n? 
z+>18&1NXq?EqT^6-&);64mOBy#$X4%~f2>P4HP^`)Li@ibwO zL!IK@8WLziIczxWDKw-~Ahf!Jvqu9*T#>!+;B2JUKKAuKf|EE&W%nqR2C-EeLve?n z-19W-np|vrrnYG&E?&fCA z2_9!k&-a4Ba*Z_u7%$bXyS%>~_WId|iNCoOI^8g+EgSZO->zmGV)@~XJ)|fj{}Snq z74TAF5sZG%I}WmSaQ{Raw1qv+*P)zlA2jjaic?vGo{JOS8Tzx|<#b=q&7fn)zb zZ~;f}^2q3=Mh@#$PO@fZ8{~P9&|spGyRP72E&}7d3uT=bMZ|j;J(IC(E>;YxT3=t^ zG7y8(je4iNx>;+N_4dYUd81uh->8Y&+Scj@Vo;xa4bX2CZ~9~bfBfdZLA2={edTfN|0zFkdJSk7G|uXGx+!nPI@haLI2%F z3kKz$i0AS|yzZf{DT468O<8UT_DmR0X&=ubJx%Iw6jC`u1Dz?ws0}5z;%1&gjwP>4 zWn`WXA_s90+*|IDT8s$NwR~foA_wT_aHx0xyGc>m;N4UOnD}2$|movgu zmvN3+(1~J#Czhw8;q@PLqGd$ecR!m{#j_%1iV2VrDc^8Yrbk9Jd>5{|ag!$_QVN>s z;mEoRC=c-%7~K_ZVPW4>5RnMSfu}R03_e5-s4Z~}4G;kgC8MHk8B*3DxDbMT;b>nU zFj$nR6t8|jlL)5+Xq=9W{UP4z4* zpowl~MB|TJ1fvoXt@bm|`JxA;NW(V0M73gIckG`FCIP4j4_BO4Pg13ES`<}`>E`PtUV^BB=7 z+?mhppzpzmPR$btFd3n(3 zq6M8MfupiaD*xLlqCRpw!QO`BG<58Cr9Y&o8{I0y>Y3LVg)oIi3((@zql@TNU1(vRXJnLv+vfLUe{?=&OI;8ckyW6zo_ zDA68D0AD&$grl*>0BPDds3bRxOetSW_NfI$4}kbo8ac)mdPbLOVzh^;o>ej>`LY>S z2#a{}VZyh?N2pJuyCvz(#Innj!k?#i`-Q#}R!~3|ILambC$K*gjZ&?;w2n+^+{%>D z-3p1Q(!|8WiD?Q{!I``Pv}6?GpsKnk%i=eRnUcTiK~~Z5ayg??aaU4o;GI&Dh*Ja&nV?d(gLo+#wejKV0#v(1$O;)Bh2R+u5V9&=tR(Nv znz^BG0$FHfW4#Tap-oZVs;!%4b8U5PRiG|jbqiRL<4NA%LZ2pi7XOSmUSM(ffisY8!>en*i{Qvr9?h}70g$w6A-8>CXXo?^@&to-m zkUmec#zCpYJjwd$lB{!|c$8AThM%Z)p`2 zzUV^Ib8;((rrZz(>~p+7B#lSKmm>fw)X?rT0#%Po48p6|s8#?-g4(N8U%QWATZkF< z^vBK6J4AK41~P}sJaAp{Xs{e`!X zE|XZ|A0>H%BWH~rzMee45b%TpXk2YnCCiBBm10s$S>=84xhaHH`5;Af>;rXTQ5+UKS$0xkOG*Xcbq@gj zA;*>YIU*59QYf89#e@|GFo%LO8rwx)ayq!c0$+~O6)&@5hUcd$qjSsyXxdaAoROMk zaGilxILj`hVSGP6LZzFZprkIXaK#D+prNVmu`_(((*A{E@&E4^4G8uD2`ubgG|X)e zRk(3JvhUF`Ajn=6dmnx>unjQ*2Acz*z9ImqMXJ-HCbr2F3tM6Y;q)HYw41>Jy5`uQ z4s4VmceQK6G3-G=r)+QP7NmwHY(n83DY+;gZ;~9DukzwJIs0HldOJ!%mkr{nvnnqx zp-`lxZjO((VM&dD^?@Z=Zeu4@$o!KFA{l}-HC(!lhP0y(oUp;x+Y?xHWS3nRsAbCb zp0wGhUyYrs1KSC0q-uLkrR_t%X$H`33=43y>7NsoF7qP9UuY5Hib|9J9D+7-iHg? 
zEMAe`XLczA)1}uBr;IY)rLlcDQ>mFcnYo&YF&i0#T+JLE4^s9&9SDF;K=GDZu!cpB zVLMCPr-<02(q5IVHZt%lB7aFnB%)Y1IYoPV*T(S@ zxNpde(73o7jpm2CkCk@U3qz4rVB%})?BlEJ!MptOJb3WJAL~PkE0O+VD&IZ!Vy>%4 zOBGX1lZmpbIxypkP>Iezr22u7PSx4oKKUV>RgFGHM`&3yL%AhZy>h)QcmL(?KVO)Q z>F&Q*NI$t)Zm`Sey20Rx%F?5AQsWqjbfwSd2FsM(_gHHoLY0G@oO2;gb6qsg*jlY> z=qs!3@>*MmKg?X~l(#yam9nw6Zi?3?@E@E2QI11MC01kdc$3d`_HmG}4$6k=l8xT}E{MRVuQol)8*4`8MT(kTTiP zoq)B;M5}sX=)hBqm48ipe*bWC*)*!1&2@97TyJma<<+%zwY(|THp`ox)h%OdYqeh6 zsy??K`4)Hj^;9JPx0e@m%@s*_*qU(FIzfYDYv-Ojo4VJ}6|v1uk$l~ia+U?TR-S9+ zxmNBuoOY4Qd(5@+sdc+%94E>!GQ4p{`Zt9YDIo{J`>ro=93#p9eRTtQ|L+?w&0#iQ zVD1@{%l?dGaZ!>8Qq!V8DQkK%nTey8fB_x`q3AVnpeO)BZ?i2>*T(kz#?F1daYtUi zsxDV#iWC~pbERH`l>Ar7Uo*wby@c?@KPDFT@2%gds6}b-1!u*d{OIhMT&Cfzm5C&# z^Bgx?Ayb0C5$Diev;f?SsOfdXG|H>%#!7j$gNpTA9YZf~Zmf1pq1QI5RpSiD$?W*s zYnhr}dU@ft7pgtHe#ZF|WQWS*SNBxjxZ2lCzULq|K-vN2Gr{Z9w9Q6w=i`rWUB7}W ztTylwt*Ks?JT=|9CUHh0T`O0+@{^FMOzl@b6>3VDEJK6*BByGv&6%n_nlhbHq)MZl z_L-^L@NHy7*YcdH+OK@d^#7fET3+?jXf6)iVY{dQe>c4y|qkZ7N!r6*} zW4c9Dkfno9y319=;-bA&h<(~wsH{&L6;cPDrXD=9-Hb-`Di2utxRcsLaUF0su>8DZ z33y_n;x!7G(D6eY6m|nTVGOJj)DcR2Z7(dFWs1JJs|8+9K+0+YSv5e53X6Os52`}n z8U}o>i@{KsaH{(n(9-^BUPLqAaI5;XnbBAg`3!iqmE~KMmhnY z9pJSaIbTob$zOf&s60}Y&gq*#H>Z5W%Id8CRDwTZ!R-pwObo{JQZ7gdC5>c&1- zN^Tyh0<^&wj)*^XK5|7XSG2?gO6M!2Xif1IWEh+JH)p~KWn|c|lM{+VJ#)bbMX{-j zsNGO7LOPs&4EGu12u%m zyjM#Mv6KAP=n$i3ddP*o?dUyyrM!7n8w5E}HW}?IO@ttFD0vqDc@<(thpu<*2orh1 z5{Pcxx1ivc;UONWzXnujovZP-Q3Qg3l14UK2Dh67lN!DZpjeK7lH3_n;PEQjHQgmo zst#vIIPghw=x%p_oMuP~7c2|kFvvU(@$wtU%PNavvGI)BEX1|qT zZtUNjl^dI>9se_}FwW6aQe&CBGNtp!(KmQ|d^q+iHU*C|9q3APXS#pCdrNBdZK3+HvBl6$xcQbYWwQ5=7rlXn3f zJ3#z=O*5>4dl+mVxL0p{aBGh$Nlbqpz>3iIg35tQIo5N078%8OL93zZ>z;wiL~e*2 z;%#@(E7uNOMf0|%t!O(|81{qhitw3eZ zGZFU1Ud#~CB=vvVP#UbB?jHsT>DHjzg;vr*{VLGDOnoSGcb18&GJmK{n(rmj zeaa|)h8Vi-0@z-51ii%2UK{vSVF*2hmhD4n!T*QHUh3O{{TccMa=5eHthTbPL8FF7 zu5DDWBDPH3g?Sf3ne=TaJjWFMb{tH(K_yA#Xy6TMOO7Kx4gE=d5LOmOx(DjO9Mt;& z`crr;ge&#;dU->Bjla&?yzB#J`oJxZD)|AhbA4~nM@OZ0mTX$_wCnz3`>5ji-DSNU zEZ0}6Yn9qceRYjz{wWL(X$&$|MB^wevIdre)r%B{BhS#=vj3%8eRHY427@^H(n@^^ z1 zW1VkB%4**h&{*TKOEu8-ysjgbM;=5gX`1I*p=rKZik+)+z}D#XZP*5q?jY zHq~D=ZAi9&u!FH)Y?L#+bNenH93o1aW`YTRgaJ%VT6ULeppj6}j&x^$_3qJm3uE9= z>ktEwX{zOx?}a*0iThr_g>ta8MXxPPD)f~SMPtjUR$Z-Bp@^-57ifY0J^c&7wkWI9 z)pWzaK~DN+yqL?h#tO+hhVciNUh0 znhPTEA^N{ch0DN|L8)=Ly-nO@(RB^~(6}dKn(7@5L$psFxP`ek`AANzOpz1S)F!{( zif2(yGd3y7WMCAm04$pwIl4_pZwu#2OLk;mpkxKDgJ>gSd~R-{o-plvj9M;JoBLoQ z^*-7R6+SH0P;?j{$+6 z?DaAUjaQ0nFaF4qX3LDQV;$w>z)JoA#G)1=RU9M_g=AX(6qg(?I~9un{J`k>eE7}<@v6zl~Mg56oGfSc58>~jj4~#EH&TS^$ zb8OaR#b}5Yu24Fnq0tiOYQc~Oa->L0o7#NDMormzGwNRRC}=9V*px%g@^GIohFgl;Uygd{ahSQPYK zT7g6kTAoyK)6*o2xJ3P`u!svP4r%Xo4Spfd4>kF#xzUE1;rV(q{w-(vl17zB&1g)c z&r_;CkBBtzXl7~IH>DJ9B(0WUb<^ETEO1r9Q>YVcbp^cWM?Z@mLZd^85(86Wc@Tdj zJW^CqTMvZBN?D}%p;`h!qjdK7(zbUds$R7R&xOI~Vnk3+B7 zbNU=wBYly%EWRV#k~hI4qbw-TNT7lpD!|I#C^H*x1*9B80MLdx+ApSvzZ_4}$b6Uh z9)`iCS%f|Vc*#_+RbL!r{%~BjgJJLl z+`2N(in5D%7TzB9Y&y;Ai2{5|9Tek)Zwcl!u7!+$C{OI-TBp8`ka&&(_1XP+!Qc^M-&C1Y{CXhqdn(W^#%y)5drT6uMK za~(x1I(oTY6V>&tEu@#N^NF@RudHM#p@Nfb<@fTuGDb31$cC|R5IY=H2g;vn6!CX} zqi;!nFld>;c zBk5wv*yVX;FFYA9&nr7Cy^P`~H#tEgmCDLN3XYWId1Xj{Mr=amH|1m$6Ebcc<7uYdPG<>Mj)>At!c9ZH60y$?q>mE#S<@ zm!E>#$a%aBiax=5}Y3gPuK;m2ZuO<`8XuV zC=MY5Y=beP@-*!n$1KM&+e>oWo;TfXN>)&gV>US`C+| ze9>UcRyNiRV|Akj+_KJkd39~QTHZ1?^zvG5Yi-jo#p+glg8{-!cBpwDU1(=fwwBrz9ICM(V9Uiy!Mux&E!a?FO@($cqD?MH~zu^pexTt6wxfg^(2mJt8$-rkSL2^=;tko(67Ebhc z`(Z)$2FGZT$>|-q7#)W3U`!VRRipFAK*XqMyH4nJ1=yKMqY7BR?$zHx6iax+9T=s8xB2x`1P_ls0Y#Xf#c3NnWRW@pl{X zU#TPTk|!I5Pmo}nJ{q6!XQM!WFZYJLG%Di{GSfB+4C|0?V1!{LGodBmJ9*uKnE4=c 
zRwLmbMQL=}mk>Q-6s4iwbE3h$ghUfE`f%V~_B`$@*nmbs)4oXGvvo6) zwn?7j7^D^>nM5P9d{nR0;h|m@?s8u@4gnE{)Nq9svP|v4k$@9fIInL8m!h{S=?}?l zOFGI9sR217dwE^%0Gi=|YJ!1-IEE-1O1}c&COGd+^a#HL*>CtC{Aqv&;*JpJ2;iWS z^Bl6xV1Oh2fJ;&0pIjW|1`$Y=>WcDVujkk&&c-FgLI6B!MIe# zLK5Q)C_o5EKy?^{_+Ts{?Uem_Y=u zBGb7L7)ZaM#s|X%hz}Z3IDaAQ8OaWR9=L$Fq_G?$N+LE97M2H@rXa6WJQHYaa@_6! zX9WnNp#OJfL+d0&X7;G58ToSoJPWNBpg34FhPXvHjV*l zv{5oDg+GD?De)U<`UprwLb&mE*f`@Kq<7MM?@Krxm7c{J2LxSl;GXeng9HID4T%cI z?l@#LVGEk?kIv8l$A!E4oZpdzks638g@p=3Fg1!s&wY%@P9w4YEImTSXy z#q1J?K>;}CZ$98hNVx%api5;NoX=QnxVh~C^AUn(yM+U( zfUqpLJ%F_RZB6w4wxvBFrHrPHXAx+=K3bA(Y1+@^?etjCh(nqM!2Su;5m(I|P)UHO z-}p0mf7jIrs_e)HrY)gN()h$XGjAF3lqD~HC^2d&i0(5c_4clEGKRzAUfC3djwM^)^3PDOJ$+qq*kL9Q5%fp`c+Sgi($0J#%A zEU|c!_5oBxEP|y#BlwH43>50R=zVmIP-dEFT{S&)K9)m>`0zp`0RDa6Jl0O_ZKE zgk-@O5%8xFgz-!Rrfy+R@&zG|5|hUi27r?z*2KW{JV1O&#Xuo9$?Oa?8C7bA9UwO` zqyz()ks*T!hmqD2T2NXA=AHI+`gYu{s3T|$QCE~)j(~O~6c6pTp8&cLn=3CT7Q^Y! za+lGC30$5LWlnOTl!l}X4BUu|5nZu_aR~JZGS3nqIab0Xk`htdVk{&!H;Fu+kC-8Z zTRFB5A=RL4GR`#pTE2%lYnEgun!3{!ZC_txU`SFdN$i?-0HZqssD%cpCK|XzzV{f& znzGa9eCM`$tvT`PWnk z1R{hy3Xucgk3j%PT!MLkrYjMKp%co=*dZa{LaInqUY7b6VC;a>-Ui2DBolU%!~wL{ z1W+X07ZA;0S1AwiS;iV=>}%fMdSDbc9P~gK8_2C)Q|~=U)Q~Pgp=c7;!8L0U%v|@6_5Shx&NpS?Me#-J&GtMs^Bn@ z`#RegT8kP^PZG<0UNdYOVKacpTP8XyR@ehjFUA8@O!R`Z#(6c(XoAPlJjXT7>iYY{ zk;MPZl~S*d1#vVp5kwcQ;z+xKKrw;iX%Mu2f=A5@P3b$d-b@^+gp15n8x?tqrl~&2 zQKGfR9xcy}OdLodDZ{8F!4W~QvDoT+z~TVCp+(}FO~%oEk0p&lhD+@#BkK-5hF6e) z^x$=XEi6BTu^=rC(ogn*2Y{zTD&+F2+(>ykxW6e$qPm_|^~QGB6u+kaq^Z40{viUe zR6aIfz|mGhlM{K?xM%8fWDv>ieq4dV&LH=1MNKSCQ%4Yi?neq2+)>JnB0;!u_P@Sv z*0#)gt-9ISsv8@tTdOE++cwua)lR3jTCLVs^!nPeY&w9=4yeC~qDp@#g>XvkiF8rc zUOEIrj_8y;VMKBmLme3^3bvZIiP)eeBTsrJX``0f4&I?nR)HPTCOJdV!R-B3NpQU5 zl5Wo*Od6y;T7EAAbe_;o!4i@{Qm|LjyoPev__gb*K)hVNOdMJS_bU3fXXZ z<0`YN5>e@uo0=34i1)OJu%pookOQ1HUZNlC&_8oZlnaISMXVqb{y4dI1b`5+!C}J4 zOHH#S9V5w_h0injAzkN_klfyZO!NL_EzEAXn2gu?}6B!Z}`iYPTo74ksBT@MijxN+?5)cHa|@|hghW>oY;uKHF%i3a&C+5usKVH{Y{ zaV?Ov0jrQd4F|)-i&s*EzNuxWyhT(6hUH9p!XoC4-U0ctpaG^Sawd&1N(*EF-HAsZ zvsu#N;J{0X>cMPFl2+op9zizK?gNd2298!jsV(18N)E<=tz&2>F>}!Vc?^-sB=4hr zz_A-p1Tn(VdSb(Knyn`nP}F&H4WYgZ24e`#N+NkKX>G_oh(N+UkF4~R38-sr#Bj!; zP%*&3YXT!nMAWoStww6*3Tmk-h-P49sU{xkOWGaQ5O@79SKfhzs7d*YHBO^9;+^KN zIc$~z5mi3G*V3?<+rYdnN;uF1WIP=dB{b$i@lomp(z{WOCUr%3`j);8`9jw!6{hGUm=@Ag_b*O%6_a%9A&TAq@5?yJ= z?M!Hl=SWp@nX#}RopT9f+%&}zVYiRU&6VX%DL0#=PaPc4AY%dIXz+;Is+Q{vX5km6 z@x|oOjdWPfa`YkKQUZ{Tz9gNxG@h_U2=-rE?$Vi?(rZ{DXXk_rZA#E+R#J9i?V3jA z3ZjCLTEKV(8ao1TmDugi2(zCKpl!kOVDUjFz0x$IBfxBbK*^DUGH&@(YPv5s2JV_F7Vn`s+Qb4~G z@Wzv3P0FySB`Mcd;P|B0qajFtaisIn4b}f>=j69PCfI)FUSKbqq*6n=3RD!J?GCmd zt)xU#k%|F@j>bb2i5*L?M0PC3G`ET;tYkG0yBF6g&Viv{LoClJz+W(<4BzJHWTTh^ zY+h7=%{O4+WOP0R<-B0OVvFHDB5?>1>O5P*oRkD@MIKeih{`fk2P%CSO3`VF?S+Nu zWuG%5vNQ`9bUaZyBl_OgK2IPyRTz>SR+JY(J~Zjw$%r^*Tcn~CY1biTZYMea1%+o@ z+Orl@M*$WAnfvV6w}1CkfV-BJkaN~|0gw4VEanNzaH-vq86~e4M&^Fxpcy?#kS8qX z3Cr{6_2vo7vQ%igj*=#ekS1QBD8(^KJx*r;OEWl6Sk4oc?~DS|mT#%p*UYSPGR4V< zqS#A$BFB8xlU$f5EThnTp0FGuI!W9U#hCJhWmpPPtRqiYHn+#qNGMl@oxxEeKq^Dx z?4;wHdBSp@u$(6>L)c;soss38CoJa)%Xz|bp0KPA&*+G;abP!Q>`Kw2-m8JSTIjXBu zb5u{h@_FG!XUER?^7q1FUlg{rLX<8}sm>YEa?#VTe>6qFM^f`8o!XYj2vhmGTjw1@~DHaCl>Yp zx1XnW=c(Nv-@1MUYES5SPLq_qMsdnXELMu#u<>vos?jylklgSVaO|Q2R=7%Sklr-$ zIyVjUj{#r_)&Tu7a1R4yGG4v$!L2<4BN+djeBKwqzyX%h(;k7(Q@iujZa_g$WIe~C z&atR-Eb1V~qRz3XeITv_QAPnKr@>ruENX(fR&1gG!Z-wBp4y$Kb`vgk1oK7*R3q&w z$D*b}>*rY1vZ$5uJkKCDpu0?yoj8Ae~6sq;#iL2 zpD(IsRF8K&^=lk-fKH<1@K!Nkc4RTp`G?5c65}Y3FXuc4n%CncS1s014sR6|zRT*9Q`OqKg!Fs=H*)Ra;->tfK@RA(B`#9 zKUx{n6UK9;RA|%JZI`O@axqrU4*7Io8;2+{e&BvGK#A(0FAUU1Mv*Iog*~@pt7lQr 
zp8OrCWDlGL8>N5geBY8LDqVbda@1c$NQAy~;G()Pa(k#!DHYvDiD0fmoGN$<7erL) zR01)OQz96kY&xnO`&>{ysAxB->^N0cr$UdZw~sB`uo$?(_nZS){&WCIuA`K&J`}zz zv#Jh+QMqfXhiOyYaxW?rA0a0lxIfccEp`))*7&_`2jN2$QGSHs)D1)Q(Ft9ntY13B zh#Q!qAesuEE~ZL-ACC~x{1y5t7v!V}49_Q!7HF6>jlL}_3DZ}YNJTr|p$6Fdhf2-f zCqfoDRbQ9`l)=`*XxU+P2@@kMIb%vC)vIvp&gl1@ z+johx-&VV%nke0fBbI{4B%{r zY)B6|2J;BYM|G)Dk~a;h2L_}v=_sO%#f3WT zdT>a5>5^KsFbYt z`VQkQrSb;py*oq-n&X8=XZ1r`)1ucFT)95bO2GhvK@YSKM6ajw^#1BtxOJ^oS-Z+V ztN3G)KZDLuo8Yi%V~Pq|l(Gn!)bD$KIAhvcE#{MbOMn;>cYF+$r87~66VR%fZL7t@ z2(IZkdY4MwE4hu~^C95uig;-vS{tcYJj#vFUqX4Qkdh7sln$QUcb=)pG1X*W8O!aU z`GYctc$2 z25%QE^&t)EDwD*s9q&A{SiJ3qBv!Dh4D2=ZXw=%%9*SQ@^Jh6EqFqQ!$MS*$H-+Xk zW5fN zs8sD8k|&}g(*ogi%JJqNPrrd`-Sl}JJ3YIU_!0}@Q4tY+MsRIcu49i*PcCa& zQyrr)y5Oa_kvGx5Q5KY2m{bjAZ&a7@R$!a)N*(PNQ^aE(Pg0yyMk8CAP#050<+Zq& zJkRIY7N%(w5T@O9n?Q)RrRK>z(BwJYi5FK?xkxfdO7zilS0E@Hk0z2-9@?oaC^HZ} zBsO@CZK-~B)KsUW7VG7t?NCv5?yIKV2PW~hFFM<;ws5@T9mRG{){5x4CN~h^yToK_ zL1L36y$fx`E7x|(Itir;B;_DDgU6Bd=QdP)R`Za~ia{UptK$nZF-$ILccEuXU9Lmc zfYhW&M2t>Zq}H?g6D>jhDoy*9D_5YTprKN-^eU1Q9#$V!XlKA=LBYU5VEaR%o4b+z zTB6IER+38k;HHxHIx@iz`!k}V|FMBl@v#;wAf>){W`6}k1R+K9|2kbUrn zkoEwhus`s@gaUp8+CQ<-ZG&vV9vOMmZbV>N5y;~he;c+FNwL|Y2qq>Kp7sZ@m`0{a zcDt!sEtjQEBlq={<;|;7H63|MeJOn=YM=Zhr1NwJ5a^Mc!H1)#FCaM42S|3C>;@k) zrw?6}hF5+IhLeJIAnIBzCe#VY+MpeowuERPu=SA0ha#8SZee~a;jbPm(_hFpwDrKX1bsr7>|EmZUe3TN(}hH`252c^9QHkkb=cO)kEUt+jh)foS4Lh8 zJ;bAs0_s=#dT32NyPVvY`~Sf~Bp${-ss!k?57HR3O)d}6dpb|xv{jT)DyKOjPEKB{ zHQp6WD{Nmg+VSL?=P8f!6to@s*^GWhD?Vldy^Xnq*945=YJrZm1GZy3I7JId7%&dY zWONXFx(DAu$8(?_@XE$b2$(LvhxG+L6dt=sP?;!w3fB%TVYG7KrW%&dVgr|^wdzTv zzmE&Ev+oQq7L!;A_Wu5iCjJ~KcL{G?K%<{ zEUQYi@K}jljMkc_BqRABQe$uQ)u8l785d5YBLM~lserJ~%3V9UxS zjkJh8Y~FAnmWmJ?Fe3rPB+Ulgy05O>)qTAelz7_~-C<+j>)(y;-KV`A7bBQKuixx* zvPG{ia0l8p>0;Gt4P!|6@M{a7>*3x|zbrMf7|3gHIs zp-DOw!&evwQF0Ysri@*VsFdUl%Y!b{*x~E^cZC3wbDU(ElT5?H^Ld@T3Owc|p716i z?KX;-WbgpS6-oq)CoJtoaUWbO;~J1k0fIK*UvN+brZI~)9dRqnHB{HwH66fDDrmhz zFe9vS?D=L;LA$;{Xbc{FHvj1}jO8S;yq_#ST71N4v2R#!T5ml%Em?1;;bXtKXMM~1 zPWs9_*1J<7i>*ro>++uU{(nFdg@3Y&dxN%B+Orm4Fi!Hmmk~Dc&uDjHNg|QM*bv~2 zpClu01)=B%+Y1Y|N=6j^0w4k~2q5!VR=dIwK|rw$I9v}A%Ya6VkdHH9B{?>fBAKn- z&WP-vO1qY~mdZlCLgB!)Vpx~EPDa!%^Y}!;WffJG;Z!Wt5yx6(VWpyFMCKKZrH#}V z*ob41gN@rKLr6t{Xlo?5I`>I;`WAVhoq-54qWb5Rg;mV=KKb2J`h~KRL2Sw#&4|QH zte!xolFiBHMWmEh78X9Fh`ycG==~7W7|aaaCFh^dUm>OG}saX5r#FSW8WBB=_2 z+-kLCRAXU|fJR1i{&f=aNC#3fqUO8gOT(?IS^>g~V;0M2s?` zatZN|MH(Paw`!|fT-^drgCv!eVNQT0Q(MUY%MqU$ekT0uqx70*a#fMAH{ zG9&WdmBAwlri=&lGoq*zdDG>z$vhtEjTy}b$%ww6FOmhfu)u*sIw2vISm(kyiAFa< z^0i2}5haevk(5#RR#w~2-_lN#&iHI4@jaO)4NA%Vtd3HoyBJ=(c#k zIg=nG>MkL|LdmrV{zH>Z(L>i|9K4kgY40afiy&SqJUXJWWklT1X;#x|h-c_)8Ikyt z#G-E^F7YYP?=ZKPe@6Y3pBhLkyL%!RH}^V{NeNzE^O2p5wsF12%IrM8DD2{pG?5X zh}12q_>E=_N@FltH<1rf2I19NfQ$&fRna2(2Qul5C5`O}Wt0)4&Z|w&mXk54$qxMU z|NP`j{Lgsoco9M8cVt*Oi8Fso#hK3ySw*M5K^V8kS6fK>Cm!{sNf` z;HjkSxJ*0&nh5eb12J03obpiUd0j!s;cre&;o6-$bmRnySd@rSA%mgG*^qsoqM^uA z8NXD#qP{$5ZO=TOWzHy~+ z;703+RETjh=q)yCNSBOG4A^0%NGTh6Y7sJwXrg$UG4v?qrZTKJu?6yFhy;vYvqw1( zT>zO1oX501B)#6}8;XCfs>>DF4#G7lXNmtPUXi&_lg)#*;sh?b8{oo2Vl581%k&<8 z>zR3PBoFAuV_^(J$sPDo62%AVA#u!e6=J6bh|Prj2--h8S4b|wsU4lDaEr!9I&weG zVFEMDTsX>MM4(^SC|#YRca*I((!&VXk;eBf<;11F%@5`|R3+=FcEKt2!Y^6BxBfBN zRDW;%?!}~UO@8{Mh|`~Ra<~r}hkSYbuWv4x9mH?@W(8S@NLvn$N`>W9koYfika*T! 
zcz$Br^m^(i-^HoSW#n|@oN};ie71Zvv^Fv&=m*1^S*tznZf@3`;BgRGuCZny&EJ4d$n3#U#qT`SF5Wl<@QQPFR!nzw#|)>-Z874Q|phP zeC;b=`jYjhC#Rn*;E$7koO2ORSw^!Md7PC}lX=c?28opTqxC224@#8%-ulnUjZq0_ zS;?K}lOIp1*U$M~=wVaxlrk!{pdQj*58OK(_TKZtn;wpD-SB-6M(Q4p zCh>KKt1`7>@^%q~Ei1|jxn)?SvnRP_m|KSV>6%yBGK^&6Ov^A!#rfp>$}SyOX)`72 zFJYG^3U#KWygO=_rq%gu=_0#yx-pR{U0YKu!^Bi1e;G{0q4y7+eTZgb@WO5@jqQ4`!N~gGY9>(v@$mqaw{{pGI4Gdr^X2g z6vvQv?&HTj{=0(k2*)4?zKbwP>Q}66llg;mU2@hFfY+H7ou0)e%dO0dVrAyO3yw>i zXO_CV zroo#`U|=#w1-THWZ2;ScaE%o9yzxFk&lSkaUf9+O(byH17Wg`4ztQ!K7`HM$yAfSU zdKswuoM{EU8BzH&t-u*>G*FQ}kjjfKFnLcFo@AFtUmwk*2VS7h33~}tS%l*svF8g% zyLw?+77yOMVE(UTf*w6aQrunP19b7RjmiKr)gCny0Di(du`nkxrSt6IJO-R+2P0cJ z&kla2vxBRjPf;(lLZn(uZt6bdh!;lRHrBD zU2}ZYx!NK?7{j!aF6qcrbY`V^#;Rxj`y$-_(};;u7a#76$Kf?u zHRBe}LPdblxC&=8AFP{rozGKA#Zp@V`1!y+43Nus^~MLc_DItoUvlz!U|gdp43)Sk z0xx=w&*BsxUeIb|l|Ep@_W{MNXx>h*;I!ULDzVaiVJmC(YQ0VePIrBr@WNTS`@mDS zk96Ri?z-ks`?sD8EdJooUIHen3y;-ragd1WRN~QJdX@`C`a__>3l}K;g34ktU_2#v zc71Yn!OI22W&A87=BbXAp>5p{QNPci;vsaL5cP_>48C02y|u_~_Xr*y6(SrQd?HA5 z^i^hmCiJmcf8!%m`SAQhIyo3>AK@6J;1xErR#L4%12F9niqOg_8T)#&V+sX>cm$7kZ&WAmFjn*g*&KEpx&hs;`i>S zsAU?XaibT0H2|UrKL{fccY$Dyl38{rYmWf8oJ#${LGys}WPxKYL|A#ahJ4zmW1I0( z^5bm7^Lb&Y9WQ9>w)N(5iplpe9eY=Ic{Zp3X9z~X80`+aRK|8?NvmzGZeg+_6jg@2 z$1uwBc~?iT$t6e+@cd}Nxy1Oeml7?iD?}KSmFWweA)5`G@AqtuT)VAo?*Ur}Ct;`%G&zIal^F19d-`Wrt?u zv~S z757NyOPxQ*ImQ3yTd#=+J-0nWByqw@j1g6KKK}UD^($mI;zpCPEhdaddS05FNCM4m z&)oLRZBJ#_XJskpwkP~i*WhwT$vR|)jDkk#$QGg6h~y26=Xznld4Y0@)yBj#?z z+>MyK5y>}Csuup!C@NohF3NLkgB;r+N4+ByygaYvDf3!>lm@zb!zx&XlQ-a$dKpfq zDW5J)OXiyr)O>YQDL=fhsgx80Lj+D!k+wW{k`C*>ibb3Avt_7c51Y18%#l^Ofb}~^!3b7eDlKQC!VJoyz-vJ2@cCM5XUnRUzg-8 zAJR^tZ-<*}Ao)Jyg(;4p1ni`)G?cdZ!Qu1Fk6))^W{o5DK4NS0v1hLU85V^!nK4=Vf&E{Sw7AQlk*{Yjgzxa0Skbs zLNzq94=jNd;1iS%%H$E~$K--Yh9FJNP1M9iML$)r zva#@|Uw;sV!dv3|bcb}?A;$D)D{Jb0f#q#4WpY6w;;O0ynxrHf? 
z+|&YKJfJKqdHpyR;6$ieAw*ROF1={S;B{ktYpb)VmseK|{Hs%Km-V)}TCT2bZEV!* z?bW&vzJP4Ol%bC9M=?sVxKC;9E!(M`&W^8^b2uSPopQ!{4Lk2RjRGpd{`>t)HPR`RV771 zPh9ge(2-5dsv~9MQ1#FI>dIZdPzrF)ITYPtW8drFjqc&FGhd9(-~(d2k5_RjAHFpm z8?0zlE2!XCqXQPWhhJMrUGC|RRngx@m3I6-(wHge+r%kx-3f<{TBRxrmEx;7_l+ib z!(Gx!MLwv3-gWHP5?eM`', 'code': \"import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load data\\ndf = pd.read_csv('inflation.csv')\\n\\n# Convert date column to datetime\\ndf['date'] = pd.to_datetime(df['date'])\\n\\n# Group by year and calculate average inflation\\naverage_inflation = df.groupby(df['date'].dt.year)['inflation'].mean()\\n\\n# Plot time series\\nplt.figure(figsize=(10,6))\\nplt.plot(average_inflation.index, average_inflation.values, marker='o')\\nplt.title('Average Yearly Inflation')\\nplt.xlabel('Year')\\nplt.ylabel('Average Inflation')\\nplt.grid(True)\\nplt.show()\"}), ('tool_name', 'code_interpreter')]": { + "type": "value", + "value": { + "content": "completed\n[stderr]\nTraceback (most recent call last):\n line 5, in \n from bwrap.core import main\nModuleNotFoundError: No module named 'bwrap.core'\n[/stderr]", + "error_code": null, + "error_message": null, + "metadata": null + } + }, + "()_[('kwargs', {'session_id': '', 'code': 'def is_prime(n):\\n if n <= 1:\\n return False\\n if n <= 3:\\n return True\\n if n % 2 == 0 or n % 3 == 0:\\n return False\\n i = 5\\n while i * i <= n:\\n if n % i == 0 or n % (i + 2) == 0:\\n return False\\n i += 6\\n return True\\n\\ndef get_nth_prime(n):\\n count = 0\\n num = 2\\n while True:\\n if is_prime(num):\\n count += 1\\n if count == n:\\n return num\\n num += 1\\n\\nprint(get_nth_prime(100))'}), ('tool_name', 'code_interpreter')]": { + "type": "value", + "value": { + "content": "completed\n[stderr]\nTraceback (most recent call last):\n line 5, in \n from bwrap.core import main\nModuleNotFoundError: No module named 'bwrap.core'\n[/stderr]", + "error_code": null, + "error_message": null, + "metadata": null + } + }, + "()_[('kwargs', {'session_id': '', 'code': 'import pandas as pd\\n# Load data\\ndf = pd.read_csv(\"\")\\n# Rows\\nprint(\"Number of rows and columns in the data:\", df.shape)\\n# Columns\\nprint(\"Columns of the data are:\", len(df.columns))\\n# Column names\\nprint(\"Columns of the data are:\", df.columns)\\n# Column dtypes\\nprint(\"Datatype of the columns are:\", df.dtypes)'}), ('tool_name', 'code_interpreter')]": { + "type": "value", + "value": { + "content": "completed\n[stderr]\nTraceback (most recent call last):\n line 5, in \n from bwrap.core import main\nModuleNotFoundError: No module named 'bwrap.core'\n[/stderr]", + "error_code": null, + "error_message": null, + "metadata": null + } + }, + "()_[('kwargs', {'session_id': '', 'code': 'import pandas as pd\\ndf = pd.read_csv(\"\")\\nprint(df.head())'}), ('tool_name', 'code_interpreter')]": { + "type": "value", + "value": { + "content": "completed\n[stderr]\nTraceback (most recent call last):\n line 5, in \n from bwrap.core import main\nModuleNotFoundError: No module named 'bwrap.core'\n[/stderr]", + "error_code": null, + "error_message": null, + "metadata": null + } + }, + "()_[('kwargs', {'session_id': '', 'code': 'import pandas as pd\\ndf = pd.read_csv(\"\")\\nprint(df.head())\\nprint(df.info())\\nprint(df.describe())'}), ('tool_name', 'code_interpreter')]": { + "type": "value", + "value": { + "content": "completed\n[stderr]\nTraceback (most recent call last):\n line 5, in \n from bwrap.core import main\nModuleNotFoundError: No module named 'bwrap.core'\n[/stderr]", + "error_code": 
null, + "error_message": null, + "metadata": null + } + }, + "()_[('kwargs', {'session_id': '', 'query': 'How to use LoRA', 'vector_db_ids': ['vector_db_']}), ('tool_name', 'knowledge_search')]": { + "type": "value", + "value": { + "content": [ + { + "text": "knowledge_search tool found 5 chunks:\nBEGIN of knowledge_search tool results.\n", + "type": "text" + }, + { + "text": "Result 1:\nDocument_id:cbc88\nContent: .. _lora_finetune_label:\n\n============================\nFine-Tuning Llama2 with LoRA\n============================\n\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\nIf you already know what LoRA is and want to get straight to running\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\n\n.. grid:: 2\n\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\n\n * What LoRA is and how it saves memory during finetuning\n * An overview of LoRA components in torchtune\n * How to run a LoRA finetune using torchtune\n * How to experiment with different LoRA configurations\n\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\n\n * Be familiar with :ref:`torchtune`\n * Make sure to :ref:`install torchtune`\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\n\nWhat is LoRA?\n-------------\n\n`LoRA `_ is an adapter-based method for\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\ntransformer models, in which case it is common to add the low-rank matrices\nto some of the linear projections in each transformer layer's self-attention.\n\n.. note::\n\n If you're unfamiliar, check out these references for the `definition of rank `_\n and discussion of `low-rank approximations `_.\n\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\nyou can expect to see memory savings due to a substantial reduction in the\nnumber of parameters with gradients. When using an optimizer with momentum,\nlike `AdamW `), you need only pass the\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\n of in the recipe.\n\n\n.. _lora_recipe_label:\n\nLoRA finetuning recipe in torchtune\n-----------------------------------\n\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\n\n.. code-block:: bash\n\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\n\n.. note::\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\n for more details on how you can easily clone and modify torchtune configs.\n\n.. 
note::\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\n and (b) the memory constraints of your hardware.\n\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\n\n.. code-block:: yaml\n\n # Model Arguments\n model:\n _component_: lora_llama2_7b\n lora_attn_modules: ['q_proj', 'v_proj']\n lora_rank: 8\n lora_alpha: 16\n ...\n\nWe see that the\n", + "type": "text" + }, + { + "text": "Result 3:\nDocument_id:8892b\nContent: with training with LoRA quickly,\njust specify any config with ``_lora`` in its name, e.g:\n\n.. code-block:: bash\n\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device\n\n\nThere are two sets of parameters to customize LoRA to suit your needs. Firstly, the parameters which control\nwhich linear layers LoRA should be applied to in the model:\n\n* ``lora_attn_modules: List[str]`` accepts a list of strings specifying which layers of the model to apply\n LoRA to:\n\n * ``q_proj`` applies LoRA to the query projection layer.\n * ``k_proj`` applies LoRA to the key projection layer.\n * ``v_proj`` applies LoRA to the value projection layer.\n * ``output_proj`` applies LoRA to the attention output projection layer.\n\n Whilst adding more layers to be fine-tuned may improve model accuracy,\n this will come at the cost of increased memory usage and reduced training speed.\n\n* ``apply_lora_to_mlp: Bool`` applies LoRA to the MLP in each transformer layer.\n* ``apply_lora_to_output: Bool`` applies LoRA to the model's final output projection.\n This is usually a projection to vocabulary space (e.g. in language models), but\n other modelling tasks may have different projections - classifier models will project\n to the number of classes, for example\n\n.. note::\n\n Models which use tied embeddings (such as Gemma and Qwen2 1.5B and 0.5B) for the\n final output projection do not support ``apply_lora_to_output``.\n\nThese are all specified under the ``model`` flag or config entry, i.e:\n\n.. code-block:: bash\n\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\n model.apply_lora_to_mlp=True \\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\",\"output_proj\"]\n\n.. code-block:: yaml\n\n model:\n _component_: torchtune.models.llama3.lora_llama3_8b\n apply_lora_to_mlp: True\n model.lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\",\"output_proj\"]\n\nSecondly, parameters which control the scale of the impact of LoRA on the model:\n\n* ``lora_rank: int`` affects the scale of\n", + "type": "text" + }, + { + "text": "Result 4:\nDocument_id:cbc88\nContent: LoRA to Llama2 models\n------------------------------\n\nWith torchtune, we can easily apply LoRA to Llama2 with a variety of different configurations.\nLet's take a look at how to construct Llama2 models in torchtune with and without LoRA.\n\n.. 
code-block:: python\n\n from torchtune.models.llama2 import llama2_7b, lora_llama2_7b\n\n # Build Llama2 without any LoRA layers\n base_model = llama2_7b()\n\n # The default settings for lora_llama2_7b will match those for llama2_7b\n # We just need to define which layers we want LoRA applied to.\n # Within each self-attention, we can choose from [\"q_proj\", \"k_proj\", \"v_proj\", and \"output_proj\"].\n # We can also set apply_lora_to_mlp=True or apply_lora_to_output=True to apply LoRA to other linear\n # layers outside of the self-attention.\n lora_model = lora_llama2_7b(lora_attn_modules=[\"q_proj\", \"v_proj\"])\n\n.. note::\n\n Calling :func:`lora_llama_2_7b ` alone will not handle the definition of which parameters are trainable.\n See :ref:`below` for how to do this.\n\nLet's inspect each of these models a bit more closely.\n\n.. code-block:: bash\n\n # Print the first layer's self-attention in the usual Llama2 model\n >>> print(base_model.layers[0].attn)\n MultiHeadAttention(\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (pos_embeddings): RotaryPositionalEmbeddings()\n )\n\n # Print the same for Llama2 with LoRA weights\n >>> print(lora_model.layers[0].attn)\n MultiHeadAttention(\n (q_proj): LoRALinear(\n (dropout): Dropout(p=0.0, inplace=False)\n \n", + "type": "text" + }, + { + "text": "Result 5:\nDocument_id:9dcb7\nContent: ora_finetune_label>`.\nFor more on QLoRA in torchtune, see our :ref:`QLoRA Tutorial `.\n\nLet's take a look at how we can fine-tune Llama3-8B-Instruct with LoRA on a single device using torchtune. In this example, we will fine-tune\nfor one epoch on a common instruct dataset for illustrative purposes. The basic command for a single-device LoRA fine-tune is\n\n.. code-block:: bash\n\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device\n\n.. note::\n To see a full list of recipes and their corresponding configs, simply run ``tune ls`` from the command line.\n\nWe can also add :ref:`command-line overrides ` as needed, e.g.\n\n.. code-block:: bash\n\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\n checkpointer.checkpoint_dir= \\\n tokenizer.path=/tokenizer.model \\\n checkpointer.output_dir=\n\nThis will load the Llama3-8B-Instruct checkpoint and tokenizer from ```` used in the :ref:`tune download ` command above,\nthen save a final checkpoint in the same directory following the original format. For more details on the\ncheckpoint formats supported in torchtune, see our :ref:`checkpointing deep-dive `.\n\n.. note::\n To see the full set of configurable parameters for this (and other) configs we can use :ref:`tune cp ` to copy (and modify)\n the default config. :ref:`tune cp ` can be used with recipe scripts too, in case you want to make more custom changes\n that cannot be achieved by directly modifying existing configurable parameters. For more on :ref:`tune cp ` see the section on\n :ref:`modifying configs ` in our \":ref:`finetune_llama_label`\" tutorial.\n\nOnce training is complete, the model checkpoints will be saved and their locations will be logged. 
For\nLoRA fine-tuning, the final checkpoint will contain the merged weights, and a copy of just the (much smaller) LoRA weights\nwill\n", + "type": "text" + }, + { + "text": "END of knowledge_search tool results.\n", + "type": "text" + } + ], + "error_code": null, + "error_message": null, + "metadata": { + "document_ids": [ + "cbc884b1-9d88-4d5c-aff4-7a4b3a56618c", + "cbc884b1-9d88-4d5c-aff4-7a4b3a56618c", + "8892b092-6394-471e-b143-a23c6cc374f8", + "cbc884b1-9d88-4d5c-aff4-7a4b3a56618c", + "9dcb747d-0627-40cc-a23c-0bee2b6b05af" + ] + } + } + }, + "()_[('kwargs', {'session_id': '', 'query': 'Llama3-8B attention type', 'vector_db_ids': ['test-vector-db-']}), ('tool_name', 'knowledge_search')]": { + "type": "value", + "value": { + "content": [ + { + "text": "knowledge_search tool found 5 chunks:\nBEGIN of knowledge_search tool results.\n", + "type": "text" + }, + { + "text": "Result 1:\nDocument_id:num-1\nContent: 3 `_ is a new family of models released by Meta AI that improves upon the performance of the Llama2 family\nof models across a `range of different benchmarks `_.\nCurrently there are two different sizes of Meta Llama 3: 8B and 70B. In this tutorial we will focus on the 8B size model.\nThere are a few main changes between Llama2-7B and Llama3-8B models:\n\n- Llama3-8B uses `grouped-query attention `_ instead of the standard multi-head attention from Llama2-7B\n- Llama3-8B has a larger vocab size (128,256 instead of 32,000 from Llama2 models)\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\n- Llama3-\n", + "type": "text" + }, + { + "text": "Result 2:\nDocument_id:num-1\nContent: instead of 32,000 from Llama2 models)\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\n- Llama3-8B uses a larger intermediate dimension in its MLP layers than Llama2-7B\n- Llama3-8B uses a higher base value to calculate theta in its `rotary positional embeddings `_\n\n|\n\nGetting access to Llama3-8B-Instruct\n------------------------------------\n\nFor this tutorial, we will be using the instruction-tuned version of Llama3-8B. First, let's download the model from Hugging Face. You will need to follow the instructions\non the `official Meta page `_ to gain access to the model.\nNext, make sure you grab your Hugging Face token from `here `_.\n\n\n.. code-block:: bash\n\n tune download meta-llama/Meta-Llama-3\n", + "type": "text" + }, + { + "text": "Result 3:\nDocument_id:num-0\nContent: :`download Llama3 Instruct weights `\n\n\nTemplate changes from Llama2 to Llama3\n--------------------------------------\n\nThe Llama2 chat model requires a specific template when prompting the pre-trained\nmodel. Since the chat model was pretrained with this prompt template, if you want to run\ninference on the model, you'll need to use the same template for optimal performance\non chat data. Otherwise, the model will just perform standard text completion, which\nmay or may not align with your intended use case.\n\nFrom the `official Llama2 prompt\ntemplate guide `_\nfor the Llama2 chat model, we can see that special tags are added:\n\n.. code-block:: text\n\n [INST] <>\n You are a helpful, respectful, and honest assistant.\n <>\n\n Hi! I am a human. [/INST] Hello there! Nice to meet you! 
I'm Meta AI, your friendly AI assistant \n\nLlama3 Instruct `overhauled `\n", + "type": "text" + }, + { + "text": "Result 4:\nDocument_id:num-0\nContent: 'm Meta AI, your friendly AI assistant<|eot_id|>\n\nThe tags are entirely different, and they are actually encoded differently than in\nLlama2. Let's walk through tokenizing an example with the Llama2 template and the\nLlama3 template to understand how.\n\n.. note::\n The Llama3 Base model uses a `different prompt template\n `_ than Llama3 Instruct\n because it has not yet been instruct tuned and the extra special tokens are untrained. If you\n are running inference on the Llama3 Base model without fine-tuning we recommend the base\n template for optimal performance. Generally, for instruct and chat data, we recommend using\n Llama3 Instruct with its prompt template. The rest of this tutorial assumes you are using\n Llama3 Instruct.\n\n.. _prompt_template_vs_special_tokens:\n\nTokenizing prompt templates & special tokens\n--------------------------------------------\n\nLet's say I have a sample of a single user-assistant turn accompanied with a system\nprompt:\n\n.. code-block:: python\n\n sample = [\n {\n \"role\": \"system\",\n \"\n", + "type": "text" + }, + { + "text": "Result 5:\nDocument_id:num-3\nContent: LoRA to Llama2 models\n------------------------------\n\nWith torchtune, we can easily apply LoRA to Llama2 with a variety of different configurations.\nLet's take a look at how to construct Llama2 models in torchtune with and without LoRA.\n\n.. code-block:: python\n\n from torchtune.models.llama2 import llama2_7b, lora_llama2_7b\n\n # Build Llama2 without any LoRA layers\n base_model = llama2_7b()\n\n # The default settings for lora_llama2_7b will match those for llama2_7b\n # We just need to define which layers we want LoRA applied to.\n # Within each self-attention, we can choose from [\"q_proj\", \"k_proj\", \"v_proj\", and \"output_proj\"].\n # We can also set apply_lora_to_mlp=True or apply_lora_to_output=True to apply LoRA to other linear\n # layers outside of the self-attention.\n lora_model = lora_llama2_7b(lora_attn_modules=[\"q_proj\", \"v_proj\"])\n\n.. 
note::\n\n Calling :func:`lora_llama_2\n", + "type": "text" + }, + { + "text": "END of knowledge_search tool results.\n", + "type": "text" + } + ], + "error_code": null, + "error_message": null, + "metadata": { + "document_ids": [ + "num-1", + "num-1", + "num-0", + "num-0", + "num-3" + ] + } + } + }, + "()_[('kwargs', {'session_id': '', 'query': 'NBA creation date', 'vector_db_ids': ['test-vector-db-']}), ('tool_name', 'knowledge_search')]": { + "type": "value", + "value": { + "content": [ + { + "text": "knowledge_search tool found 3 chunks:\nBEGIN of knowledge_search tool results.\n", + "type": "text" + }, + { + "text": "Result 1:\nDocument_id:nba_w\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\n", + "type": "text" + }, + { + "text": "Result 2:\nDocument_id:perpl\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\n\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\n Konwinski was among the founding team at Databricks.\n Yarats, the CTO, was an AI research scientist at Meta.\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\n", + "type": "text" + }, + { + "text": "Result 3:\nDocument_id:perpl\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\n", + "type": "text" + }, + { + "text": "END of knowledge_search tool results.\n", + "type": "text" + } + ], + "error_code": null, + "error_message": null, + "metadata": { + "document_ids": [ + "nba_wiki", + "perplexity_wiki", + "perplexity_wiki" + ] + } + } + }, + "()_[('kwargs', {'session_id': '', 'query': 'Perplexity company founding date', 'vector_db_ids': ['test-vector-db-']}), ('tool_name', 'knowledge_search')]": { + "type": "value", + "value": { + "content": [ + { + "text": "knowledge_search tool found 3 chunks:\nBEGIN of knowledge_search tool results.\n", + "type": "text" + }, + { + "text": "Result 1:\nDocument_id:perpl\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\n\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\n Konwinski was among the founding team at Databricks.\n Yarats, the CTO, was an AI research scientist at Meta.\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\n", + "type": "text" + }, + { + "text": "Result 2:\nDocument_id:perpl\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\n", + "type": "text" + }, + { + "text": "Result 3:\nDocument_id:nba_w\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\n", + "type": "text" + }, + { + "text": "END of knowledge_search tool results.\n", + "type": "text" + } + ], + "error_code": null, + "error_message": null, + "metadata": { + "document_ids": [ + "perplexity_wiki", + "perplexity_wiki", + "nba_wiki" + ] + } + } + }, + "()_[('kwargs', {'session_id': '', 'query': 'Torchtune documentation', 'vector_db_ids': ['vector_db_']}), ('tool_name', 'knowledge_search')]": { + "type": "value", + "value": { + "content": [ + { + "text": "knowledge_search 
tool found 5 chunks:\nBEGIN of knowledge_search tool results.\n", + "type": "text" + }, + { + "text": "Result 1:\nDocument_id:f4fd3\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\nlook like so:\n\n.. code-block:: python\n\n from torchtune.datasets import chat_dataset\n from torchtune.models.llama3 import llama3_tokenizer\n\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\n ds = chat_dataset(\n tokenizer=tokenizer,\n source=\"json\",\n data_files=\"data/my_data.json\",\n split=\"train\",\n conversation_column=\"dialogue\",\n conversation_style=\"sharegpt\",\n )\n\n.. code-block:: yaml\n\n # In config\n tokenizer:\n _component_: torchtune.models.llama3.llama3_tokenizer\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\n\n dataset:\n _component_: torchtune.datasets.chat_dataset\n source: json\n data_files: data/my_data.json\n split: train\n conversation_column: dialogue\n conversation_style: sharegpt\n\n.. note::\n You can pass in any keyword argument for `load_dataset `_ into all our\n Dataset classes and they will honor them. This is useful for common parameters\n such as specifying the data split with :code:`split` or configuration with\n :code:`name`\n\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\nIf you already know what LoRA is and want to get straight to running\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\n\n.. grid:: 2\n\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\n\n * What LoRA is and how it saves memory during finetuning\n * An overview of LoRA components in torchtune\n * How to run a LoRA finetune using torchtune\n * How to experiment with different LoRA configurations\n\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\n\n * Be familiar with :ref:`torchtune`\n * Make sure to :ref:`install torchtune`\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\n\nWhat is LoRA?\n-------------\n\n`LoRA `_ is an adapter-based method for\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\ntransformer models, in which case it is common to add the low-rank matrices\nto some of the linear projections in each transformer layer's self-attention.\n\n.. 
note::\n\n If you're unfamiliar, check out these references for the `definition of rank `_\n and discussion of `low-rank approximations `_.\n\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\nyou can expect to see memory savings due to a substantial reduction in the\nnumber of parameters with gradients. When using an optimizer with momentum,\nlike `AdamW `.\n.. .. _glossary_fsdp2:\n\n", + "type": "text" + }, + { + "text": "Result 4:\nDocument_id:cbc88\nContent: 06% of all params are trainable.\n\n.. note::\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\n of in the recipe.\n\n\n.. _lora_recipe_label:\n\nLoRA finetuning recipe in torchtune\n-----------------------------------\n\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\n\n.. code-block:: bash\n\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\n\n.. note::\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\n for more details on how you can easily clone and modify torchtune configs.\n\n.. note::\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\n and (b) the memory constraints of your hardware.\n\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\n\n.. code-block:: yaml\n\n # Model Arguments\n model:\n _component_: lora_llama2_7b\n lora_attn_modules: ['q_proj', 'v_proj']\n lora_rank: 8\n lora_alpha: 16\n ...\n\nWe see that the\n", + "type": "text" + }, + { + "text": "Result 5:\nDocument_id:8892b\nContent: etune\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\n\n.. code-block:: bash\n\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\n model.use_dora=True\n\n.. code-block:: yaml\n\n model:\n _component_: torchtune.models.lora_llama3_8b\n use_dora: True\n\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\neven more memory savings!\n\n.. code-block:: bash\n\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\n model.apply_lora_to_mlp=True \\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\n model.lora_rank=16 \\\n model.lora_alpha=32 \\\n model.use_dora=True \\\n model.quantize_base=True\n\n.. code-block:: yaml\n\n model:\n _component_: torchtune.models.lora_llama3_8b\n apply_lora_to_mlp: True\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\n lora_rank: 16\n lora_alpha: 32\n use_dora: True\n quantize_base: True\n\n\n.. 
note::\n\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\n\n.. _glossary_distrib:\n\n\n.. TODO\n\n.. Distributed\n.. -----------\n\n.. .. _glossary_fsdp:\n\n.. Fully Sharded Data Parallel (FSDP)\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n.. All our ``_distributed`` recipes use `FSDP `.\n.. .. _glossary_fsdp2:\n\n", + "type": "text" + }, + { + "text": "END of knowledge_search tool results.\n", + "type": "text" + } + ], + "error_code": null, + "error_message": null, + "metadata": { + "document_ids": [ + "f4fd30bb-23d3-4ff8-bb8a-846041ae22cf", + "cbc884b1-9d88-4d5c-aff4-7a4b3a56618c", + "8892b092-6394-471e-b143-a23c6cc374f8", + "cbc884b1-9d88-4d5c-aff4-7a4b3a56618c", + "8892b092-6394-471e-b143-a23c6cc374f8" + ] + } + } + }, + "()_[('kwargs', {'session_id': '', 'query': 'current CEO of Meta'}), ('tool_name', 'web_search')]": { + "type": "value", + "value": { + "content": "{\"query\": \"current CEO of Meta\", \"top_k\": [{\"title\": \"Executives - Meta\", \"url\": \"https://about.meta.com/media-gallery/executives/\", \"content\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer Joel Kaplan, Chief Global Affairs Officer Susan Li, Chief Financial Officer Javier Olivan, Chief Operating Officer Chris Cox, Chief Product Officer Andrew \\u2018Boz\\u2019 Bosworth, Chief Technology Officer Jennifer Newstead, Chief Legal Officer Dave Wehner, Chief Strategy Officer Will Cathcart, Head of WhatsApp Naomi Gleit, Head of Product John Hegeman, Chief Revenue Officer Adam Mosseri, Head of Instagram Erin Egan, Chief Privacy Officer, Policy Michel Protti, Chief Privacy Officer, Product Alex Schultz, Chief Marketing Officer and VP of Analytics Tom Alison, Head of Facebook Nicola Mendelsohn, Head of Global Business Group Ahmad Al-Dahle, VP and Head of GenAI at Meta Joelle Pineau, Vice President of AI Research and Head of FAIR at Meta\", \"score\": 0.8190992, \"raw_content\": null}, {\"title\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer - Meta\", \"url\": \"https://about.meta.com/media-gallery/executives/mark-zuckerberg/\", \"content\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer | Meta Meta Quest Ray-Ban Meta Meta Horizon Meta AI Meta Verified Meta Pay Meta Horizon Workrooms Meta and you Learn about our community Shop Meta Meta Quest Meta Portal Meta Horizon Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004. In October 2021, Facebook rebranded to Meta to reflect all of its products and services across its family of apps and a focus on developing social experiences for the metaverse \\u2014 moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. 
Shop Ray-Ban Meta glassesRay-Ban StoriesPrivacy informationSupported countries \\u00a9 2025 Meta\", \"score\": 0.79099923, \"raw_content\": null}, {\"title\": \"Zuckerberg's political pivot targets Apple, puts Meta staffers on edge\", \"url\": \"https://www.cnbc.com/2025/02/14/zuckerbergs-rightward-policy-shift-hits-meta-staffers-targets-apple.html\", \"content\": \"Meta CEO Mark Zuckerberg's actions to curry favor with the president have rattled employees, but people familiar with his efforts say there's a clear strategy.\", \"score\": 0.77179235, \"raw_content\": null}, {\"title\": \"Meet the Executive CSuite Team of Meta (Facebook) [2025]\", \"url\": \"https://digitaldefynd.com/IQ/meet-the-executive-csuite-team-of-meta-facebook/\", \"content\": \"Harvard University Executive Programs Free Harvard University Courses As a chief financial officer of Meta, Susan Li oversees the firm\\u2019s finance and facilities team to keep track of the company\\u2019s overall financial health. The chief operating officer of Meta, Javier Olivan, oversees the firm\\u2019s business team, infrastructure, and other products. Andrew Bosworth, called Boz, serves as chief technology officer at Meta and is responsible for leading the firm\\u2019s AR/VR organization, Reality Labs. Andrew has also served as engineering director to oversee events, mobile monetization, and feed ads and as VP of ads and business platforms to lead engineering, design, analytics, and product teams. Meta\\u2019s c-suite team comprises experienced and diverse executives, having extensive experience in technology, finance, legal, and all major industries.\", \"score\": 0.7602419, \"raw_content\": null}, {\"title\": \"Meta to spend up to $65 billion this year to power AI goals, Zuckerberg ...\", \"url\": \"https://www.reuters.com/technology/meta-invest-up-65-bln-capital-expenditure-this-year-2025-01-24/\", \"content\": \"Meta Platforms plans to spend as much as $65 billion this year to expand its AI infrastructure, CEO Mark Zuckerberg said on Friday, aiming to bolster the company's position against rivals OpenAI\", \"score\": 0.73914057, \"raw_content\": null}]}", + "error_code": null, + "error_message": null, + "metadata": null + } + } +} diff --git a/tests/client-sdk/fixtures/recorded_responses/invoke_tool.pickle b/tests/client-sdk/fixtures/recorded_responses/invoke_tool.pickle new file mode 100644 index 0000000000000000000000000000000000000000..98bc17a841cd1e39d282a779287ad05865a3781c GIT binary patch literal 35821 zcmeHwU2_{(nw}GTGcMF5v%57_xyi*Ddb|WD00@E!o z-19Q5=&7Gh!|eP&p0E9d4EJuk-4A+MyDi_o*NdYJ%R2wG|AV*Q`q_#scV*YCT(aDi znpw%>N&5u9zxdfo7G$B%ruV<{d($j9^;1)qeX-#*3HjS7%O>gW`nuPRr&(i+O*eY+ zczx{m1Ft^x!Vu6|_wxbP+0`zI-5+_$iTU$s@5E2Kelo0?2k|uO`$^5*9eF`A_9Ej& zeS8W0fyvil9uEdV&ri%pu^*a0^CqDeu?HJ{5XN0EGzWtL`lorzXVVm8Jq%p42SMaT zy#OuqvAoj&Lp~0J(_*y86F&izqha3c?kEXTb2t9Vjr1gm`_o>QH#ms;i9a)+Pg~86 zojdW@^5K@b6Q{E{$wsc*vELg-aTpKhdC!mhC<+Ex!;wErGvDjGwhw(kGH-UzJN3_dxZ#7y3+GwTOU-dS zHV0vl#@Lcu?gOvqcjNfP90k2N^g!F7y)ec0i#C>??o87lf(S4lB=K}&4n|`@^dPL? 
[... base85-encoded binary payload for invoke_tool.pickle omitted ...]