Removing custom tool and agent utilities and moving them client side

2025-12-04 02:03:44 +00:00 · 2024-09-17 12:23:16 -07:00 · 2024-09-17 12:23:16 -07:00 · 099ac81bc7
commit 099ac81bc7
parent fa864f70da
17 changed files with 100 additions and 392 deletions
--- a/llama_stack/providers/impls/meta_reference/agents/agent_instance.py
+++ b/llama_stack/providers/impls/meta_reference/agents/agent_instance.py
@ -25,14 +25,10 @@ from llama_stack.apis.inference import *  # noqa: F403
 from llama_stack.apis.memory import *  # noqa: F403
 from llama_stack.apis.safety import *  # noqa: F403

-from llama_stack.tools.base import BaseTool
-from llama_stack.tools.builtin import (
-    interpret_content_as_attachment,
-    SingleMessageBuiltinTool,
-)
-
 from .rag.context_retriever import generate_rag_query
 from .safety import SafetyException, ShieldRunnerMixin
+from .tools.base import BaseTool
+from .tools.builtin import interpret_content_as_attachment, SingleMessageBuiltinTool


 def make_random_string(length: int = 8):
--- a/llama_stack/providers/impls/meta_reference/agents/agents.py
+++ b/llama_stack/providers/impls/meta_reference/agents/agents.py
@ -14,16 +14,16 @@ from llama_stack.apis.inference import Inference
 from llama_stack.apis.memory import Memory
 from llama_stack.apis.safety import Safety
 from llama_stack.apis.agents import *  # noqa: F403
-from llama_stack.tools.builtin import (
+
+from .agent_instance import ChatAgent
+from .config import MetaReferenceImplConfig
+from .tools.builtin import (
    CodeInterpreterTool,
    PhotogenTool,
    SearchTool,
    WolframAlphaTool,
 )
-from llama_stack.tools.safety import with_safety
-
-from .agent_instance import ChatAgent
-from .config import MetaReferenceImplConfig
+from .tools.safety import with_safety


 logger = logging.getLogger()
--- a/llama_stack/providers/impls/meta_reference/agents/tests/init.py
+++ b/llama_stack/providers/impls/meta_reference/agents/tests/init.py
--- a/llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py
+++ b/llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py
@ -0,0 +1,93 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import unittest
+
+from llama_models.llama3.api.datatypes import (
+    Attachment,
+    BuiltinTool,
+    CompletionMessage,
+    StopReason,
+    ToolCall,
+)
+
+from ..tools.builtin import CodeInterpreterTool
+
+
+class TestCodeInterpreter(unittest.IsolatedAsyncioTestCase):
+    """Run snippets through CodeInterpreterTool and inspect the single tool response."""
+
+    @staticmethod
+    def _tool_call_message(code: str) -> CompletionMessage:
+        """Build an assistant message carrying one code_interpreter call for ``code``."""
+        call = ToolCall(
+            call_id="call_id",
+            tool_name=BuiltinTool.code_interpreter,
+            arguments={"code": code},
+        )
+        return CompletionMessage(
+            role="assistant",
+            content="",
+            tool_calls=[call],
+            stop_reason=StopReason.end_of_message,
+        )
+
+    async def test_matplotlib(self):
+        """A plotting script is expected to come back as a PNG attachment."""
+        interpreter = CodeInterpreterTool()
+        snippet = """
+import matplotlib.pyplot as plt
+import numpy as np
+
+x = np.array([1, 1])
+y = np.array([0, 10])
+
+plt.plot(x, y)
+plt.title('x = 1')
+plt.xlabel('x')
+plt.ylabel('y')
+plt.grid(True)
+plt.axvline(x=1, color='r')
+plt.show()
+        """
+        responses = await interpreter.run([self._tool_call_message(snippet)])
+
+        self.assertEqual(len(responses), 1)
+
+        payload = responses[0].content
+        self.assertIsInstance(payload, Attachment)
+        self.assertEqual(payload.mime_type, "image/png")
+
+    async def test_path_unlink(self):
+        """Creating and unlinking a file under MPLCONFIGDIR must succeed in the sandbox."""
+        interpreter = CodeInterpreterTool()
+        snippet = """
+import os
+from pathlib import Path
+import tempfile
+
+dpath = Path(os.environ["MPLCONFIGDIR"])
+with open(dpath / "test", "w") as f:
+    f.write("hello")
+
+Path(dpath / "test").unlink()
+print("_OK_")
+        """
+        responses = await interpreter.run([self._tool_call_message(snippet)])
+
+        self.assertEqual(len(responses), 1)
+
+        payload = responses[0].content
+        self.assertIn("_OK_", payload)
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/llama_stack/providers/impls/meta_reference/agents/tools/init.py
+++ b/llama_stack/providers/impls/meta_reference/agents/tools/init.py
@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
--- a/llama_stack/providers/impls/meta_reference/agents/tools/base.py
+++ b/llama_stack/providers/impls/meta_reference/agents/tools/base.py
@ -0,0 +1,20 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from abc import ABC, abstractmethod
+from typing import List
+
+from llama_stack.apis.inference import Message
+
+
+class BaseTool(ABC):
+    """Abstract interface for a tool: a name plus an async message-to-message ``run``."""
+
+    @abstractmethod
+    def get_name(self) -> str:
+        """Return the tool's name as a string."""
+        raise NotImplementedError
+
+    @abstractmethod
+    async def run(self, messages: List[Message]) -> List[Message]:
+        """Consume ``messages`` and return the resulting list of messages."""
+        raise NotImplementedError
--- a/llama_stack/providers/impls/meta_reference/agents/tools/builtin.py
+++ b/llama_stack/providers/impls/meta_reference/agents/tools/builtin.py
@ -0,0 +1,375 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import json
+import re
+import tempfile
+
+from abc import abstractmethod
+from typing import List, Optional
+
+import requests
+from termcolor import cprint
+
+from .ipython_tool.code_execution import (
+    CodeExecutionContext,
+    CodeExecutionRequest,
+    CodeExecutor,
+    TOOLS_ATTACHMENT_KEY_REGEX,
+)
+
+from llama_stack.apis.inference import *  # noqa: F403
+from llama_stack.apis.agents import *  # noqa: F403
+
+from .base import BaseTool
+
+
+def interpret_content_as_attachment(content: str) -> Optional[Attachment]:
+    """
+    Turn an embedded tools-attachment marker in ``content`` into an Attachment.
+
+    Searches ``content`` with TOOLS_ATTACHMENT_KEY_REGEX; the first captured
+    group is parsed as JSON and must provide "filepath" and "mimetype".
+    Returns None when the marker is not present.
+    """
+    found = re.search(TOOLS_ATTACHMENT_KEY_REGEX, content)
+    if not found:
+        return None
+
+    info = json.loads(found.group(1))
+    file_url = URL(uri="file://" + info["filepath"])
+    return Attachment(content=file_url, mime_type=info["mimetype"])
+
+
+class SingleMessageBuiltinTool(BaseTool):
+    """Base for tools driven by exactly one message holding exactly one tool call."""
+
+    async def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessage]:
+        """
+        Pass the tool call's "query" argument to ``run_impl`` and wrap the reply.
+
+        Returns a one-element list with a ToolResponseMessage that echoes the
+        call id and tool name of the incoming tool call.
+        """
+        assert len(messages) == 1, f"Expected single message, got {len(messages)}"
+
+        tool_calls = messages[0].tool_calls
+        assert len(tool_calls) == 1, "Expected a single tool call"
+
+        call = tool_calls[0]
+        result: str = await self.run_impl(call.arguments["query"])
+
+        return [
+            ToolResponseMessage(
+                call_id=call.call_id,
+                tool_name=call.tool_name,
+                content=result,
+            )
+        ]
+
+    @abstractmethod
+    async def run_impl(self, query: str) -> str:
+        """Produce the tool's textual response for ``query``."""
+        raise NotImplementedError()
+
+
+class PhotogenTool(SingleMessageBuiltinTool):
+    """Image-generation tool stub; ``run_impl`` is left unimplemented here."""
+
+    def __init__(self, dump_dir: str) -> None:
+        # Only stored; nothing in this class reads it.
+        self.dump_dir = dump_dir
+
+    def get_name(self) -> str:
+        """Return the string value of BuiltinTool.photogen."""
+        return BuiltinTool.photogen.value
+
+    async def run_impl(self, query: str) -> str:
+        """
+        Implement this to give the model an ability to generate images.
+
+        Return:
+            info = {
+                "filepath": str(image_filepath),
+                "mimetype": "image/png",
+            }
+        """
+        raise NotImplementedError()
+
+
+class SearchTool(SingleMessageBuiltinTool):
+    """Web search tool that delegates each query to a Bing or Brave backend."""
+
+    def __init__(self, engine: SearchEngineType, api_key: str, **kwargs) -> None:
+        """
+        Select the backend for ``engine``; extra keyword arguments are forwarded
+        to the backend constructor.
+
+        Raises:
+            ValueError: if ``engine`` is neither bing nor brave.
+        """
+        self.api_key = api_key
+        if engine == SearchEngineType.bing:
+            self.engine = BingSearch(api_key, **kwargs)
+        elif engine == SearchEngineType.brave:
+            self.engine = BraveSearch(api_key, **kwargs)
+        else:
+            raise ValueError(f"Unknown search engine: {engine}")
+
+    def get_name(self) -> str:
+        # The brave_search name is reported whichever backend was selected.
+        return BuiltinTool.brave_search.value
+
+    async def run_impl(self, query: str) -> str:
+        """Return whatever the selected backend's ``search`` returns for ``query``."""
+        return await self.engine.search(query)
+
+
+class BingSearch:
+    """Bing Web Search v7 client that returns a trimmed JSON summary of the results."""
+
+    def __init__(self, api_key: str, top_k: int = 3, **kwargs) -> None:
+        self.top_k = top_k
+        self.api_key = api_key
+
+    async def search(self, query: str) -> str:
+        """
+        Query Bing for ``query`` and return the cleaned response as a JSON string.
+
+        ``top_k`` is sent as the ``count`` parameter. An HTTP error status
+        raises through ``raise_for_status``.
+        """
+        response = requests.get(
+            url="https://api.bing.microsoft.com/v7.0/search",
+            params={
+                "count": self.top_k,
+                "textDecorations": True,
+                "textFormat": "HTML",
+                "q": query,
+            },
+            headers={
+                "Ocp-Apim-Subscription-Key": self.api_key,
+            },
+        )
+        response.raise_for_status()
+        return json.dumps(self._clean_response(response.json()))
+
+    def _clean_response(self, search_response):
+        """
+        Reduce a raw Bing response to the original query plus selected fields.
+
+        Web pages contribute one dict each (name, url, snippet). News items
+        (name, url, description) are appended together as a single nested list.
+        """
+        original_query = search_response["queryContext"]["originalQuery"]
+        entries = []
+        if "webPages" in search_response:
+            page_keys = {"name", "url", "snippet"}
+            entries.extend(
+                {k: v for k, v in page.items() if k in page_keys}
+                for page in search_response["webPages"]["value"]
+            )
+        if "news" in search_response:
+            news_keys = {"name", "url", "description"}
+            entries.append(
+                [
+                    {k: v for k, v in item.items() if k in news_keys}
+                    for item in search_response["news"]["value"]
+                ]
+            )
+
+        return {"query": original_query, "top_k": entries}
+
+
+class BraveSearch:
+    """Brave web search client that returns a trimmed JSON summary of the results."""
+
+    # Result sections where only the entry at the mixed item's "index" is kept,
+    # mapped to the fields retained from that entry.
+    _INDEXED_KEYS = {
+        "web": ("type", "title", "url", "description", "date", "extra_snippets"),
+        "infobox": ("type", "title", "url", "description", "long_desc"),
+    }
+    # Result sections where every entry is kept, mapped to the fields retained
+    # from each entry.
+    _LISTED_KEYS = {
+        "faq": ("type", "question", "answer", "title", "url"),
+        "videos": ("type", "url", "title", "description", "date"),
+        "locations": (
+            "type",
+            "title",
+            "url",
+            "description",
+            "coordinates",
+            "postal_address",
+            "contact",
+            "rating",
+            "distance",
+            "zoom_level",
+        ),
+        "news": ("type", "title", "url", "description"),
+    }
+
+    def __init__(self, api_key: str, top_k: int = 3, **kwargs) -> None:
+        """
+        Args:
+            api_key: sent as the X-Subscription-Token header.
+            top_k: how many items of the "mixed" main ranking to keep.
+            **kwargs: accepted and ignored, mirroring BingSearch, so options
+                forwarded by SearchTool do not raise TypeError.
+        """
+        self.api_key = api_key
+        self.top_k = top_k
+
+    async def search(self, query: str) -> str:
+        """Query Brave for ``query`` and return the cleaned response as a JSON string."""
+        url = "https://api.search.brave.com/res/v1/web/search"
+        headers = {
+            "X-Subscription-Token": self.api_key,
+            "Accept-Encoding": "gzip",
+            "Accept": "application/json",
+        }
+        payload = {"q": query}
+        response = requests.get(url=url, params=payload, headers=headers)
+        return json.dumps(
+            self._clean_brave_response(response.json(), top_k=self.top_k)
+        )
+
+    def _clean_brave_response(self, search_response, top_k=3):
+        """
+        Reduce a raw Brave response to the original query plus selected fields.
+
+        Walks the first ``top_k`` items of ``search_response["mixed"]["main"]``.
+        "web" and "infobox" items yield one filtered dict (the entry at the
+        item's "index"). "faq", "videos", "locations" and "news" yield a list
+        with every entry of that section filtered. Any other type yields an
+        empty list.
+        """
+        query = None
+        if "query" in search_response:
+            if "original" in search_response["query"]:
+                query = search_response["query"]["original"]
+
+        clean_response = []
+        if "mixed" in search_response:
+            for m in search_response["mixed"]["main"][:top_k]:
+                r_type = m["type"]
+                results = search_response[r_type]["results"]
+                if r_type in self._INDEXED_KEYS:
+                    selected_keys = self._INDEXED_KEYS[r_type]
+                    cleaned = {
+                        k: v
+                        for k, v in results[m["index"]].items()
+                        if k in selected_keys
+                    }
+                elif r_type in self._LISTED_KEYS:
+                    selected_keys = self._LISTED_KEYS[r_type]
+                    cleaned = [
+                        {k: v for k, v in q.items() if k in selected_keys}
+                        for q in results
+                    ]
+                else:
+                    cleaned = []
+
+                clean_response.append(cleaned)
+
+        return {"query": query, "top_k": clean_response}
+
+
+class WolframAlphaTool(SingleMessageBuiltinTool):
+    """Tool that sends a query to the Wolfram|Alpha v2 query API and returns pruned JSON."""
+
+    def __init__(self, api_key: str) -> None:
+        self.api_key = api_key
+        self.url = "https://api.wolframalpha.com/v2/query"
+
+    def get_name(self) -> str:
+        """Return the string value of BuiltinTool.wolfram_alpha."""
+        return BuiltinTool.wolfram_alpha.value
+
+    async def run_impl(self, query: str) -> str:
+        """Request a plaintext/JSON answer for ``query`` and return it, pruned, as a JSON string."""
+        params = {
+            "input": query,
+            "appid": self.api_key,
+            "format": "plaintext",
+            "output": "json",
+        }
+        response = requests.get(
+            self.url,
+            params=params,
+        )
+
+        return json.dumps(self._clean_wolfram_alpha_response(response.json()))
+
+    def _clean_wolfram_alpha_response(self, wa_response):
+        """
+        Strip bookkeeping fields from a Wolfram|Alpha response.
+
+        ``wa_response`` is modified in place and also returned. Missing keys
+        are tolerated: a KeyError while handling one entry skips that entry.
+        """
+        # Keys to delete under each top-level key. A dict entry names a nested
+        # list ("pods") and the keys to delete from each of its items.
+        remove = {
+            "queryresult": [
+                "datatypes",
+                "error",
+                "timedout",
+                "timedoutpods",
+                "numpods",
+                "timing",
+                "parsetiming",
+                "parsetimedout",
+                "recalculate",
+                "id",
+                "host",
+                "server",
+                "related",
+                "version",
+                {
+                    "pods": [
+                        "scanner",
+                        "id",
+                        "error",
+                        "expressiontypes",
+                        "states",
+                        "infos",
+                        "position",
+                        "numsubpods",
+                    ]
+                },
+                "assumptions",
+            ],
+        }
+        for main_key in remove:
+            for key_to_remove in remove[main_key]:
+                try:
+                    if key_to_remove == "assumptions":
+                        if "assumptions" in wa_response[main_key]:
+                            del wa_response[main_key][key_to_remove]
+                    if isinstance(key_to_remove, dict):
+                        for sub_key in key_to_remove:
+                            if sub_key == "pods":
+                                # Drop every pod after the first one titled "Result".
+                                for i in range(len(wa_response[main_key][sub_key])):
+                                    if (
+                                        wa_response[main_key][sub_key][i]["title"]
+                                        == "Result"
+                                    ):
+                                        del wa_response[main_key][sub_key][i + 1 :]
+                                        break
+                            # Delete the listed keys from each remaining item.
+                            sub_items = wa_response[main_key][sub_key]
+                            for i in range(len(sub_items)):
+                                for sub_key_to_remove in key_to_remove[sub_key]:
+                                    if sub_key_to_remove in sub_items[i]:
+                                        del sub_items[i][sub_key_to_remove]
+                    elif key_to_remove in wa_response[main_key]:
+                        del wa_response[main_key][key_to_remove]
+                except KeyError:
+                    pass
+        return wa_response
+
+
+class CodeInterpreterTool(BaseTool):
+    """Tool that runs the "code" argument of a tool call through a CodeExecutor."""
+
+    def __init__(self) -> None:
+        # Images produced by matplotlib are dumped under a fresh temp directory.
+        dump_dir = tempfile.mkdtemp()
+        self.code_executor = CodeExecutor(
+            CodeExecutionContext(
+                matplotlib_dump_dir=dump_dir,
+            )
+        )
+
+    def get_name(self) -> str:
+        """Return the string value of BuiltinTool.code_interpreter."""
+        return BuiltinTool.code_interpreter.value
+
+    async def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessage]:
+        """
+        Execute the single tool call of the first message and report the outcome.
+
+        The response content is the process status followed by the non-empty
+        stdout and stderr, each wrapped in ``[name]`` / ``[/name]`` markers and
+        joined with newlines. Non-empty stderr is also printed in red.
+        """
+        tool_calls = messages[0].tool_calls
+        assert len(tool_calls) == 1, "Expected a single tool call"
+
+        call = tool_calls[0]
+        request = CodeExecutionRequest(scripts=[call.arguments["code"]])
+        result = self.code_executor.execute(request)
+
+        sections = [result["process_status"]]
+        for stream in ("stdout", "stderr"):
+            text = result[stream]
+            if text == "":
+                continue
+            sections += [f"[{stream}]", text, f"[/{stream}]"]
+            if stream == "stderr":
+                cprint(f"ipython tool error: ↓\n{text}", color="red")
+
+        return [
+            ToolResponseMessage(
+                call_id=call.call_id,
+                tool_name=call.tool_name,
+                content="\n".join(sections),
+            )
+        ]
--- a/llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/init.py
+++ b/llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/init.py
@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
--- a/llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py
+++ b/llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py
@ -0,0 +1,133 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import errno
+
+# Disabling potentially dangerous functions
+import os as _os
+from functools import partial
+
+os_funcs_to_disable = [
+    "kill",
+    "system",
+    "putenv",
+    "remove",
+    "removedirs",
+    "rmdir",
+    "fchdir",
+    "setuid",
+    "fork",
+    "forkpty",
+    "killpg",
+    "rename",
+    "renames",
+    "truncate",
+    "replace",
+    # "unlink",  # Commented out because disabling it blocked matplotlib from rendering plots correctly
+    "fchmod",
+    "fchown",
+    "chmod",
+    "chown",
+    "chroot",
+    "fchdir",
+    "lchflags",
+    "lchmod",
+    "lchown",
+    "chdir",
+]
+
+
+def call_not_allowed(*args, **kwargs):
+    """
+    Stand-in for a disabled function: always raises OSError with EPERM.
+
+    The patching loops bind ``_func_name`` (e.g. "os.system") via ``partial``.
+    When present it is named in the error message so the caller can see which
+    call was blocked. All other arguments are ignored.
+    """
+    func_name = kwargs.get("_func_name")
+    subject = f"Calls to {func_name} are" if func_name else "Calls are"
+    raise OSError(errno.EPERM, f"{subject} not permitted in this environment")
+
+
+for func_name in os_funcs_to_disable:
+    if hasattr(_os, func_name):
+        setattr(_os, func_name, partial(call_not_allowed, _func_name=f"os.{func_name}"))
+
+import shutil as _shutil
+
+for func_name in ["rmtree", "move", "chown"]:
+    if hasattr(_shutil, func_name):
+        setattr(
+            _shutil,
+            func_name,
+            partial(call_not_allowed, _func_name=f"shutil.{func_name}"),
+        )
+
+import subprocess as _subprocess
+
+
+def popen_not_allowed(*args, **kwargs):
+    """Stand-in for subprocess.Popen that always raises CalledProcessError (code -1)."""
+    # Report the attempted command when one was given positionally.
+    attempted_cmd = args[0] if args else "unknown"
+    message = "subprocess.Popen is not allowed in this environment"
+    raise _subprocess.CalledProcessError(-1, attempted_cmd, stderr=message)
+
+
+_subprocess.Popen = popen_not_allowed
+
+
+import atexit as _atexit
+import builtins as _builtins
+import io as _io
+import json as _json
+import sys as _sys
+
+# NB! The following "unused" imports are crucial; make sure not to remove
+# them with linters - they're used in code_execution.py
+from contextlib import (  # noqa
+    contextmanager as _contextmanager,
+    redirect_stderr as _redirect_stderr,
+    redirect_stdout as _redirect_stdout,
+)
+from multiprocessing.connection import Connection as _Connection
+
+# Mangle imports to avoid polluting model execution namespace.
+
+_IO_SINK = _io.StringIO()
+_NETWORK_TIMEOUT = 5
+_NETWORK_CONNECTIONS = None
+
+
+def _open_connections():
+    """
+    Return the (request, response) connection pair, creating it on first use.
+
+    The file descriptors are read from ``sys.argv[1]`` (write-only request
+    end) and ``sys.argv[2]`` (read-only response end). The pair is cached in
+    ``_NETWORK_CONNECTIONS`` so the descriptors are wrapped only once.
+    """
+    global _NETWORK_CONNECTIONS
+    if _NETWORK_CONNECTIONS is None:
+        write_fd, read_fd = _sys.argv[1], _sys.argv[2]
+        _NETWORK_CONNECTIONS = (
+            _Connection(int(write_fd), readable=False),
+            _Connection(int(read_fd), writable=False),
+        )
+    return _NETWORK_CONNECTIONS
+
+
+_builtins._open_connections = _open_connections
+
+
+@_atexit.register
+def _close_connections():
+    """Close both network connections at interpreter exit, if they were opened."""
+    global _NETWORK_CONNECTIONS
+    if _NETWORK_CONNECTIONS is None:
+        return
+    for con in _NETWORK_CONNECTIONS:
+        con.close()
+    # Reset instead of `del`: deleting the global would make any later read of
+    # it (a second call here, or _open_connections) raise NameError.
+    _NETWORK_CONNECTIONS = None
+
+
+def _network_call(request):
+    """
+    Send ``request`` to the parent process and return its decoded JSON reply.
+
+    Raises:
+        Exception: if no reply arrives within ``_NETWORK_TIMEOUT`` seconds.
+    """
+    # NOTE: We communicate with the parent process in json, encoded
+    # in raw bytes. We do this because native send/recv methods use
+    # pickle which involves execution of arbitrary code.
+    _open_connections()
+    req_con, resp_con = _NETWORK_CONNECTIONS
+
+    req_con.send_bytes(_json.dumps(request).encode("utf-8"))
+    # Connection.poll() returns a bool, never None, so test its truthiness.
+    # Comparing with None would never detect the timeout and recv_bytes()
+    # would then block.
+    if not resp_con.poll(timeout=_NETWORK_TIMEOUT):
+        raise Exception(f"Network request timed out: {_json.dumps(request)}")
+    return _json.loads(resp_con.recv_bytes().decode("utf-8"))
--- a/llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py
+++ b/llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py
@ -0,0 +1,256 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import base64
+import json
+import multiprocessing
+import os
+import re
+import subprocess
+import sys
+import tempfile
+import textwrap
+import time
+from dataclasses import dataclass
+from datetime import datetime
+from io import BytesIO
+from pathlib import Path
+from typing import List
+
+from PIL import Image
+
+from .utils import get_code_env_prefix
+
+TOOLS_ATTACHMENT_KEY = "__tools_attachment__"
+TOOLS_ATTACHMENT_KEY_REGEX = re.compile(r"__tools_attachment__=(\{.*?\})")
+
+DIRNAME = Path(__file__).parent
+
+CODE_EXEC_TIMEOUT = 20
+CODE_ENV_PREFIX = get_code_env_prefix()
+
+STDOUTERR_SINK_WRAPPER_TEMPLATE = """\
+with _redirect_stdout(_IO_SINK), _redirect_stderr(_IO_SINK):
+{code}\
+"""
+
+TRYEXCEPT_WRAPPER_TEMPLATE = """\
+try:
+{code}
+except:
+    pass\
+"""
+
+
+def generate_bwrap_command(bind_dirs: List[str]) -> str:
+    """
+    Build the argument string that follows ``bwrap`` on the command line.
+
+    The root filesystem is bound read-only, /dev is mounted, each entry of
+    ``bind_dirs`` is bound onto itself with ``--bind``, and the sandbox is
+    started with ``--unshare-all`` and ``--die-with-parent``. The returned
+    string ends with a single trailing space.
+    """
+    # Read-only root plus device files come first.
+    parts = ["--ro-bind / /", "--dev /dev"]
+    # One bind per requested directory.
+    parts.extend(f"--bind {d} {d}" for d in bind_dirs)
+    # Isolate the sandbox and make the child die with bwrap's parent.
+    parts += ["--unshare-all", "--die-with-parent"]
+    return " ".join(parts) + " "
+
+
+@dataclass
+class CodeExecutionContext:
+    """Per-executor settings handed to the subprocess request handlers."""
+
+    # Root directory under which rendered matplotlib images are saved.
+    matplotlib_dump_dir: str
+    # NOTE(review): not read anywhere in the visible code - confirm its purpose.
+    use_proxy: bool = False
+
+
+@dataclass
+class CodeExecutionRequest:
+    """Scripts to run as one program, plus options for how earlier cells are wrapped."""
+
+    # Cells, concatenated in order into a single script.
+    scripts: List[str]
+    # When true, every cell but the last has its stdout/stderr redirected to a sink.
+    only_last_cell_stdouterr: bool = True
+    # When true, every cell but the last is wrapped in try/except so it cannot fail the run.
+    only_last_cell_fail: bool = True
+    # Seed for random, numpy.random and PYTHONHASHSEED.
+    seed: int = 0
+    # When true, `File "...", line N` in stderr is rewritten to `line N`.
+    strip_fpaths_in_stderr: bool = True
+
+
+class CodeExecutor:
+    """Runs a request's scripts as one Python program inside a bwrap sandbox."""
+
+    def __init__(self, context: CodeExecutionContext):
+        self.context = context
+
+    def execute(self, req: CodeExecutionRequest) -> dict:
+        """
+        Execute ``req.scripts`` in a sandboxed subprocess and describe the outcome.
+
+        Returns a dict whose "process_status" is "completed" (with
+        "returncode", "stdout", "stderr"), "timeout", or "error" (with
+        "error_type"). "stdout" and "stderr" are always present.
+        """
+        # Work on a copy: wrapping the earlier cells in place would modify the
+        # caller's request, so executing the same request twice would wrap
+        # those cells twice.
+        scripts = list(req.scripts)
+        for i in range(len(scripts) - 1):
+            if req.only_last_cell_stdouterr:
+                scripts[i] = STDOUTERR_SINK_WRAPPER_TEMPLATE.format(
+                    code=textwrap.indent(scripts[i], " " * 4)
+                )
+            if req.only_last_cell_fail:
+                scripts[i] = TRYEXCEPT_WRAPPER_TEMPLATE.format(
+                    code=textwrap.indent(scripts[i], " " * 4)
+                )
+
+        # Seeds prefix:
+        seed = req.seed
+        seeds_prefix = f"""\
+def _set_seeds():
+    import random
+    random.seed({seed})
+    import numpy as np
+    np.random.seed({seed})
+_set_seeds()\
+"""
+
+        script = "\n\n".join([seeds_prefix] + [CODE_ENV_PREFIX] + scripts)
+        with tempfile.TemporaryDirectory() as dpath:
+            # The temp dir is the only writable bind inside the sandbox.
+            bwrap_prefix = "bwrap " + generate_bwrap_command(bind_dirs=[dpath])
+            cmd = [*bwrap_prefix.split(), sys.executable, "-c", script]
+            code_fpath = os.path.join(dpath, "code.py")
+            with open(code_fpath, "w") as f:
+                f.write(script)
+
+            try:
+                python_path = os.environ.get("PYTHONPATH", "")
+                env = dict(
+                    os.environ,
+                    PYTHONHASHSEED=str(seed),
+                    MPLCONFIGDIR=dpath,
+                    MPLBACKEND="module://matplotlib_custom_backend",
+                    PYTHONPATH=f"{DIRNAME}:{python_path}",
+                )
+                stdout, stderr, returncode = do_subprocess(
+                    cmd=cmd,
+                    env=env,
+                    ctx=self.context,
+                )
+
+                stderr = stderr.strip()
+                if req.strip_fpaths_in_stderr:
+                    pattern = r'File "([^"]+)", line (\d+)'
+                    stderr = re.sub(pattern, r"line \2", stderr)
+
+                return {
+                    "process_status": "completed",
+                    "returncode": returncode,
+                    "stdout": stdout.strip(),
+                    "stderr": stderr,
+                }
+
+            except subprocess.TimeoutExpired:
+                return {
+                    "process_status": "timeout",
+                    "stdout": "Timed out",
+                    "stderr": "Timed out",
+                }
+
+            except Exception as e:
+                return {
+                    "process_status": "error",
+                    "error_type": type(e).__name__,
+                    "stderr": str(e),
+                    "stdout": str(e),
+                }
+
+
+def process_matplotlib_response(response, matplotlib_dump_dir: str):
+    """
+    Save the base64 PNGs carried by ``response`` and return an attachment marker.
+
+    Each image is written to ``<matplotlib_dump_dir>/<YYYY-MM-DD>/`` under a
+    timestamped name. The returned string is
+    ``TOOLS_ATTACHMENT_KEY=<json>`` describing the last saved image; it is sent
+    back to the subprocess, which prints it.
+    """
+    # Decode everything, then open everything, before anything is written.
+    raw_pngs = [base64.b64decode(entry["image_base64"]) for entry in response["image_data"]]
+    figures = [Image.open(BytesIO(raw)) for raw in raw_pngs]
+
+    saved_paths = []
+    for index, figure in enumerate(figures):
+        # One sub-directory per day keeps the dumps organized.
+        day_dir = Path(matplotlib_dump_dir, datetime.today().strftime("%Y-%m-%d"))
+        day_dir.mkdir(parents=True, exist_ok=True)
+        stamp = str(time.time()).replace(".", "_")
+        target = day_dir / f"matplotlib_{stamp}_{index}.png"
+        figure.save(target, "PNG")
+        saved_paths.append(str(target))
+
+    attachment_info = {
+        "filepath": str(saved_paths[-1]),
+        "mimetype": "image/png",
+    }
+    return f"{TOOLS_ATTACHMENT_KEY}={json.dumps(attachment_info)}"
+
+
+def execute_subprocess_request(request, ctx: CodeExecutionContext):
+    """
+    Dispatch a request received from the subprocess over the request pipe.
+
+    Only "matplotlib" requests are handled; any other type raises Exception.
+    """
+    if request["type"] != "matplotlib":
+        raise Exception(f'Unrecognised network request type: {request["type"]}')
+    return process_matplotlib_response(request, ctx.matplotlib_dump_dir)
+
+
+def do_subprocess(*, cmd: list, env: dict, ctx: CodeExecutionContext):
+    """
+    Run ``cmd`` while serving its tool requests, and return its output.
+
+    Two one-way pipes are created. The child inherits the write end of the
+    request pipe and the read end of the response pipe, and their fd numbers
+    are appended to ``cmd``. Each request is answered through
+    ``execute_subprocess_request``.
+
+    Returns:
+        (stdout, stderr, returncode) of the child.
+
+    Raises:
+        subprocess.TimeoutExpired: once more than CODE_EXEC_TIMEOUT seconds have
+            passed (the child is terminated first), or when the final
+            ``communicate`` does not finish in time.
+    """
+    # Create Pipes to be used for any external tool/network requests.
+    req_r, req_w = multiprocessing.Pipe(duplex=False)
+    resp_r, resp_w = multiprocessing.Pipe(duplex=False)
+
+    # NOTE(review): `+=` extends the caller's list in place - confirm callers
+    # never reuse `cmd`.
+    cmd += [str(req_w.fileno()), str(resp_r.fileno())]
+    proc = subprocess.Popen(
+        cmd,
+        pass_fds=(req_w.fileno(), resp_r.fileno()),
+        text=True,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        close_fds=True,
+        env=env,
+    )
+
+    # Close unnecessary fds.
+    req_w.close()
+    resp_r.close()
+
+    pipe_close = False
+    done_read = False
+    start = time.monotonic()
+    while proc.poll() is None and not pipe_close:
+        if req_r.poll(0.1):
+            # NB: Python pipe semantics for poll and recv mean that
+            # poll() returns True if a pipe is closed.
+            # CF old school PEP from '09
+            #  https://bugs.python.org/issue5573
+            try:
+                request = json.loads(req_r.recv_bytes().decode("utf-8"))
+                response = execute_subprocess_request(request, ctx)
+
+                resp_w.send_bytes(json.dumps(response).encode("utf-8"))
+            except EOFError:
+                # The request pipe is closed - set a marker to exit
+                # after the next attempt at reading stdout/stderr.
+                pipe_close = True
+
+            try:
+                # If lots has been printed, pipe might be full but
+                # proc cannot exit until all the stdout/stderr
+                # been written/read.
+                stdout, stderr = proc.communicate(timeout=0.3)
+                done_read = True
+            except subprocess.TimeoutExpired:
+                # The program has not terminated. Ignore it, there
+                # may be more network/tool requests.
+                continue
+        # Overall wall-clock budget; skipped on iterations that hit `continue`.
+        if time.monotonic() - start > CODE_EXEC_TIMEOUT:
+            proc.terminate()
+            raise subprocess.TimeoutExpired(cmd, CODE_EXEC_TIMEOUT)
+
+    if not done_read:
+        # Solve race condition where process terminates before
+        # we hit the while loop.
+        stdout, stderr = proc.communicate(timeout=0.3)
+
+    resp_w.close()
+    req_r.close()
+    return stdout, stderr, proc.returncode
--- a/llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py
+++ b/llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py
@ -0,0 +1,87 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""
+A custom Matplotlib backend that overrides the show method to return image bytes.
+"""
+
+import base64
+import io
+import json as _json
+
+import matplotlib
+from matplotlib.backend_bases import FigureManagerBase
+
+# Import necessary components from Matplotlib
+from matplotlib.backends.backend_agg import FigureCanvasAgg
+
+
+class CustomFigureCanvas(FigureCanvasAgg):
+    """Agg-based canvas whose ``show`` returns the rendered figure as PNG bytes."""
+
+    def show(self):
+        """Render this canvas to PNG in memory and return the encoded bytes."""
+        # The context manager closes the in-memory buffer once its contents
+        # have been copied out by getvalue().
+        with io.BytesIO() as png_buffer:
+            self.print_png(png_buffer)
+            return png_buffer.getvalue()
+
+
+class CustomFigureManager(FigureManagerBase):
+    """
+    Figure manager used by the custom backend.
+
+    It adds nothing to ``FigureManagerBase``; construction with
+    ``(canvas, num)`` is handled entirely by the inherited initializer.
+    """
+
+
+# Mimic module initialization that integrates with the Matplotlib backend system
+def _create_figure_manager(num, *args, **kwargs):
+    """
+    Build a figure and wrap it in this backend's canvas and manager.
+
+    ``num`` is handed to the manager. A ``FigureClass`` keyword argument, when
+    present and not None, selects the figure type; otherwise
+    ``matplotlib.figure.Figure`` is used. All remaining positional and keyword
+    arguments are forwarded to the figure constructor.
+    """
+    figure_cls = kwargs.pop("FigureClass", None)
+    if figure_cls is None:
+        # Imported lazily, only when the caller did not supply a class.
+        from matplotlib.figure import Figure
+
+        figure_cls = Figure
+
+    figure = figure_cls(*args, **kwargs)
+    return CustomFigureManager(CustomFigureCanvas(figure), num)
+
+
+def show():
+    """
+    Send every open figure over the request connection as a base64 PNG.
+
+    Each figure manager known to ``Gcf`` has its figure rendered to PNG in
+    memory and base64-encoded. The images are bundled into a single JSON
+    message of type ``"matplotlib"`` and written to the request connection;
+    the JSON reply read from the response connection is then printed.
+    Nothing is returned.
+    """
+    encoded_figures = []
+    for fig_manager in matplotlib._pylab_helpers.Gcf.get_all_fig_managers():
+        with io.BytesIO() as png_buffer:
+            fig_manager.canvas.figure.savefig(png_buffer, format="png")
+            png_b64 = base64.b64encode(png_buffer.getvalue()).decode("utf-8")
+        encoded_figures.append({"image_base64": png_b64})
+
+    # NOTE(review): _open_connections is not defined in this module; presumably
+    # it is supplied by the environment that loads this backend - confirm.
+    request_conn, response_conn = _open_connections()
+
+    payload = _json.dumps({"type": "matplotlib", "image_data": encoded_figures})
+    request_conn.send_bytes(payload.encode("utf-8"))
+
+    reply = _json.loads(response_conn.recv_bytes().decode("utf-8"))
+    print(reply)
+
+
+# Module-level aliases exposing this backend's canvas and manager classes.
+# NOTE(review): presumably these are the names matplotlib looks up when it
+# loads a backend module - confirm against the matplotlib backend docs.
+FigureCanvas = CustomFigureCanvas
+FigureManager = CustomFigureManager
--- a/llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py
+++ b/llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py
@ -0,0 +1,21 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import os
+
+# Directory containing this module, with symlinks resolved by realpath().
+DIR = os.path.dirname(os.path.realpath(__file__))
+# Path of the code_env_prefix.py file located next to this module.
+CODE_ENV_PREFIX_FILE = os.path.join(DIR, "code_env_prefix.py")
+# Cached contents of CODE_ENV_PREFIX_FILE; stays None until
+# get_code_env_prefix() reads the file for the first time.
+CODE_ENV_PREFIX = None
+
+
+def get_code_env_prefix() -> str:
+    """
+    Return the text of ``code_env_prefix.py``, reading it from disk only once.
+
+    The contents are cached in the module-level ``CODE_ENV_PREFIX``; later
+    calls return the cached string without touching the file again.
+
+    Raises:
+        OSError: if the prefix file cannot be opened on the first call.
+    """
+    global CODE_ENV_PREFIX
+
+    if CODE_ENV_PREFIX is None:
+        # Decode explicitly as UTF-8 (the default encoding of Python source)
+        # instead of relying on the platform's locale encoding.
+        with open(CODE_ENV_PREFIX_FILE, encoding="utf-8") as f:
+            CODE_ENV_PREFIX = f.read()
+
+    return CODE_ENV_PREFIX
--- a/llama_stack/providers/impls/meta_reference/agents/tools/safety.py
+++ b/llama_stack/providers/impls/meta_reference/agents/tools/safety.py
@ -0,0 +1,58 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import List
+
+from llama_stack.apis.inference import Message
+from llama_stack.apis.safety import Safety, ShieldDefinition
+
+from llama_stack.providers.impls.meta_reference.agents.safety import ShieldRunnerMixin
+
+from .builtin import BaseTool
+
+
+class SafeTool(BaseTool, ShieldRunnerMixin):
+    """
+    Wrap another tool so that safety shields run around its execution.
+
+    Input shields screen the incoming messages before the wrapped tool runs;
+    output shields screen the messages the wrapped tool produced.
+    """
+
+    def __init__(
+        self,
+        tool: BaseTool,
+        safety_api: Safety,
+        input_shields: List[ShieldDefinition] = None,
+        output_shields: List[ShieldDefinition] = None,
+    ):
+        """
+        Args:
+            tool: the tool whose ``run`` is being guarded.
+            safety_api: handed to ``ShieldRunnerMixin`` along with the shields.
+            input_shields: shields applied to the messages before the tool runs.
+            output_shields: shields applied to the tool's result.
+        """
+        self._tool = tool
+        ShieldRunnerMixin.__init__(
+            self, safety_api, input_shields=input_shields, output_shields=output_shields
+        )
+
+    def get_name(self) -> str:
+        """Return the name of the wrapped tool."""
+        return self._tool.get_name()
+
+    async def run(self, messages: List[Message]) -> List[Message]:
+        """
+        Run the wrapped tool with shield checks before and after.
+
+        Args:
+            messages: the messages passed to the wrapped tool.
+
+        Returns:
+            The messages returned by the wrapped tool.
+
+        Raises:
+            Whatever ``run_shields`` raises on a violation (presumably
+            ``SafetyException`` - confirm against ``ShieldRunnerMixin``).
+        """
+        if self.input_shields:
+            await self.run_shields(messages, self.input_shields)
+
+        # run the underlying tool
+        res = await self._tool.run(messages)
+
+        if self.output_shields:
+            # Screen what the tool produced. Checking `messages` again here
+            # would only repeat the input check and let tool output through
+            # unscreened.
+            await self.run_shields(res, self.output_shields)
+
+        return res
+
+
+def with_safety(
+    tool: BaseTool,
+    safety_api: Safety,
+    input_shields: List[ShieldDefinition] = None,
+    output_shields: List[ShieldDefinition] = None,
+) -> SafeTool:
+    """Return ``tool`` wrapped in a ``SafeTool`` configured with the given shields."""
+    guarded_tool = SafeTool(
+        tool=tool,
+        safety_api=safety_api,
+        input_shields=input_shields,
+        output_shields=output_shields,
+    )
+    return guarded_tool
--- a/llama_stack/providers/utils/agents/event_logger.py
+++ b/llama_stack/providers/utils/agents/event_logger.py
@ -1,184 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from typing import Optional
-
-from llama_models.llama3.api.datatypes import *  # noqa: F403
-from llama_models.llama3.api.tool_utils import ToolUtils
-
-from termcolor import cprint
-
-from llama_stack.apis.agents import AgentTurnResponseEventType, StepType
-
-
-class LogEvent:
-    def __init__(
-        self,
-        role: Optional[str] = None,
-        content: str = "",
-        end: str = "\n",
-        color="white",
-    ):
-        self.role = role
-        self.content = content
-        self.color = color
-        self.end = "\n" if end is None else end
-
-    def __str__(self):
-        if self.role is not None:
-            return f"{self.role}> {self.content}"
-        else:
-            return f"{self.content}"
-
-    def print(self, flush=True):
-        cprint(f"{str(self)}", color=self.color, end=self.end, flush=flush)
-
-
-EventType = AgentTurnResponseEventType
-
-
-class EventLogger:
-    async def log(
-        self,
-        event_generator,
-        stream=True,
-        tool_prompt_format: ToolPromptFormat = ToolPromptFormat.json,
-    ):
-        previous_event_type = None
-        previous_step_type = None
-
-        async for chunk in event_generator:
-            if not hasattr(chunk, "event"):
-                # Need to check for custom tool first
-                # since it does not produce event but instead
-                # a Message
-                if isinstance(chunk, ToolResponseMessage):
-                    yield chunk, LogEvent(
-                        role="CustomTool", content=chunk.content, color="grey"
-                    )
-                continue
-
-            event = chunk.event
-            event_type = event.payload.event_type
-            if event_type in {
-                EventType.turn_start.value,
-                EventType.turn_complete.value,
-            }:
-                # Currently not logging any turn realted info
-                yield event, None
-                continue
-
-            step_type = event.payload.step_type
-            # handle safety
-            if (
-                step_type == StepType.shield_call
-                and event_type == EventType.step_complete.value
-            ):
-                response = event.payload.step_details.response
-                if not response.is_violation:
-                    yield event, LogEvent(
-                        role=step_type, content="No Violation", color="magenta"
-                    )
-                else:
-                    yield event, LogEvent(
-                        role=step_type,
-                        content=f"{response.violation_type} {response.violation_return_message}",
-                        color="red",
-                    )
-
-            # handle inference
-            if step_type == StepType.inference:
-                if stream:
-                    if event_type == EventType.step_start.value:
-                        # TODO: Currently this event is never received
-                        yield event, LogEvent(
-                            role=step_type, content="", end="", color="yellow"
-                        )
-                    elif event_type == EventType.step_progress.value:
-                        # HACK: if previous was not step/event was not inference's step_progress
-                        # this is the first time we are getting model inference response
-                        # aka equivalent to step_start for inference. Hence,
-                        # start with "Model>".
-                        if (
-                            previous_event_type != EventType.step_progress.value
-                            and previous_step_type != StepType.inference
-                        ):
-                            yield event, LogEvent(
-                                role=step_type, content="", end="", color="yellow"
-                            )
-
-                        if event.payload.tool_call_delta:
-                            if isinstance(event.payload.tool_call_delta.content, str):
-                                yield event, LogEvent(
-                                    role=None,
-                                    content=event.payload.tool_call_delta.content,
-                                    end="",
-                                    color="cyan",
-                                )
-                        else:
-                            yield event, LogEvent(
-                                role=None,
-                                content=event.payload.model_response_text_delta,
-                                end="",
-                                color="yellow",
-                            )
-                    else:
-                        # step_complete
-                        yield event, LogEvent(role=None, content="")
-
-                else:
-                    # Not streaming
-                    if event_type == EventType.step_complete.value:
-                        response = event.payload.step_details.model_response
-                        if response.tool_calls:
-                            content = ToolUtils.encode_tool_call(
-                                response.tool_calls[0], tool_prompt_format
-                            )
-                        else:
-                            content = response.content
-                        yield event, LogEvent(
-                            role=step_type,
-                            content=content,
-                            color="yellow",
-                        )
-
-            # handle tool_execution
-            if (
-                step_type == StepType.tool_execution
-                and
-                # Only print tool calls and responses at the step_complete event
-                event_type == EventType.step_complete.value
-            ):
-                details = event.payload.step_details
-                for t in details.tool_calls:
-                    yield event, LogEvent(
-                        role=step_type,
-                        content=f"Tool:{t.tool_name} Args:{t.arguments}",
-                        color="green",
-                    )
-                for r in details.tool_responses:
-                    yield event, LogEvent(
-                        role=step_type,
-                        content=f"Tool:{r.tool_name} Response:{r.content}",
-                        color="green",
-                    )
-
-            if (
-                step_type == StepType.memory_retrieval
-                and event_type == EventType.step_complete.value
-            ):
-                details = event.payload.step_details
-                content = interleaved_text_media_as_str(details.inserted_context)
-                content = content[:200] + "..." if len(content) > 200 else content
-
-                yield event, LogEvent(
-                    role=step_type,
-                    content=f"Retrieved context from banks: {details.memory_bank_ids}.\n====\n{content}\n>",
-                    color="cyan",
-                )
-
-            preivous_event_type = event_type
-            previous_step_type = step_type
--- a/llama_stack/providers/utils/agents/execute_with_custom_tools.py
+++ b/llama_stack/providers/utils/agents/execute_with_custom_tools.py
@ -1,94 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from typing import AsyncGenerator, List
-
-from llama_models.llama3.api.datatypes import *  # noqa: F403
-from llama_stack.apis.agents import *  # noqa: F403
-from llama_stack.apis.memory import *  # noqa: F403
-from llama_stack.apis.safety import *  # noqa: F403
-
-from llama_stack.apis.agents import AgentTurnResponseEventType as EventType
-from llama_stack.tools.custom.datatypes import CustomTool
-
-
-class AgentWithCustomToolExecutor:
-    def __init__(
-        self,
-        api: Agents,
-        agent_id: str,
-        session_id: str,
-        agent_config: AgentConfig,
-        custom_tools: List[CustomTool],
-    ):
-        self.api = api
-        self.agent_id = agent_id
-        self.session_id = session_id
-        self.agent_config = agent_config
-        self.custom_tools = custom_tools
-
-    async def execute_turn(
-        self,
-        messages: List[Message],
-        attachments: Optional[List[Attachment]] = None,
-        max_iters: int = 5,
-        stream: bool = True,
-    ) -> AsyncGenerator:
-        tools_dict = {t.get_name(): t for t in self.custom_tools}
-
-        current_messages = messages.copy()
-        n_iter = 0
-        while n_iter < max_iters:
-            n_iter += 1
-
-            request = AgentTurnCreateRequest(
-                agent_id=self.agent_id,
-                session_id=self.session_id,
-                messages=current_messages,
-                attachments=attachments,
-                stream=stream,
-            )
-
-            turn = None
-            async for chunk in self.api.create_agent_turn(request):
-                if chunk.event.payload.event_type != EventType.turn_complete.value:
-                    yield chunk
-                else:
-                    turn = chunk.event.payload.turn
-
-            message = turn.output_message
-            if len(message.tool_calls) == 0:
-                yield chunk
-                return
-
-            if message.stop_reason == StopReason.out_of_tokens:
-                yield chunk
-                return
-
-            tool_call = message.tool_calls[0]
-            if tool_call.tool_name not in tools_dict:
-                m = ToolResponseMessage(
-                    call_id=tool_call.call_id,
-                    tool_name=tool_call.tool_name,
-                    content=f"Unknown tool `{tool_call.tool_name}` was called. Try again with something else",
-                )
-                next_message = m
-            else:
-                tool = tools_dict[tool_call.tool_name]
-                result_messages = await execute_custom_tool(tool, message)
-                next_message = result_messages[0]
-
-            yield next_message
-            current_messages = [next_message]
-
-
-async def execute_custom_tool(tool: CustomTool, message: Message) -> List[Message]:
-    result_messages = await tool.run([message])
-    assert (
-        len(result_messages) == 1
-    ), f"Expected single message, got {len(result_messages)}"
-
-    return result_messages