test: code exec on mac (#1549)

Summary: 1. Adds an option to not use bwrap for code execution. 2. Disables bwrap when running tests on macOS. Test Plan: ``` LLAMA_STACK_CONFIG=fireworks pytest -s -v tests/integration/agents/test_agents.py --safety-shield meta-llama/Llama-Guard-3-8B --text-model meta-llama/Llama-3.1-8B-Instruct ``` Verify the code_interpreter result in the logs: INFO 2025-03-11 08:10:39,858 llama_stack.providers.inline.agents.meta_reference.agent_instance:1032 agents: tool call code_interpreter completed with result: content='completed\n\n541\n' error_message=None error_code=None metadata=None
2025-03-12 19:21:53 -07:00 · 2025-03-12 19:21:53 -07:00 · 6bfcb65343
commit 6bfcb65343
parent 2baf200b63
6 changed files with 5260 additions and 906 deletions
--- a/llama_stack/providers/inline/tool_runtime/code_interpreter/code_execution.py
+++ b/llama_stack/providers/inline/tool_runtime/code_interpreter/code_execution.py
@ -76,6 +76,7 @@ class CodeExecutionRequest:
    only_last_cell_fail: bool = True
    seed: int = 0
    strip_fpaths_in_stderr: bool = True
    use_bwrap: bool = True
 class CodeExecutor:
@ -103,8 +104,6 @@ _set_seeds()\
        script = "\n\n".join([seeds_prefix] + [CODE_ENV_PREFIX] + scripts)
        with tempfile.TemporaryDirectory() as dpath:
            bwrap_prefix = "bwrap " + generate_bwrap_command(bind_dirs=[dpath])
            cmd = [*bwrap_prefix.split(), sys.executable, "-c", script]
            code_fpath = os.path.join(dpath, "code.py")
            with open(code_fpath, "w") as f:
                f.write(script)
@ -118,6 +117,13 @@ _set_seeds()\
                    MPLBACKEND="module://matplotlib_custom_backend",
                    PYTHONPATH=f"{DIRNAME}:{python_path}",
                )
                if req.use_bwrap:
                    bwrap_prefix = "bwrap " + generate_bwrap_command(bind_dirs=[dpath])
                    cmd = [*bwrap_prefix.split(), sys.executable, "-c", script]
                else:
                    cmd = [sys.executable, "-c", script]
                stdout, stderr, returncode = do_subprocess(
                    cmd=cmd,
                    env=env,
--- a/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py
+++ b/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py
@ -6,6 +6,7 @@
 import logging
 import os
 import tempfile
 from typing import Any, Dict, List, Optional
@ -61,7 +62,9 @@ class CodeInterpreterToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime):
    async def invoke_tool(self, tool_name: str, kwargs: Dict[str, Any]) -> ToolInvocationResult:
        script = kwargs["code"]
-        req = CodeExecutionRequest(scripts=[script])
+        # Use environment variable to control bwrap usage
        force_disable_bwrap = os.environ.get("DISABLE_CODE_SANDBOX", "").lower() in ("1", "true", "yes")
        req = CodeExecutionRequest(scripts=[script], use_bwrap=not force_disable_bwrap)
        res = self.code_executor.execute(req)
        pieces = [res["process_status"]]
        for out_type in ["stdout", "stderr"]:
--- a/tests/integration/agents/test_agents.py
+++ b/tests/integration/agents/test_agents.py
@ -187,7 +187,7 @@ def test_builtin_tool_web_search(llama_stack_client_with_mocked_inference, agent
        messages=[
            {
                "role": "user",
-                "content": "Search the web and tell me who the current CEO of Meta is.",
+                "content": "Search the web and tell me who the founder of Meta is.",
            }
        ],
        session_id=session_id,
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@ -6,12 +6,17 @@
 import inspect
 import itertools
 import os
 import platform
 import textwrap
 from dotenv import load_dotenv
 from llama_stack.log import get_logger
 from .report import Report
 logger = get_logger(__name__, category="tests")
 def pytest_configure(config):
    config.option.tbstyle = "short"
@ -24,6 +29,10 @@ def pytest_configure(config):
        key, value = env_var.split("=", 1)
        os.environ[key] = value
    if platform.system() == "Darwin":  # Darwin is the system name for macOS
        os.environ["DISABLE_CODE_SANDBOX"] = "1"
        logger.info("Setting DISABLE_CODE_SANDBOX=1 for macOS")
    if config.getoption("--report"):
        config.pluginmanager.register(Report(config))
--- a/tests/integration/fixtures/recorded_responses/chat_completion.json
+++ b/tests/integration/fixtures/recorded_responses/chat_completion.json
--- a/tests/integration/fixtures/recorded_responses/invoke_tool.json
+++ b/tests/integration/fixtures/recorded_responses/invoke_tool.json