test: code exec on mac (#1549)

Summary:
1. Adds an option to not use bwrap for code execution.
2. Disables bwrap when running tests on macOS.

Test Plan:
```
LLAMA_STACK_CONFIG=fireworks pytest -s -v tests/integration/agents/test_agents.py --safety-shield meta-llama/Llama-Guard-3-8B --text-model meta-llama/Llama-3.1-8B-Instruct
```

Verify the code_interpreter result in the logs:

```
INFO 2025-03-11 08:10:39,858 llama_stack.providers.inline.agents.meta_reference.agent_instance:1032 agents: tool call code_interpreter completed with result: content='completed\n\n541\n' error_message=None error_code=None metadata=None
```
This commit is contained in:
ehhuang 2025-03-12 19:21:53 -07:00 committed by GitHub
parent 2baf200b63
commit 6bfcb65343
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 5260 additions and 906 deletions

View file

@ -76,6 +76,7 @@ class CodeExecutionRequest:
only_last_cell_fail: bool = True only_last_cell_fail: bool = True
seed: int = 0 seed: int = 0
strip_fpaths_in_stderr: bool = True strip_fpaths_in_stderr: bool = True
use_bwrap: bool = True
class CodeExecutor: class CodeExecutor:
@ -103,8 +104,6 @@ _set_seeds()\
script = "\n\n".join([seeds_prefix] + [CODE_ENV_PREFIX] + scripts) script = "\n\n".join([seeds_prefix] + [CODE_ENV_PREFIX] + scripts)
with tempfile.TemporaryDirectory() as dpath: with tempfile.TemporaryDirectory() as dpath:
bwrap_prefix = "bwrap " + generate_bwrap_command(bind_dirs=[dpath])
cmd = [*bwrap_prefix.split(), sys.executable, "-c", script]
code_fpath = os.path.join(dpath, "code.py") code_fpath = os.path.join(dpath, "code.py")
with open(code_fpath, "w") as f: with open(code_fpath, "w") as f:
f.write(script) f.write(script)
@ -118,6 +117,13 @@ _set_seeds()\
MPLBACKEND="module://matplotlib_custom_backend", MPLBACKEND="module://matplotlib_custom_backend",
PYTHONPATH=f"{DIRNAME}:{python_path}", PYTHONPATH=f"{DIRNAME}:{python_path}",
) )
if req.use_bwrap:
bwrap_prefix = "bwrap " + generate_bwrap_command(bind_dirs=[dpath])
cmd = [*bwrap_prefix.split(), sys.executable, "-c", script]
else:
cmd = [sys.executable, "-c", script]
stdout, stderr, returncode = do_subprocess( stdout, stderr, returncode = do_subprocess(
cmd=cmd, cmd=cmd,
env=env, env=env,

View file

@ -6,6 +6,7 @@
import logging import logging
import os
import tempfile import tempfile
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
@ -61,7 +62,9 @@ class CodeInterpreterToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime):
async def invoke_tool(self, tool_name: str, kwargs: Dict[str, Any]) -> ToolInvocationResult: async def invoke_tool(self, tool_name: str, kwargs: Dict[str, Any]) -> ToolInvocationResult:
script = kwargs["code"] script = kwargs["code"]
req = CodeExecutionRequest(scripts=[script]) # Use environment variable to control bwrap usage
force_disable_bwrap = os.environ.get("DISABLE_CODE_SANDBOX", "").lower() in ("1", "true", "yes")
req = CodeExecutionRequest(scripts=[script], use_bwrap=not force_disable_bwrap)
res = self.code_executor.execute(req) res = self.code_executor.execute(req)
pieces = [res["process_status"]] pieces = [res["process_status"]]
for out_type in ["stdout", "stderr"]: for out_type in ["stdout", "stderr"]:

View file

@ -187,7 +187,7 @@ def test_builtin_tool_web_search(llama_stack_client_with_mocked_inference, agent
messages=[ messages=[
{ {
"role": "user", "role": "user",
"content": "Search the web and tell me who the current CEO of Meta is.", "content": "Search the web and tell me who the founder of Meta is.",
} }
], ],
session_id=session_id, session_id=session_id,

View file

@ -6,12 +6,17 @@
import inspect import inspect
import itertools import itertools
import os import os
import platform
import textwrap import textwrap
from dotenv import load_dotenv from dotenv import load_dotenv
from llama_stack.log import get_logger
from .report import Report from .report import Report
logger = get_logger(__name__, category="tests")
def pytest_configure(config): def pytest_configure(config):
config.option.tbstyle = "short" config.option.tbstyle = "short"
@ -24,6 +29,10 @@ def pytest_configure(config):
key, value = env_var.split("=", 1) key, value = env_var.split("=", 1)
os.environ[key] = value os.environ[key] = value
if platform.system() == "Darwin": # Darwin is the system name for macOS
os.environ["DISABLE_CODE_SANDBOX"] = "1"
logger.info("Setting DISABLE_CODE_SANDBOX=1 for macOS")
if config.getoption("--report"): if config.getoption("--report"):
config.pluginmanager.register(Report(config)) config.pluginmanager.register(Report(config))

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long