forked from phoenix-oss/llama-stack-mirror
test: code exec on mac (#1549)
Summary:
1. Adds an option to not use bwrap for code execution.
2. Disables bwrap when running tests on macOS.

Test Plan:
```
LLAMA_STACK_CONFIG=fireworks pytest -s -v tests/integration/agents/test_agents.py --safety-shield meta-llama/Llama-Guard-3-8B --text-model meta-llama/Llama-3.1-8B-Instruct
```
Verify the code_interpreter result in the logs:
```
INFO 2025-03-11 08:10:39,858 llama_stack.providers.inline.agents.meta_reference.agent_instance:1032 agents: tool call code_interpreter completed with result: content='completed\n\n541\n' error_message=None error_code=None metadata=None
```
This commit is contained in:
parent
2baf200b63
commit
6bfcb65343
6 changed files with 5260 additions and 906 deletions
|
@ -76,6 +76,7 @@ class CodeExecutionRequest:
|
||||||
only_last_cell_fail: bool = True
|
only_last_cell_fail: bool = True
|
||||||
seed: int = 0
|
seed: int = 0
|
||||||
strip_fpaths_in_stderr: bool = True
|
strip_fpaths_in_stderr: bool = True
|
||||||
|
use_bwrap: bool = True
|
||||||
|
|
||||||
|
|
||||||
class CodeExecutor:
|
class CodeExecutor:
|
||||||
|
@ -103,8 +104,6 @@ _set_seeds()\
|
||||||
|
|
||||||
script = "\n\n".join([seeds_prefix] + [CODE_ENV_PREFIX] + scripts)
|
script = "\n\n".join([seeds_prefix] + [CODE_ENV_PREFIX] + scripts)
|
||||||
with tempfile.TemporaryDirectory() as dpath:
|
with tempfile.TemporaryDirectory() as dpath:
|
||||||
bwrap_prefix = "bwrap " + generate_bwrap_command(bind_dirs=[dpath])
|
|
||||||
cmd = [*bwrap_prefix.split(), sys.executable, "-c", script]
|
|
||||||
code_fpath = os.path.join(dpath, "code.py")
|
code_fpath = os.path.join(dpath, "code.py")
|
||||||
with open(code_fpath, "w") as f:
|
with open(code_fpath, "w") as f:
|
||||||
f.write(script)
|
f.write(script)
|
||||||
|
@ -118,6 +117,13 @@ _set_seeds()\
|
||||||
MPLBACKEND="module://matplotlib_custom_backend",
|
MPLBACKEND="module://matplotlib_custom_backend",
|
||||||
PYTHONPATH=f"{DIRNAME}:{python_path}",
|
PYTHONPATH=f"{DIRNAME}:{python_path}",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if req.use_bwrap:
|
||||||
|
bwrap_prefix = "bwrap " + generate_bwrap_command(bind_dirs=[dpath])
|
||||||
|
cmd = [*bwrap_prefix.split(), sys.executable, "-c", script]
|
||||||
|
else:
|
||||||
|
cmd = [sys.executable, "-c", script]
|
||||||
|
|
||||||
stdout, stderr, returncode = do_subprocess(
|
stdout, stderr, returncode = do_subprocess(
|
||||||
cmd=cmd,
|
cmd=cmd,
|
||||||
env=env,
|
env=env,
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
@ -61,7 +62,9 @@ class CodeInterpreterToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime):
|
||||||
|
|
||||||
async def invoke_tool(self, tool_name: str, kwargs: Dict[str, Any]) -> ToolInvocationResult:
|
async def invoke_tool(self, tool_name: str, kwargs: Dict[str, Any]) -> ToolInvocationResult:
|
||||||
script = kwargs["code"]
|
script = kwargs["code"]
|
||||||
req = CodeExecutionRequest(scripts=[script])
|
# Use environment variable to control bwrap usage
|
||||||
|
force_disable_bwrap = os.environ.get("DISABLE_CODE_SANDBOX", "").lower() in ("1", "true", "yes")
|
||||||
|
req = CodeExecutionRequest(scripts=[script], use_bwrap=not force_disable_bwrap)
|
||||||
res = self.code_executor.execute(req)
|
res = self.code_executor.execute(req)
|
||||||
pieces = [res["process_status"]]
|
pieces = [res["process_status"]]
|
||||||
for out_type in ["stdout", "stderr"]:
|
for out_type in ["stdout", "stderr"]:
|
||||||
|
|
|
@ -187,7 +187,7 @@ def test_builtin_tool_web_search(llama_stack_client_with_mocked_inference, agent
|
||||||
messages=[
|
messages=[
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": "Search the web and tell me who the current CEO of Meta is.",
|
"content": "Search the web and tell me who the founder of Meta is.",
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
session_id=session_id,
|
session_id=session_id,
|
||||||
|
|
|
@ -6,12 +6,17 @@
|
||||||
import inspect
|
import inspect
|
||||||
import itertools
|
import itertools
|
||||||
import os
|
import os
|
||||||
|
import platform
|
||||||
import textwrap
|
import textwrap
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
from llama_stack.log import get_logger
|
||||||
|
|
||||||
from .report import Report
|
from .report import Report
|
||||||
|
|
||||||
|
logger = get_logger(__name__, category="tests")
|
||||||
|
|
||||||
|
|
||||||
def pytest_configure(config):
|
def pytest_configure(config):
|
||||||
config.option.tbstyle = "short"
|
config.option.tbstyle = "short"
|
||||||
|
@ -24,6 +29,10 @@ def pytest_configure(config):
|
||||||
key, value = env_var.split("=", 1)
|
key, value = env_var.split("=", 1)
|
||||||
os.environ[key] = value
|
os.environ[key] = value
|
||||||
|
|
||||||
|
if platform.system() == "Darwin": # Darwin is the system name for macOS
|
||||||
|
os.environ["DISABLE_CODE_SANDBOX"] = "1"
|
||||||
|
logger.info("Setting DISABLE_CODE_SANDBOX=1 for macOS")
|
||||||
|
|
||||||
if config.getoption("--report"):
|
if config.getoption("--report"):
|
||||||
config.pluginmanager.register(Report(config))
|
config.pluginmanager.register(Report(config))
|
||||||
|
|
||||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Add table
Add a link
Reference in a new issue