Merge remote-tracking branch 'origin/main' into telemetry

2025-12-03 09:53:45 +00:00 · 2024-09-11 12:18:12 -07:00 · 2024-09-11 12:18:12 -07:00 · 99af14b18c
commit 99af14b18c
parent f294875396 1433aaf9f7
21 changed files with 1270 additions and 267 deletions
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@ -0,0 +1,5 @@
+# Each line is a file pattern followed by one or more owners.
+
+# These owners will be the default owners for everything in
+# the repo, unless a later match takes precedence.
+* @ashwinb @yanxi0830 @hardikjshah @dltn @raghotham
--- a/docs/cli_reference.md
+++ b/docs/cli_reference.md
@ -295,13 +295,18 @@ As you can see above, each “distribution” details the “providers” it is
 Let's imagine you are working with a 8B-Instruct model. The following command will build a package (in the form of a Conda environment) _and_ configure it. As part of the configuration, you will be asked for some inputs (model_id, max_seq_len, etc.) Since we are working with a 8B model, we will name our build `8b-instruct` to help us remember the config.

 ```
-llama stack build local --name 8b-instruct
+llama stack build
 ```

-Once it runs successfully , you should see some outputs in the form:
+Once it runs, you will be prompted to enter the build name and optional arguments, and you should then see output of the form:

 ```
-$ llama stack build local --name 8b-instruct
+$ llama stack build
+Enter value for name (required): 8b-instruct
+Enter value for distribution (default: local) (required): local
+Enter value for api_providers (optional):
+Enter value for image_type (default: conda) (required):
+
 ....
 ....
 Successfully installed cfgv-3.4.0 distlib-0.3.8 identify-2.6.0 libcst-1.4.0 llama_toolchain-0.0.2 moreorless-0.4.0 nodeenv-1.9.1 pre-commit-3.8.0 stdlibs-2024.5.15 toml-0.10.2 tomlkit-0.13.0 trailrunner-1.4.0 ufmt-2.7.0 usort-1.0.8 virtualenv-20.26.3
@ -312,17 +317,41 @@ Successfully setup conda environment. Configuring build...
 ...

 YAML configuration has been written to ~/.llama/builds/local/conda/8b-instruct.yaml
+Target `8b-instruct` built with configuration at ~/.llama/builds/local/conda/8b-instruct.yaml
+Build spec configuration saved at ~/.llama/distributions/local/conda/8b-instruct-build.yaml
 ```
+
+You can rebuild the package from the saved build config:
+```
+$ cat ~/.llama/distributions/local/conda/8b-instruct-build.yaml
+name: 8b-instruct
+distribution: local
+api_providers: null
+image_type: conda
+
+$ llama stack build --config ~/.llama/distributions/local/conda/8b-instruct-build.yaml
+
+Successfully setup conda environment. Configuring build...
+
+...
+...
+
+YAML configuration has been written to ~/.llama/builds/local/conda/8b-instruct.yaml
+Target `8b-instruct` built with configuration at ~/.llama/builds/local/conda/8b-instruct.yaml
+Build spec configuration saved at ~/.llama/distributions/local/conda/8b-instruct-build.yaml
+```
+
 ### Step 3.3: Configure a distribution

 You can re-configure this distribution by running:
 ```
-llama stack configure local --name 8b-instruct
+llama stack configure ~/.llama/builds/local/conda/8b-instruct.yaml
 ```

 Here is an example run of how the CLI will guide you to fill the configuration
+
 ```
-$ llama stack configure local --name 8b-instruct
+$ llama stack configure ~/.llama/builds/local/conda/8b-instruct.yaml

 Configuring API: inference (meta-reference)
 Enter value for model (required): Meta-Llama3.1-8B-Instruct
@ -363,12 +392,12 @@ Now let’s start Llama Stack Distribution Server.
 You need the YAML configuration file which was written out at the end by the `llama stack build` step.

 ```
-llama stack run local --name 8b-instruct --port 5000
+llama stack run ~/.llama/builds/local/conda/8b-instruct.yaml --port 5000
 ```
 You should see the Stack server start and print the APIs that it is supporting,

 ```
-$ llama stack run local --name 8b-instruct --port 5000
+$ llama stack run ~/.llama/builds/local/conda/8b-instruct.yaml --port 5000

 > initializing model parallel with size 1
 > initializing ddp with size 1
--- a/llama_toolchain/agentic_system/api/api.py
+++ b/llama_toolchain/agentic_system/api/api.py
@ -41,11 +41,19 @@ class ToolDefinitionCommon(BaseModel):
    output_shields: Optional[List[ShieldDefinition]] = Field(default_factory=list)


+class SearchEngineType(Enum):
+    bing = "bing"
+    brave = "brave"
+
+
@json_schema_type
-class BraveSearchToolDefinition(ToolDefinitionCommon):
+class SearchToolDefinition(ToolDefinitionCommon):
+    # NOTE: brave_search is just a placeholder since model always uses
+    # brave_search as tool call name
    type: Literal[AgenticSystemTool.brave_search.value] = (
        AgenticSystemTool.brave_search.value
    )
+    engine: SearchEngineType = SearchEngineType.brave
    remote_execution: Optional[RestAPIExecutionConfig] = None


@ -163,7 +171,7 @@ class MemoryToolDefinition(ToolDefinitionCommon):

 AgenticSystemToolDefinition = Annotated[
    Union[
-        BraveSearchToolDefinition,
+        SearchToolDefinition,
        WolframAlphaToolDefinition,
        PhotogenToolDefinition,
        CodeInterpreterToolDefinition,
--- a/llama_toolchain/agentic_system/client.py
+++ b/llama_toolchain/agentic_system/client.py
@ -134,7 +134,7 @@ async def run_main(host: str, port: int):
    api = AgenticSystemClient(f"http://{host}:{port}")

    tool_definitions = [
-        BraveSearchToolDefinition(),
+        SearchToolDefinition(engine=SearchEngineType.bing),
        WolframAlphaToolDefinition(),
        CodeInterpreterToolDefinition(),
    ]
--- a/llama_toolchain/agentic_system/meta_reference/agent_instance.py
+++ b/llama_toolchain/agentic_system/meta_reference/agent_instance.py
@ -710,7 +710,7 @@ class ChatAgent(ShieldRunnerMixin):
    def _get_tools(self) -> List[ToolDefinition]:
        ret = []
        for t in self.agent_config.tools:
-            if isinstance(t, BraveSearchToolDefinition):
+            if isinstance(t, SearchToolDefinition):
                ret.append(ToolDefinition(tool_name=BuiltinTool.brave_search))
            elif isinstance(t, WolframAlphaToolDefinition):
                ret.append(ToolDefinition(tool_name=BuiltinTool.wolfram_alpha))
--- a/llama_toolchain/agentic_system/meta_reference/agentic_system.py
+++ b/llama_toolchain/agentic_system/meta_reference/agentic_system.py
@ -15,9 +15,9 @@ from llama_toolchain.memory.api import Memory
 from llama_toolchain.safety.api import Safety
 from llama_toolchain.agentic_system.api import *  # noqa: F403
 from llama_toolchain.tools.builtin import (
-    BraveSearchTool,
    CodeInterpreterTool,
    PhotogenTool,
+    SearchTool,
    WolframAlphaTool,
 )
 from llama_toolchain.tools.safety import with_safety
@ -62,17 +62,19 @@ class MetaReferenceAgenticSystemImpl(AgenticSystem):
                if not key:
                    raise ValueError("Wolfram API key not defined in config")
                tool = WolframAlphaTool(key)
-            elif isinstance(tool_defn, BraveSearchToolDefinition):
+            elif isinstance(tool_defn, SearchToolDefinition):
+                key = None
+                if tool_defn.engine == SearchEngineType.brave:
                    key = self.config.brave_search_api_key
+                elif tool_defn.engine == SearchEngineType.bing:
+                    key = self.config.bing_search_api_key
                if not key:
-                    raise ValueError("Brave API key not defined in config")
-                tool = BraveSearchTool(key)
+                    raise ValueError("API key not defined in config")
+                tool = SearchTool(tool_defn.engine, key)
            elif isinstance(tool_defn, CodeInterpreterToolDefinition):
                tool = CodeInterpreterTool()
            elif isinstance(tool_defn, PhotogenToolDefinition):
-                tool = PhotogenTool(
-                    dump_dir=tempfile.mkdtemp(),
-                )
+                tool = PhotogenTool(dump_dir=tempfile.mkdtemp())
            else:
                continue

--- a/llama_toolchain/agentic_system/meta_reference/config.py
+++ b/llama_toolchain/agentic_system/meta_reference/config.py
@ -11,4 +11,5 @@ from pydantic import BaseModel

 class MetaReferenceImplConfig(BaseModel):
    brave_search_api_key: Optional[str] = None
+    bing_search_api_key: Optional[str] = None
    wolfram_api_key: Optional[str] = None
--- a/llama_toolchain/cli/stack/build.py
+++ b/llama_toolchain/cli/stack/build.py
@ -8,6 +8,7 @@ import argparse

 from llama_toolchain.cli.subcommand import Subcommand
 from llama_toolchain.core.datatypes import *  # noqa: F403
+import yaml


 def parse_api_provider_tuples(
@ -47,55 +48,45 @@ class StackBuild(Subcommand):
        self.parser.set_defaults(func=self._run_stack_build_command)

    def _add_arguments(self):
-        from llama_toolchain.core.distribution_registry import available_distribution_specs
-        from llama_toolchain.core.package import (
-            BuildType,
+        from llama_toolchain.core.distribution_registry import (
+            available_distribution_specs,
        )
+        from llama_toolchain.core.package import ImageType

        allowed_ids = [d.distribution_type for d in available_distribution_specs()]
        self.parser.add_argument(
-            "distribution",
+            "--config",
            type=str,
-            help="Distribution to build (either \"adhoc\" OR one of: {})".format(allowed_ids),
-        )
-        self.parser.add_argument(
-            "api_providers",
-            nargs='?',
-            help="Comma separated list of (api=provider) tuples",
+            help="Path to a config file to use for the build",
        )

-        self.parser.add_argument(
-            "--name",
-            type=str,
-            help="Name of the build target (image, conda env)",
-            required=True,
-        )
-        self.parser.add_argument(
-            "--type",
-            type=str,
-            default="conda_env",
-            choices=[v.value for v in BuildType],
-        )
+    def _run_stack_build_command_from_build_config(
+        self, build_config: BuildConfig
+    ) -> None:
+        import json
+        import os

-    def _run_stack_build_command(self, args: argparse.Namespace) -> None:
+        from llama_toolchain.common.config_dirs import DISTRIBS_BASE_DIR
+        from llama_toolchain.common.serialize import EnumEncoder
        from llama_toolchain.core.distribution_registry import resolve_distribution_spec
-        from llama_toolchain.core.package import (
-            ApiInput,
-            BuildType,
-            build_package,
-        )
+        from llama_toolchain.core.package import ApiInput, build_package, ImageType
+        from termcolor import cprint

        api_inputs = []
-        if args.distribution == "adhoc":
-            if not args.api_providers:
-                self.parser.error("You must specify API providers with (api=provider,...) for building an adhoc distribution")
+        if build_config.distribution == "adhoc":
+            if not build_config.api_providers:
+                self.parser.error(
+                    "You must specify API providers with (api=provider,...) for building an adhoc distribution"
+                )
                return

-            parsed = parse_api_provider_tuples(args.api_providers, self.parser)
+            parsed = parse_api_provider_tuples(build_config.api_providers, self.parser)
            for api, provider_spec in parsed.items():
                for dep in provider_spec.api_dependencies:
                    if dep not in parsed:
-                        self.parser.error(f"API {api} needs dependency {dep} provided also")
+                        self.parser.error(
+                            f"API {api} needs dependency {dep} provided also"
+                        )
                        return

                api_inputs.append(
@ -106,13 +97,17 @@ class StackBuild(Subcommand):
                )
            docker_image = None
        else:
-            if args.api_providers:
-                self.parser.error("You cannot specify API providers for pre-registered distributions")
+            if build_config.api_providers:
+                self.parser.error(
+                    "You cannot specify API providers for pre-registered distributions"
+                )
                return

-            dist = resolve_distribution_spec(args.distribution)
+            dist = resolve_distribution_spec(build_config.distribution)
            if dist is None:
-                self.parser.error(f"Could not find distribution {args.distribution}")
+                self.parser.error(
+                    f"Could not find distribution {build_config.distribution}"
+                )
                return

            for api, provider_type in dist.providers.items():
@ -126,8 +121,41 @@ class StackBuild(Subcommand):

        build_package(
            api_inputs,
-            build_type=BuildType(args.type),
-            name=args.name,
-            distribution_type=args.distribution,
+            image_type=ImageType(build_config.image_type),
+            name=build_config.name,
+            distribution_type=build_config.distribution,
            docker_image=docker_image,
        )
+
+        # save build.yaml spec for building same distribution again
+        build_dir = (
+            DISTRIBS_BASE_DIR / build_config.distribution / build_config.image_type
+        )
+        os.makedirs(build_dir, exist_ok=True)
+        build_file_path = build_dir / f"{build_config.name}-build.yaml"
+
+        with open(build_file_path, "w") as f:
+            to_write = json.loads(json.dumps(build_config.dict(), cls=EnumEncoder))
+            f.write(yaml.dump(to_write, sort_keys=False))
+
+        cprint(
+            f"Build spec configuration saved at {str(build_file_path)}",
+            color="green",
+        )
+
+    def _run_stack_build_command(self, args: argparse.Namespace) -> None:
+        from llama_toolchain.common.prompt_for_config import prompt_for_config
+        from llama_toolchain.core.dynamic import instantiate_class_type
+
+        if args.config:
+            with open(args.config, "r") as f:
+                try:
+                    build_config = BuildConfig(**yaml.safe_load(f))
+                except Exception as e:
+                    self.parser.error(f"Could not parse config file {args.config}: {e}")
+                    return
+                self._run_stack_build_command_from_build_config(build_config)
+            return
+
+        build_config = prompt_for_config(BuildConfig, None)
+        self._run_stack_build_command_from_build_config(build_config)
--- a/llama_toolchain/cli/stack/configure.py
+++ b/llama_toolchain/cli/stack/configure.py
@ -9,10 +9,10 @@ import json
 from pathlib import Path

 import yaml
-from termcolor import cprint

 from llama_toolchain.cli.subcommand import Subcommand
 from llama_toolchain.common.config_dirs import BUILDS_BASE_DIR
+from termcolor import cprint
 from llama_toolchain.core.datatypes import *  # noqa: F403


@ -34,38 +34,19 @@ class StackConfigure(Subcommand):
        from llama_toolchain.core.distribution_registry import (
            available_distribution_specs,
        )
-        from llama_toolchain.core.package import BuildType
+        from llama_toolchain.core.package import ImageType

        allowed_ids = [d.distribution_type for d in available_distribution_specs()]
        self.parser.add_argument(
-            "distribution",
+            "config",
            type=str,
-            help='Distribution ("adhoc" or one of: {})'.format(allowed_ids),
-        )
-        self.parser.add_argument(
-            "--name",
-            type=str,
-            help="Name of the build",
-            required=True,
-        )
-        self.parser.add_argument(
-            "--type",
-            type=str,
-            default="conda_env",
-            choices=[v.value for v in BuildType],
+            help="Path to the package config file (e.g. ~/.llama/builds/<distribution>/<image_type>/<name>.yaml)",
        )

    def _run_stack_configure_cmd(self, args: argparse.Namespace) -> None:
-        from llama_toolchain.core.package import BuildType
+        from llama_toolchain.core.package import ImageType

-        build_type = BuildType(args.type)
-        name = args.name
-        config_file = (
-            BUILDS_BASE_DIR
-            / args.distribution
-            / build_type.descriptor()
-            / f"{name}.yaml"
-        )
+        config_file = Path(args.config)
        if not config_file.exists():
            self.parser.error(
                f"Could not find {config_file}. Please run `llama stack build` first"
--- a/llama_toolchain/cli/stack/run.py
+++ b/llama_toolchain/cli/stack/run.py
@ -29,24 +29,12 @@ class StackRun(Subcommand):
        self.parser.set_defaults(func=self._run_stack_run_cmd)

    def _add_arguments(self):
-        from llama_toolchain.core.package import BuildType
+        from llama_toolchain.core.package import ImageType

        self.parser.add_argument(
-            "distribution",
+            "config",
            type=str,
-            help="Distribution whose build you want to start",
-        )
-        self.parser.add_argument(
-            "--name",
-            type=str,
-            help="Name of the build you want to start",
-            required=True,
-        )
-        self.parser.add_argument(
-            "--type",
-            type=str,
-            default="conda_env",
-            choices=[v.value for v in BuildType],
+            help="Path to config file to use for the run",
        )
        self.parser.add_argument(
            "--port",
@ -63,12 +51,13 @@ class StackRun(Subcommand):

    def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
        from llama_toolchain.common.exec import run_with_pty
-        from llama_toolchain.core.package import BuildType
+        from llama_toolchain.core.package import ImageType

-        build_type = BuildType(args.type)
-        build_dir = BUILDS_BASE_DIR / args.distribution / build_type.descriptor()
-        path = build_dir / f"{args.name}.yaml"
+        if not args.config:
+            self.parser.error("Must specify a config file to run")
+            return

+        path = args.config
        config_file = Path(path)

        if not config_file.exists():
--- a/llama_toolchain/common/deployment_types.py
+++ b/llama_toolchain/common/deployment_types.py
@ -5,7 +5,7 @@
 # the root directory of this source tree.

 from enum import Enum
-from typing import Dict, Optional
+from typing import Any, Dict, Optional

 from llama_models.llama3.api.datatypes import URL

@ -26,6 +26,6 @@ class RestAPIMethod(Enum):
 class RestAPIExecutionConfig(BaseModel):
    url: URL
    method: RestAPIMethod
-    params: Optional[Dict[str, str]] = None
-    headers: Optional[Dict[str, str]] = None
-    body: Optional[Dict[str, str]] = None
+    params: Optional[Dict[str, Any]] = None
+    headers: Optional[Dict[str, Any]] = None
+    body: Optional[Dict[str, Any]] = None
--- a/llama_toolchain/core/build_conda_env.sh
+++ b/llama_toolchain/core/build_conda_env.sh
@ -19,7 +19,7 @@ fi

 set -euo pipefail

-if [ "$#" -ne 3 ]; then
+if [ "$#" -ne 4 ]; then
  echo "Usage: $0 <distribution_type> <build_name> <pip_dependencies>" >&2
  echo "Example: $0 <distribution_type> mybuild 'numpy pandas scipy'" >&2
  exit 1
@ -28,7 +28,8 @@ fi
 distribution_type="$1"
 build_name="$2"
 env_name="llamastack-$build_name"
-pip_dependencies="$3"
+config_file="$3"
+pip_dependencies="$4"

 # Define color codes
 RED='\033[0;31m'
@ -117,4 +118,4 @@ ensure_conda_env_python310 "$env_name" "$pip_dependencies"

 printf "${GREEN}Successfully setup conda environment. Configuring build...${NC}\n"

-$CONDA_PREFIX/bin/python3 -m llama_toolchain.cli.llama stack configure $distribution_type --name "$build_name" --type conda_env
+$CONDA_PREFIX/bin/python3 -m llama_toolchain.cli.llama stack configure $config_file
--- a/llama_toolchain/core/build_container.sh
+++ b/llama_toolchain/core/build_container.sh
@ -4,7 +4,7 @@ LLAMA_MODELS_DIR=${LLAMA_MODELS_DIR:-}
 LLAMA_TOOLCHAIN_DIR=${LLAMA_TOOLCHAIN_DIR:-}
 TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}

-if [ "$#" -ne 4 ]; then
+if [ "$#" -ne 5 ]; then
  echo "Usage: $0 <distribution_type> <build_name> <docker_base> <pip_dependencies>
  echo "Example: $0 distribution_type my-fastapi-app python:3.9-slim 'fastapi uvicorn'
  exit 1
@ -14,7 +14,8 @@ distribution_type=$1
 build_name="$2"
 image_name="llamastack-$build_name"
 docker_base=$3
-pip_dependencies=$4
+config_file=$4
+pip_dependencies=$5

 # Define color codes
 RED='\033[0;31m'
@ -110,4 +111,4 @@ set +x
 printf "${GREEN}Succesfully setup Podman image. Configuring build...${NC}"
 echo "You can run it with: podman run -p 8000:8000 $image_name"

-$CONDA_PREFIX/bin/python3 -m llama_toolchain.cli.llama stack configure $distribution_type --name "$build_name" --type container
+$CONDA_PREFIX/bin/python3 -m llama_toolchain.cli.llama stack configure $config_file
--- a/llama_toolchain/core/datatypes.py
+++ b/llama_toolchain/core/datatypes.py
@ -189,3 +189,19 @@ Provider configurations for each of the APIs provided by this package. This incl
 the dependencies of these providers as well.
 """,
    )
+
+
+@json_schema_type
+class BuildConfig(BaseModel):
+    name: str
+    distribution: str = Field(
+        default="local", description="Type of distribution to build (adhoc | {})"
+    )
+    api_providers: Optional[str] = Field(
+        default_factory=list,
+        description="List of API provider names to build",
+    )
+    image_type: str = Field(
+        default="conda",
+        description="Type of package to build (conda | container)",
+    )
--- a/llama_toolchain/core/package.py
+++ b/llama_toolchain/core/package.py
@ -12,24 +12,21 @@ from typing import List, Optional

 import pkg_resources
 import yaml
-from pydantic import BaseModel
-
-from termcolor import cprint

 from llama_toolchain.common.config_dirs import BUILDS_BASE_DIR
 from llama_toolchain.common.exec import run_with_pty
 from llama_toolchain.common.serialize import EnumEncoder
+from pydantic import BaseModel
+
+from termcolor import cprint

 from llama_toolchain.core.datatypes import *  # noqa: F403
 from llama_toolchain.core.distribution import api_providers, SERVER_DEPENDENCIES


-class BuildType(Enum):
-    container = "container"
-    conda_env = "conda_env"
-
-    def descriptor(self) -> str:
-        return "docker" if self == self.container else "conda"
+class ImageType(Enum):
+    docker = "docker"
+    conda = "conda"


 class Dependencies(BaseModel):
@ -44,7 +41,7 @@ class ApiInput(BaseModel):

 def build_package(
    api_inputs: List[ApiInput],
-    build_type: BuildType,
+    image_type: ImageType,
    name: str,
    distribution_type: Optional[str] = None,
    docker_image: Optional[str] = None,
@ -52,7 +49,7 @@ def build_package(
    if not distribution_type:
        distribution_type = "adhoc"

-    build_dir = BUILDS_BASE_DIR / distribution_type / build_type.descriptor()
+    build_dir = BUILDS_BASE_DIR / distribution_type / image_type.value
    os.makedirs(build_dir, exist_ok=True)

    package_name = name.replace("::", "-")
@ -106,14 +103,14 @@ def build_package(
        )

    c.distribution_type = distribution_type
-    c.docker_image = package_name if build_type == BuildType.container else None
-    c.conda_env = package_name if build_type == BuildType.conda_env else None
+    c.docker_image = package_name if image_type == ImageType.docker else None
+    c.conda_env = package_name if image_type == ImageType.conda else None

    with open(package_file, "w") as f:
        to_write = json.loads(json.dumps(c.dict(), cls=EnumEncoder))
        f.write(yaml.dump(to_write, sort_keys=False))

-    if build_type == BuildType.container:
+    if image_type == ImageType.docker:
        script = pkg_resources.resource_filename(
            "llama_toolchain", "core/build_container.sh"
        )
@ -122,6 +119,7 @@ def build_package(
            distribution_type,
            package_name,
            package_deps.docker_image,
+            str(package_file),
            " ".join(package_deps.pip_packages),
        ]
    else:
@ -132,6 +130,7 @@ def build_package(
            script,
            distribution_type,
            package_name,
+            str(package_file),
            " ".join(package_deps.pip_packages),
        ]

--- a/llama_toolchain/stack.py
+++ b/llama_toolchain/stack.py
@ -15,6 +15,7 @@ from llama_toolchain.telemetry.api import *  # noqa: F403
 from llama_toolchain.post_training.api import *  # noqa: F403
 from llama_toolchain.reward_scoring.api import *  # noqa: F403
 from llama_toolchain.synthetic_data_generation.api import *  # noqa: F403
+from llama_toolchain.safety.api import *  # noqa: F403


 class LlamaStack(
@ -22,6 +23,7 @@ class LlamaStack(
    BatchInference,
    AgenticSystem,
    RewardScoring,
+    Safety,
    SyntheticDataGeneration,
    Datasets,
    Telemetry,
--- a/llama_toolchain/tools/builtin.py
+++ b/llama_toolchain/tools/builtin.py
@ -83,14 +83,72 @@ class PhotogenTool(SingleMessageBuiltinTool):
        raise NotImplementedError()


-class BraveSearchTool(SingleMessageBuiltinTool):
-    def __init__(self, api_key: str) -> None:
+class SearchTool(SingleMessageBuiltinTool):
+    def __init__(self, engine: SearchEngineType, api_key: str, **kwargs) -> None:
        self.api_key = api_key
+        if engine == SearchEngineType.bing:
+            self.engine = BingSearch(api_key, **kwargs)
+        elif engine == SearchEngineType.brave:
+            self.engine = BraveSearch(api_key, **kwargs)
+        else:
+            raise ValueError(f"Unknown search engine: {engine}")

    def get_name(self) -> str:
        return BuiltinTool.brave_search.value

    async def run_impl(self, query: str) -> str:
+        return await self.engine.search(query)
+
+
+class BingSearch:
+    def __init__(self, api_key: str, top_k: int = 3, **kwargs) -> None:
+        self.api_key = api_key
+        self.top_k = top_k
+
+    async def search(self, query: str) -> str:
+        url = "https://api.bing.microsoft.com/v7.0/search"
+        headers = {
+            "Ocp-Apim-Subscription-Key": self.api_key,
+        }
+        params = {
+            "count": self.top_k,
+            "textDecorations": True,
+            "textFormat": "HTML",
+            "q": query,
+        }
+
+        response = requests.get(url=url, params=params, headers=headers)
+        response.raise_for_status()
+        clean = self._clean_response(response.json())
+        return json.dumps(clean)
+
+    def _clean_response(self, search_response):
+        clean_response = []
+        query = search_response["queryContext"]["originalQuery"]
+        if "webPages" in search_response:
+            pages = search_response["webPages"]["value"]
+            for p in pages:
+                selected_keys = {"name", "url", "snippet"}
+                clean_response.append(
+                    {k: v for k, v in p.items() if k in selected_keys}
+                )
+        if "news" in search_response:
+            clean_news = []
+            news = search_response["news"]["value"]
+            for n in news:
+                selected_keys = {"name", "url", "description"}
+                clean_news.append({k: v for k, v in n.items() if k in selected_keys})
+
+            clean_response.append(clean_news)
+
+        return {"query": query, "top_k": clean_response}
+
+
+class BraveSearch:
+    def __init__(self, api_key: str) -> None:
+        self.api_key = api_key
+
+    async def search(self, query: str) -> str:
        url = "https://api.search.brave.com/res/v1/web/search"
        headers = {
            "X-Subscription-Token": self.api_key,
--- a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html
+++ b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html
--- a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml
+++ b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml
--- a/rfcs/openapi_generator/generate.py
+++ b/rfcs/openapi_generator/generate.py
@ -35,7 +35,10 @@ from llama_toolchain.stack import LlamaStack


 # TODO: this should be fixed in the generator itself so it reads appropriate annotations
-STREAMING_ENDPOINTS = ["/agentic_system/turn/create"]
+STREAMING_ENDPOINTS = [
+    "/agentic_system/turn/create",
+    "/inference/chat_completion",
+]


 def patch_sse_stream_responses(spec: Specification):
--- a/rfcs/openapi_generator/pyopenapi/generator.py
+++ b/rfcs/openapi_generator/pyopenapi/generator.py
@ -468,12 +468,14 @@ class Generator:
            builder = ContentBuilder(self.schema_builder)
            first = next(iter(op.request_params))
            request_name, request_type = first
-            if len(op.request_params) == 1 and "Request" in first[1].__name__:
-                # TODO(ashwin): Undo the "Request" hack and this entire block eventually
-                request_name, request_type = first
-            else:
+
            from dataclasses import make_dataclass

+            if len(op.request_params) == 1 and "Request" in first[1].__name__:
+                # TODO(ashwin): Undo the "Request" hack and this entire block eventually
+                request_name = first[1].__name__ + "Wrapper"
+                request_type = make_dataclass(request_name, op.request_params)
+            else:
                op_name = "".join(word.capitalize() for word in op.name.split("_"))
                request_name = f"{op_name}Request"
                request_type = make_dataclass(request_name, op.request_params)