Introduce Llama stack distributions (#22)

* Add distribution CLI scaffolding * More progress towards `llama distribution install` * getting closer to a distro definition, distro install + configure works * Distribution server now functioning * read existing configuration, save enums properly * Remove inference uvicorn server entrypoint and llama inference CLI command * updated dependency and client model name * Improved exception handling * local imports for faster cli * undo a typo, add a passthrough distribution * implement full-passthrough in the server * add safety adapters, configuration handling, server + clients * cleanup, moving stuff to common, nuke utils * Add a Path() wrapper at the earliest place * fixes * Bring agentic system api to toolchain Add adapter dependencies and resolve adapters using a topological sort * refactor to reduce size of `agentic_system` * move straggler files and fix some important existing bugs * ApiSurface -> Api * refactor a method out * Adapter -> Provider * Make each inference provider into its own subdirectory * installation fixes * Rename Distribution -> DistributionSpec, simplify RemoteProviders * dict key instead of attr * update inference config to take model and not model_dir * Fix passthrough streaming, send headers properly not part of body :facepalm * update safety to use model sku ids and not model dirs * Update cli_reference.md * minor fixes * add DistributionConfig, fix a bug in model download * Make install + start scripts do proper configuration automatically * Update CLI_reference * Nuke fp8_requirements, fold fbgemm into common requirements * Update README, add newline between API surface configurations * Refactor download functionality out of the Command so can be reused * Add `llama model download` alias for `llama download` * Show message about checksum file so users can check themselves * Simpler intro statements * get ollama working * Reduce a bunch of dependencies from toolchain Some improvements to the distribution install script * Avoid using 
`conda run` since it buffers everything * update dependencies and rely on LLAMA_TOOLCHAIN_DIR for dev purposes * add validation for configuration input * resort imports * make optional subclasses default to yes for configuration * Remove additional_pip_packages; move deps to providers * for inline make 8b model the default * Add scripts to MANIFEST * allow installing from test.pypi.org * Fix #2 to help with testing packages * Must install llama-models at that same version first * fix PIP_ARGS --------- Co-authored-by: Hardik Shah <hjshah@fb.com> Co-authored-by: Hardik Shah <hjshah@meta.com>
2025-10-04 12:07:34 +00:00 · 2024-08-08 13:38:41 -07:00 · 2024-08-08 13:38:41 -07:00 · e830814399
commit e830814399
parent da4645a27a
115 changed files with 5839 additions and 1120 deletions
--- a/llama_toolchain/agentic_system/client.py
+++ b/llama_toolchain/agentic_system/client.py
@ -0,0 +1,130 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import asyncio
+import json
+
+from typing import AsyncGenerator
+
+import fire
+
+import httpx
+
+from llama_models.llama3_1.api.datatypes import BuiltinTool, SamplingParams
+
+from .api import (
+    AgenticSystem,
+    AgenticSystemCreateRequest,
+    AgenticSystemCreateResponse,
+    AgenticSystemInstanceConfig,
+    AgenticSystemSessionCreateRequest,
+    AgenticSystemSessionCreateResponse,
+    AgenticSystemToolDefinition,
+    AgenticSystemTurnCreateRequest,
+    AgenticSystemTurnResponseStreamChunk,
+)
+
+
+async def get_client_impl(base_url: str):
+    """Build the HTTP-backed agentic system client that targets ``base_url``."""
+    client = AgenticSystemClient(base_url)
+    return client
+
+
+class AgenticSystemClient(AgenticSystem):
+    """HTTP client for an agentic system served at ``base_url``.
+
+    Every method opens a fresh ``httpx.AsyncClient``, POSTs the output of
+    ``request.json()`` with a JSON content type, and builds the matching
+    response object from the reply.
+    """
+
+    def __init__(self, base_url: str):
+        self.base_url = base_url
+
+    async def create_agentic_system(
+        self, request: AgenticSystemCreateRequest
+    ) -> AgenticSystemCreateResponse:
+        """POST ``request`` to ``/agentic_system/create``.
+
+        Raises:
+            httpx.HTTPStatusError: if the server answers with an error status.
+        """
+        async with httpx.AsyncClient() as client:
+            response = await client.post(
+                f"{self.base_url}/agentic_system/create",
+                data=request.json(),
+                headers={"Content-Type": "application/json"},
+            )
+            response.raise_for_status()
+            return AgenticSystemCreateResponse(**response.json())
+
+    async def create_agentic_system_session(
+        self,
+        request: AgenticSystemSessionCreateRequest,
+    ) -> AgenticSystemSessionCreateResponse:
+        """POST ``request`` to ``/agentic_system/session/create``.
+
+        Raises:
+            httpx.HTTPStatusError: if the server answers with an error status.
+        """
+        async with httpx.AsyncClient() as client:
+            response = await client.post(
+                f"{self.base_url}/agentic_system/session/create",
+                data=request.json(),
+                headers={"Content-Type": "application/json"},
+            )
+            response.raise_for_status()
+            return AgenticSystemSessionCreateResponse(**response.json())
+
+    async def create_agentic_system_turn(
+        self,
+        request: AgenticSystemTurnCreateRequest,
+    ) -> AsyncGenerator:
+        """POST ``request`` to ``/agentic_system/turn/create`` and stream chunks.
+
+        Reads the response line by line and yields one
+        ``AgenticSystemTurnResponseStreamChunk`` per ``data:`` line. Lines
+        whose payload cannot be parsed or validated are printed and skipped.
+
+        Raises:
+            httpx.HTTPStatusError: if the server answers with an error status.
+        """
+        async with httpx.AsyncClient() as client:
+            async with client.stream(
+                "POST",
+                f"{self.base_url}/agentic_system/turn/create",
+                data=request.json(),
+                headers={"Content-Type": "application/json"},
+                timeout=20,
+            ) as response:
+                # Fail loudly on HTTP errors, like the non-streaming methods,
+                # instead of silently yielding nothing.
+                response.raise_for_status()
+                async for line in response.aiter_lines():
+                    if not line.startswith("data:"):
+                        continue
+                    # Strip exactly the prefix that was matched; the space
+                    # after the colon is optional, so slicing off "data: "
+                    # could eat the first character of the payload.
+                    data = line[len("data:") :].strip()
+                    # Keep the try body to parsing/validation only, so an
+                    # exception thrown into the generator at the yield is
+                    # not mistaken for a parse failure.
+                    try:
+                        chunk = AgenticSystemTurnResponseStreamChunk(
+                            **json.loads(data)
+                        )
+                    except Exception as e:
+                        print(data)
+                        print(f"Error with parsing or validation: {e}")
+                        continue
+                    yield chunk
+
+
+async def run_main(host: str, port: int):
+    """Smoke-test a remote agentic system server at ``http://{host}:{port}``.
+
+    Builds a create request for the ``Meta-Llama3.1-8B-Instruct`` model with
+    four builtin tools, sends it through ``AgenticSystemClient`` and prints
+    the response.
+    """
+    # client to test remote impl of agentic system
+    # The constructor is synchronous: awaiting the instance itself raises
+    # TypeError, so it is built without `await`.
+    api = AgenticSystemClient(f"http://{host}:{port}")
+
+    tool_definitions = [
+        AgenticSystemToolDefinition(
+            tool_name=BuiltinTool.brave_search,
+        ),
+        AgenticSystemToolDefinition(
+            tool_name=BuiltinTool.wolfram_alpha,
+        ),
+        AgenticSystemToolDefinition(
+            tool_name=BuiltinTool.photogen,
+        ),
+        AgenticSystemToolDefinition(
+            tool_name=BuiltinTool.code_interpreter,
+        ),
+    ]
+
+    create_request = AgenticSystemCreateRequest(
+        model="Meta-Llama3.1-8B-Instruct",
+        instance_config=AgenticSystemInstanceConfig(
+            instructions="You are a helpful assistant",
+            sampling_params=SamplingParams(),
+            available_tools=tool_definitions,
+            input_shields=[],
+            output_shields=[],
+            quantization_config=None,
+            debug_prefix_messages=[],
+        ),
+    )
+
+    create_response = await api.create_agentic_system(create_request)
+    print(create_response)
+    # TODO: Add chat session / turn apis to test e2e
+
+
+def main(host: str, port: int):
+    """Synchronous entry point: run ``run_main`` to completion on a new event loop."""
+    coroutine = run_main(host, port)
+    asyncio.run(coroutine)
+
+
+# Script entry point: hand `main` to python-fire, which presumably maps the
+# command-line arguments onto its `host` and `port` parameters.
+if __name__ == "__main__":
+    fire.Fire(main)