llama-stack-mirror/llama_toolchain/agentic_system/utils.py
Ashwin Bharambe e830814399
Introduce Llama stack distributions (#22)
* Add distribution CLI scaffolding

* More progress towards `llama distribution install`

* getting closer to a distro definition, distro install + configure works

* Distribution server now functioning

* read existing configuration, save enums properly

* Remove inference uvicorn server entrypoint and llama inference CLI command

* updated dependency and client model name

* Improved exception handling

* local imports for faster cli

* undo a typo, add a passthrough distribution

* implement full-passthrough in the server

* add safety adapters, configuration handling, server + clients

* cleanup, moving stuff to common, nuke utils

* Add a Path() wrapper at the earliest place

* fixes

* Bring agentic system api to toolchain

Add adapter dependencies and resolve adapters using a topological sort

* refactor to reduce size of `agentic_system`

* move straggler files and fix some important existing bugs

* ApiSurface -> Api

* refactor a method out

* Adapter -> Provider

* Make each inference provider into its own subdirectory

* installation fixes

* Rename Distribution -> DistributionSpec, simplify RemoteProviders

* dict key instead of attr

* update inference config to take model and not model_dir

* Fix passthrough streaming, send headers properly not part of body :facepalm

* update safety to use model sku ids and not model dirs

* Update cli_reference.md

* minor fixes

* add DistributionConfig, fix a bug in model download

* Make install + start scripts do proper configuration automatically

* Update CLI_reference

* Nuke fp8_requirements, fold fbgemm into common requirements

* Update README, add newline between API surface configurations

* Refactor download functionality out of the Command so can be reused

* Add `llama model download` alias for `llama download`

* Show message about checksum file so users can check themselves

* Simpler intro statements

* get ollama working

* Reduce a bunch of dependencies from toolchain

Some improvements to the distribution install script

* Avoid using `conda run` since it buffers everything

* update dependencies and rely on LLAMA_TOOLCHAIN_DIR for dev purposes

* add validation for configuration input

* resort imports

* make optional subclasses default to yes for configuration

* Remove additional_pip_packages; move deps to providers

* for inline make 8b model the default

* Add scripts to MANIFEST

* allow installing from test.pypi.org

* Fix #2 to help with testing packages

* Must install llama-models at that same version first

* fix PIP_ARGS

---------

Co-authored-by: Hardik Shah <hjshah@fb.com>
Co-authored-by: Hardik Shah <hjshah@meta.com>
2024-08-08 13:38:41 -07:00

120 lines
3.8 KiB
Python

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import uuid
from typing import Any, List, Optional
from llama_models.llama3_1.api.datatypes import BuiltinTool, Message, SamplingParams
from llama_toolchain.agentic_system.api import (
AgenticSystemCreateRequest,
AgenticSystemInstanceConfig,
AgenticSystemSessionCreateRequest,
AgenticSystemToolDefinition,
)
from llama_toolchain.agentic_system.client import AgenticSystemClient
from llama_toolchain.agentic_system.tools.custom.execute import (
execute_with_custom_tools,
)
from llama_toolchain.safety.api.datatypes import BuiltinShield, ShieldDefinition
# TODO: this should move back to the llama-agentic-system repo
class AgenticSystemClientWrapper:
def __init__(self, api, system_id, custom_tools):
self.api = api
self.system_id = system_id
self.custom_tools = custom_tools
self.session_id = None
async def create_session(self, name: str = None):
if name is None:
name = f"Session-{uuid.uuid4()}"
response = await self.api.create_agentic_system_session(
AgenticSystemSessionCreateRequest(
system_id=self.system_id,
session_name=name,
)
)
self.session_id = response.session_id
return self.session_id
async def run(self, messages: List[Message], stream: bool = True):
async for chunk in execute_with_custom_tools(
self.api,
self.system_id,
self.session_id,
messages,
self.custom_tools,
stream=stream,
):
yield chunk
async def get_agent_system_instance(
host: str,
port: int,
custom_tools: Optional[List[Any]] = None,
disable_safety: bool = False,
model: str = "Meta-Llama3.1-8B-Instruct",
) -> AgenticSystemClientWrapper:
custom_tools = custom_tools or []
api = AgenticSystemClient(base_url=f"http://{host}:{port}")
tool_definitions = [
AgenticSystemToolDefinition(
tool_name=BuiltinTool.brave_search,
),
AgenticSystemToolDefinition(
tool_name=BuiltinTool.wolfram_alpha,
),
AgenticSystemToolDefinition(
tool_name=BuiltinTool.photogen,
),
AgenticSystemToolDefinition(
tool_name=BuiltinTool.code_interpreter,
),
] + [t.get_tool_definition() for t in custom_tools]
if not disable_safety:
for t in tool_definitions:
t.input_shields = [ShieldDefinition(shield_type=BuiltinShield.llama_guard)]
t.output_shields = [
ShieldDefinition(shield_type=BuiltinShield.llama_guard),
ShieldDefinition(shield_type=BuiltinShield.injection_shield),
]
create_request = AgenticSystemCreateRequest(
model=model,
instance_config=AgenticSystemInstanceConfig(
instructions="You are a helpful assistant",
available_tools=tool_definitions,
input_shields=(
[]
if disable_safety
else [
ShieldDefinition(shield_type=BuiltinShield.llama_guard),
ShieldDefinition(shield_type=BuiltinShield.jailbreak_shield),
]
),
output_shields=(
[]
if disable_safety
else [
ShieldDefinition(shield_type=BuiltinShield.llama_guard),
]
),
sampling_params=SamplingParams(),
),
)
create_response = await api.create_agentic_system(create_request)
return AgenticSystemClientWrapper(api, create_response.system_id, custom_tools)