From d28c3dfe0f704ab0c46785e8381f5518bad47d33 Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Mon, 30 Sep 2024 15:04:04 -0700
Subject: [PATCH] [CLI] simplify docker run (#159)

* bake run.yaml inside docker, simplify run

* add docker template examples

* delete generated Dockerfile

* unique deps

* clean up debug

* default entrypoint

* address comments, update output msg

* update msg

* build output msg

* configure msg

* unique special_deps

* remove quotes in configure
---
 llama_stack/cli/stack/configure.py            |  6 --
 llama_stack/distribution/build.py             |  2 +
 llama_stack/distribution/build_container.sh   | 11 +--
 .../distribution/configure_container.sh       |  3 +-
 llama_stack/distribution/server/server.py     |  6 +-
 .../docker/llamastack-local-cpu/build.yaml    | 15 +++++
 .../docker/llamastack-local-cpu/run.yaml      | 64 ++++++++++++++++++
 .../docker/llamastack-local-gpu/build.yaml    | 11 +++
 .../docker/llamastack-local-gpu/run.yaml      | 67 +++++++++++++++++++
 9 files changed, 172 insertions(+), 13 deletions(-)
 create mode 100644 llama_stack/distribution/templates/docker/llamastack-local-cpu/build.yaml
 create mode 100644 llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml
 create mode 100644 llama_stack/distribution/templates/docker/llamastack-local-gpu/build.yaml
 create mode 100644 llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml

diff --git a/llama_stack/cli/stack/configure.py b/llama_stack/cli/stack/configure.py
index e8105b7e0..7a1cbdf98 100644
--- a/llama_stack/cli/stack/configure.py
+++ b/llama_stack/cli/stack/configure.py
@@ -112,12 +112,6 @@ class StackConfigure(Subcommand):
             )
             return

-        build_name = docker_image.removeprefix("llamastack-")
-        saved_file = str(builds_dir / f"{build_name}-run.yaml")
-        cprint(
-            f"YAML configuration has been written to {saved_file}. You can now run `llama stack run {saved_file}`",
-            color="green",
-        )
         return

     def _configure_llama_distribution(
diff --git a/llama_stack/distribution/build.py b/llama_stack/distribution/build.py
index 1047c6418..dabcad2a6 100644
--- a/llama_stack/distribution/build.py
+++ b/llama_stack/distribution/build.py
@@ -73,6 +73,8 @@ def build_image(build_config: BuildConfig, build_file_path: Path):
             special_deps.append(package)
         else:
             deps.append(package)
+    deps = list(set(deps))
+    special_deps = list(set(special_deps))

     if build_config.image_type == ImageType.docker.value:
         script = pkg_resources.resource_filename(
diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh
index 705fd9505..c9b99d376 100755
--- a/llama_stack/distribution/build_container.sh
+++ b/llama_stack/distribution/build_container.sh
@@ -29,9 +29,12 @@ SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
 REPO_DIR=$(dirname $(dirname "$SCRIPT_DIR"))
 DOCKER_BINARY=${DOCKER_BINARY:-docker}
 DOCKER_OPTS=${DOCKER_OPTS:-}
+REPO_CONFIGS_DIR="$REPO_DIR/tmp/configs"

 TEMP_DIR=$(mktemp -d)

+llama stack configure $build_file_path --output-dir $REPO_CONFIGS_DIR
+
 add_to_docker() {
   local input
   output_file="$TEMP_DIR/Dockerfile"
@@ -103,11 +106,12 @@ add_to_docker < Dict[Api, An

     return impls, specs


-def main(yaml_config: str, port: int = 5000, disable_ipv6: bool = False):
+def main(
+    yaml_config: str = "llamastack-run.yaml",
+    port: int = 5000,
+    disable_ipv6: bool = False,
+):
     with open(yaml_config, "r") as fp:
         config = StackRunConfig(**yaml.safe_load(fp))
diff --git a/llama_stack/distribution/templates/docker/llamastack-local-cpu/build.yaml b/llama_stack/distribution/templates/docker/llamastack-local-cpu/build.yaml
new file mode 100644
index 000000000..9db019454
--- /dev/null
+++ b/llama_stack/distribution/templates/docker/llamastack-local-cpu/build.yaml
@@ -0,0 +1,15 @@
+name: local-cpu
+distribution_spec:
+  description: remote inference + local safety/agents/memory
+  docker_image: null
+  providers:
+    inference:
+    - remote::ollama
+    - remote::tgi
+    - remote::together
+    - remote::fireworks
+    safety: meta-reference
+    agents: meta-reference
+    memory: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml b/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml
new file mode 100644
index 000000000..6a4b2e464
--- /dev/null
+++ b/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml
@@ -0,0 +1,64 @@
+built_at: '2024-09-30T09:04:30.533391'
+image_name: local-cpu
+docker_image: local-cpu
+conda_env: null
+apis_to_serve:
+- agents
+- inference
+- models
+- memory
+- safety
+- shields
+- memory_banks
+api_providers:
+  inference:
+    providers:
+    - remote::ollama
+  safety:
+    providers:
+    - meta-reference
+  agents:
+    provider_id: meta-reference
+    config:
+      persistence_store:
+        namespace: null
+        type: sqlite
+        db_path: /home/xiyan/.llama/runtime/kvstore.db
+  memory:
+    providers:
+    - meta-reference
+  telemetry:
+    provider_id: meta-reference
+    config: {}
+routing_table:
+  inference:
+  - provider_id: remote::ollama
+    config:
+      host: localhost
+      port: 6000
+    routing_key: Meta-Llama3.1-8B-Instruct
+  safety:
+  - provider_id: meta-reference
+    config:
+      llama_guard_shield: null
+      prompt_guard_shield: null
+    routing_key: llama_guard
+  - provider_id: meta-reference
+    config:
+      llama_guard_shield: null
+      prompt_guard_shield: null
+    routing_key: code_scanner_guard
+  - provider_id: meta-reference
+    config:
+      llama_guard_shield: null
+      prompt_guard_shield: null
+    routing_key: injection_shield
+  - provider_id: meta-reference
+    config:
+      llama_guard_shield: null
+      prompt_guard_shield: null
+    routing_key: jailbreak_shield
+  memory:
+  - provider_id: meta-reference
+    config: {}
+    routing_key: vector
diff --git a/llama_stack/distribution/templates/docker/llamastack-local-gpu/build.yaml b/llama_stack/distribution/templates/docker/llamastack-local-gpu/build.yaml
new file mode 100644
index 000000000..11d1ac01c
--- /dev/null
+++ b/llama_stack/distribution/templates/docker/llamastack-local-gpu/build.yaml
@@ -0,0 +1,11 @@
+name: local-gpu
+distribution_spec:
+  description: local meta reference
+  docker_image: null
+  providers:
+    inference: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    memory: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml b/llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml
new file mode 100644
index 000000000..2969479dc
--- /dev/null
+++ b/llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml
@@ -0,0 +1,67 @@
+built_at: '2024-09-30T09:00:56.693751'
+image_name: local-gpu
+docker_image: local-gpu
+conda_env: null
+apis_to_serve:
+- memory
+- inference
+- agents
+- shields
+- safety
+- models
+- memory_banks
+api_providers:
+  inference:
+    providers:
+    - meta-reference
+  safety:
+    providers:
+    - meta-reference
+  agents:
+    provider_id: meta-reference
+    config:
+      persistence_store:
+        namespace: null
+        type: sqlite
+        db_path: /home/xiyan/.llama/runtime/kvstore.db
+  memory:
+    providers:
+    - meta-reference
+  telemetry:
+    provider_id: meta-reference
+    config: {}
+routing_table:
+  inference:
+  - provider_id: meta-reference
+    config:
+      model: Llama3.1-8B-Instruct
+      quantization: null
+      torch_seed: null
+      max_seq_len: 4096
+      max_batch_size: 1
+    routing_key: Llama3.1-8B-Instruct
+  safety:
+  - provider_id: meta-reference
+    config:
+      llama_guard_shield: null
+      prompt_guard_shield: null
+    routing_key: llama_guard
+  - provider_id: meta-reference
+    config:
+      llama_guard_shield: null
+      prompt_guard_shield: null
+    routing_key: code_scanner_guard
+  - provider_id: meta-reference
+    config:
+      llama_guard_shield: null
+      prompt_guard_shield: null
+    routing_key: injection_shield
+  - provider_id: meta-reference
+    config:
+      llama_guard_shield: null
+      prompt_guard_shield: null
+    routing_key: jailbreak_shield
+  memory:
+  - provider_id: meta-reference
+    config: {}
+    routing_key: vector
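
Below is a minimal sketch of the simplified flow, assuming an image built from
the local-gpu template above. The exact build invocation and image tag are
assumptions for illustration; the port 5000 default and the fallback to a
baked-in llamastack-run.yaml come from the server.py change in this patch.

  # build the docker distribution; configure now runs at build time, so the
  # generated run.yaml is baked into the image (assumed --config flag)
  llama stack build --config llama_stack/distribution/templates/docker/llamastack-local-gpu/build.yaml

  # run the image with no config argument; the server falls back to the
  # baked-in llamastack-run.yaml and listens on port 5000
  docker run -p 5000:5000 llamastack-local-gpu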