Make the "all-remote" distribution lightweight in dependencies and size

Ashwin Bharambe 2024-09-24 14:18:57 -07:00
parent 445536de64
commit bda974e660
4 changed files with 65 additions and 18 deletions

llama_stack/distribution/build.py

@@ -66,6 +66,14 @@ def build_image(build_config: BuildConfig, build_file_path: Path):
         if provider_spec.docker_image:
             raise ValueError("A stack's dependencies cannot have a docker image")
 
+    special_deps = []
+    deps = []
+    for package in package_deps.pip_packages:
+        if "--no-deps" in package or "--index-url" in package:
+            special_deps.append(package)
+        else:
+            deps.append(package)
+
     if build_config.image_type == ImageType.docker.value:
         script = pkg_resources.resource_filename(
             "llama_stack", "distribution/build_container.sh"
@@ -75,7 +83,7 @@ def build_image(build_config: BuildConfig, build_file_path: Path):
             build_config.name,
             package_deps.docker_image,
             str(build_file_path),
-            " ".join(package_deps.pip_packages),
+            " ".join(deps),
         ]
     else:
         script = pkg_resources.resource_filename(
@@ -84,14 +92,17 @@ def build_image(build_config: BuildConfig, build_file_path: Path):
         args = [
             script,
             build_config.name,
-            " ".join(package_deps.pip_packages),
+            " ".join(deps),
         ]
 
+    if special_deps:
+        args.append("#".join(special_deps))
+
     return_code = run_with_pty(args)
     if return_code != 0:
         cprint(
             f"Failed to build target {build_config.name} with return code {return_code}",
             color="red",
         )
 
     return return_code
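
For illustration, the split can be exercised on its own. A minimal standalone sketch of the rule build_image() applies (the function name split_deps and the sample list are invented for the example):

def split_deps(pip_packages):
    # Same rule as in build_image() above: entries carrying pip flags such as
    # --no-deps or --index-url cannot be batched into a single "pip install".
    deps, special_deps = [], []
    for package in pip_packages:
        if "--no-deps" in package or "--index-url" in package:
            special_deps.append(package)
        else:
            deps.append(package)
    return deps, special_deps

deps, special_deps = split_deps(
    [
        "blobfile",
        "torch --index-url https://download.pytorch.org/whl/cpu",
        "sentence-transformers --no-deps",
    ]
)
print(" ".join(deps))          # -> blobfile
print("#".join(special_deps))  # -> torch --index-url https://download.pytorch.org/whl/cpu#sentence-transformers --no-deps

The '#' separator is what the build scripts split on; it is safe here because none of the dependency strings contain a literal '#'.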

llama_stack/distribution/build_conda_env.sh

@@ -17,14 +17,16 @@ if [ -n "$LLAMA_MODELS_DIR" ]; then
   echo "Using llama-models-dir=$LLAMA_MODELS_DIR"
 fi
 
-set -euo pipefail
-
-if [ "$#" -ne 2 ]; then
-  echo "Usage: $0 <distribution_type> <build_name> <pip_dependencies>" >&2
+if [ "$#" -lt 2 ]; then
+  echo "Usage: $0 <distribution_type> <build_name> <pip_dependencies> [<special_pip_deps>]" >&2
   echo "Example: $0 <distribution_type> mybuild 'numpy pandas scipy'" >&2
   exit 1
 fi
 
+special_pip_deps="$3"
+
+set -euo pipefail
+
 build_name="$1"
 env_name="llamastack-$build_name"
 pip_dependencies="$2"
@@ -43,6 +45,7 @@ source "$SCRIPT_DIR/common.sh"
 ensure_conda_env_python310() {
   local env_name="$1"
   local pip_dependencies="$2"
+  local special_pip_deps="$3"
   local python_version="3.10"
 
   # Check if conda command is available
@@ -78,7 +81,12 @@ ensure_conda_env_python310() {
   if [ -n "$TEST_PYPI_VERSION" ]; then
     # these packages are damaged in test-pypi, so install them first
     $CONDA_PREFIX/bin/pip install fastapi libcst
-    $CONDA_PREFIX/bin/pip install --extra-index-url https://test.pypi.org/simple/ llama-models==$TEST_PYPI_VERSION llama-stack==$TEST_PYPI_VERSION $pip_dependencies
+    $CONDA_PREFIX/bin/pip install --extra-index-url https://test.pypi.org/simple/ \
+      llama-models==$TEST_PYPI_VERSION llama-stack==$TEST_PYPI_VERSION \
+      $pip_dependencies
+    if [ -n "$special_pip_deps" ]; then
+      $CONDA_PREFIX/bin/pip install --no-deps "$special_pip_deps"
+    fi
   else
     # Re-installing llama-stack in the new conda environment
     if [ -n "$LLAMA_STACK_DIR" ]; then
@@ -105,11 +113,16 @@ ensure_conda_env_python310() {
   fi
 
   # Install pip dependencies
   if [ -n "$pip_dependencies" ]; then
-    printf "Installing pip dependencies: $pip_dependencies\n"
-    $CONDA_PREFIX/bin/pip install $pip_dependencies
+    printf "Installing pip dependencies\n"
+    $CONDA_PREFIX/bin/pip install $pip_dependencies
+    if [ -n "$special_pip_deps" ]; then
+      IFS='#' read -ra parts <<< "$special_pip_deps"
+      for part in "${parts[@]}"; do
+        echo "$part"
+        $CONDA_PREFIX/bin/pip install $part
+      done
+    fi
   fi
 }
 
-ensure_conda_env_python310 "$env_name" "$pip_dependencies"
+ensure_conda_env_python310 "$env_name" "$pip_dependencies" "$special_pip_deps"
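
On the receiving side the script splits the single '#'-separated argument back apart and installs each entry individually, relying on shell word splitting so the embedded flags reach pip as separate arguments. A rough Python rendering of that loop, for illustration only (the env path in the commented call is hypothetical):

import shlex
import subprocess

def install_special_deps(pip_bin, special_pip_deps):
    # Mirrors the bash loop above: split on '#', then split each part into
    # words so that e.g. "torch --index-url <url>" becomes separate arguments.
    for part in special_pip_deps.split("#"):
        print(part)
        subprocess.run([pip_bin, "install", *shlex.split(part)], check=True)

# install_special_deps(
#     "/opt/conda/envs/llamastack-mybuild/bin/pip",
#     "torch --index-url https://download.pytorch.org/whl/cpu"
#     "#sentence-transformers --no-deps",
# )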

llama_stack/distribution/build_container.sh

@@ -4,12 +4,16 @@ LLAMA_MODELS_DIR=${LLAMA_MODELS_DIR:-}
 LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
 TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}
 
-if [ "$#" -ne 4 ]; then
-  echo "Usage: $0 <build_name> <docker_base> <pip_dependencies>" >&2
-  echo "Example: $0 my-fastapi-app python:3.9-slim 'fastapi uvicorn'" >&2
+if [ "$#" -lt 4 ]; then
+  echo "Usage: $0 <build_name> <docker_base> <pip_dependencies> [<special_pip_deps>]" >&2
+  echo "Example: $0 my-fastapi-app python:3.9-slim 'fastapi uvicorn' " >&2
   exit 1
 fi
 
+special_pip_deps="$5"
+
+set -euo pipefail
+
 build_name="$1"
 image_name="llamastack-$build_name"
 docker_base=$2
@@ -21,8 +25,6 @@ RED='\033[0;31m'
 GREEN='\033[0;32m'
 NC='\033[0m' # No Color
 
-set -euo pipefail
-
 SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
 REPO_DIR=$(dirname $(dirname "$SCRIPT_DIR"))
 DOCKER_BINARY=${DOCKER_BINARY:-docker}
@@ -85,6 +87,13 @@ if [ -n "$pip_dependencies" ]; then
   add_to_docker "RUN pip install $pip_dependencies"
 fi
 
+if [ -n "$special_pip_deps" ]; then
+  IFS='#' read -ra parts <<< "$special_pip_deps"
+  for part in "${parts[@]}"; do
+    add_to_docker "RUN pip install $part"
+  done
+fi
+
 add_to_docker <<EOF
 # This would be good in production but for debugging flexibility lets not add it right now
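
The container path decodes the same encoding at Dockerfile-generation time, emitting one RUN layer per special dependency (add_to_docker appends a line to the generated Dockerfile). A small sketch of what the loop above produces for a two-entry string:

special_pip_deps = (
    "torch --index-url https://download.pytorch.org/whl/cpu"
    "#sentence-transformers --no-deps"
)
for part in special_pip_deps.split("#"):
    # the real script calls add_to_docker; print stands in for it here
    print(f"RUN pip install {part}")
# Output:
# RUN pip install torch --index-url https://download.pytorch.org/whl/cpu
# RUN pip install sentence-transformers --no-deps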

llama_stack/providers/registry/memory.py

@@ -8,11 +8,25 @@ from typing import List
 
 from llama_stack.distribution.datatypes import *  # noqa: F403
 
 EMBEDDING_DEPS = [
     "blobfile",
     "chardet",
     "pypdf",
-    "sentence-transformers",
+    "tqdm",
+    "numpy",
+    "scikit-learn",
+    "scipy",
+    "nltk",
+    "sentencepiece",
+    "transformers",
+    # this happens to work because special dependencies are always installed last
+    # so if there was a regular torch installed first, this would be ignored
+    # we need a better way to do this to identify potential conflicts, etc.
+    # for now, this lets us significantly reduce the size of the container which
+    # does not have any "local" inference code (and hence does not need GPU-enabled torch)
+    "torch --index-url https://download.pytorch.org/whl/cpu",
+    "sentence-transformers --no-deps",
 ]