forked from phoenix-oss/llama-stack-mirror
Make the "all-remote" distribution lightweight in dependencies and size
commit bda974e660
parent 445536de64
4 changed files with 65 additions and 18 deletions
@@ -66,6 +66,14 @@ def build_image(build_config: BuildConfig, build_file_path: Path):
             if provider_spec.docker_image:
                 raise ValueError("A stack's dependencies cannot have a docker image")

+    special_deps = []
+    deps = []
+    for package in package_deps.pip_packages:
+        if "--no-deps" in package or "--index-url" in package:
+            special_deps.append(package)
+        else:
+            deps.append(package)
+
     if build_config.image_type == ImageType.docker.value:
         script = pkg_resources.resource_filename(
             "llama_stack", "distribution/build_container.sh"
@@ -75,7 +83,7 @@ def build_image(build_config: BuildConfig, build_file_path: Path):
             build_config.name,
             package_deps.docker_image,
             str(build_file_path),
-            " ".join(package_deps.pip_packages),
+            " ".join(deps),
         ]
     else:
         script = pkg_resources.resource_filename(
@@ -84,14 +92,17 @@ def build_image(build_config: BuildConfig, build_file_path: Path):
         args = [
             script,
             build_config.name,
-            " ".join(package_deps.pip_packages),
+            " ".join(deps),
         ]

+    if special_deps:
+        args.append("#".join(special_deps))
+
     return_code = run_with_pty(args)
     if return_code != 0:
         cprint(
             f"Failed to build target {build_config.name} with return code {return_code}",
             color="red",
         )

     return return_code
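For orientation, here is the new dependency split in isolation as a minimal Python sketch; the helper name and the sample package list are illustrative, not part of the commit. Any requirement string carrying `--no-deps` or `--index-url` is held back as a "special" dependency and handed to the build script as one extra `#`-joined argument, while everything else stays in the ordinary space-joined pip list.

# Minimal sketch of the split build_image now performs (names below are illustrative).
def split_deps(pip_packages):
    special_deps, deps = [], []
    for package in pip_packages:
        if "--no-deps" in package or "--index-url" in package:
            special_deps.append(package)
        else:
            deps.append(package)
    return deps, special_deps

# Hypothetical input:
deps, special_deps = split_deps(
    ["numpy", "torch --index-url https://download.pytorch.org/whl/cpu"]
)
print(" ".join(deps))          # "numpy" -> the regular <pip_dependencies> argument
print("#".join(special_deps))  # the torch line -> the optional <special_pip_deps> argument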
@@ -17,14 +17,16 @@ if [ -n "$LLAMA_MODELS_DIR" ]; then
   echo "Using llama-models-dir=$LLAMA_MODELS_DIR"
 fi

-set -euo pipefail
-
-if [ "$#" -ne 2 ]; then
-  echo "Usage: $0 <distribution_type> <build_name> <pip_dependencies>" >&2
+if [ "$#" -lt 2 ]; then
+  echo "Usage: $0 <distribution_type> <build_name> <pip_dependencies> [<special_pip_deps>]" >&2
   echo "Example: $0 <distribution_type> mybuild 'numpy pandas scipy'" >&2
   exit 1
 fi

+special_pip_deps="$3"
+
+set -euo pipefail
+
 build_name="$1"
 env_name="llamastack-$build_name"
 pip_dependencies="$2"
@@ -43,6 +45,7 @@ source "$SCRIPT_DIR/common.sh"
 ensure_conda_env_python310() {
   local env_name="$1"
   local pip_dependencies="$2"
+  local special_pip_deps="$3"
   local python_version="3.10"

   # Check if conda command is available
@@ -78,7 +81,12 @@ ensure_conda_env_python310() {
   if [ -n "$TEST_PYPI_VERSION" ]; then
     # these packages are damaged in test-pypi, so install them first
     $CONDA_PREFIX/bin/pip install fastapi libcst
-    $CONDA_PREFIX/bin/pip install --extra-index-url https://test.pypi.org/simple/ llama-models==$TEST_PYPI_VERSION llama-stack==$TEST_PYPI_VERSION $pip_dependencies
+    $CONDA_PREFIX/bin/pip install --extra-index-url https://test.pypi.org/simple/ \
+      llama-models==$TEST_PYPI_VERSION llama-stack==$TEST_PYPI_VERSION \
+      $pip_dependencies
+    if [ -n "$special_pip_deps" ]; then
+      $CONDA_PREFIX/bin/pip install --no-deps "$special_pip_deps"
+    fi
   else
     # Re-installing llama-stack in the new conda environment
     if [ -n "$LLAMA_STACK_DIR" ]; then
@@ -105,11 +113,16 @@ ensure_conda_env_python310() {
     fi

     # Install pip dependencies
-    if [ -n "$pip_dependencies" ]; then
-      printf "Installing pip dependencies: $pip_dependencies\n"
-      $CONDA_PREFIX/bin/pip install $pip_dependencies
+    printf "Installing pip dependencies\n"
+    $CONDA_PREFIX/bin/pip install $pip_dependencies
+    if [ -n "$special_pip_deps" ]; then
+      IFS='#' read -ra parts <<< "$special_pip_deps"
+      for part in "${parts[@]}"; do
+        echo "$part"
+        $CONDA_PREFIX/bin/pip install $part
+      done
     fi
   fi
 }

-ensure_conda_env_python310 "$env_name" "$pip_dependencies"
+ensure_conda_env_python310 "$env_name" "$pip_dependencies" "$special_pip_deps"
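On the conda side, the single `#`-joined argument is split back apart (the `IFS='#' read -ra parts` loop above) and each part gets its own `pip install`, so per-package flags stay attached to the package they belong to. Below is a rough Python equivalent of that loop, assuming a `special_pip_deps` string in the same format; the `sys.executable -m pip` call is my stand-in for `$CONDA_PREFIX/bin/pip`, and running it would actually install the packages.

# Rough Python equivalent of the IFS='#' loop above (sketch only).
import shlex
import subprocess
import sys

special_pip_deps = (
    "torch --index-url https://download.pytorch.org/whl/cpu"
    "#sentence-transformers --no-deps"
)

for part in special_pip_deps.split("#"):
    if part.strip():
        print(part)
        # One pip invocation per part, with that part's flags included.
        subprocess.run(
            [sys.executable, "-m", "pip", "install", *shlex.split(part)],
            check=True,
        )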
@@ -4,12 +4,16 @@ LLAMA_MODELS_DIR=${LLAMA_MODELS_DIR:-}
 LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
 TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}

-if [ "$#" -ne 4 ]; then
-  echo "Usage: $0 <build_name> <docker_base> <pip_dependencies>
-  echo "Example: $0 my-fastapi-app python:3.9-slim 'fastapi uvicorn'
+if [ "$#" -lt 4 ]; then
+  echo "Usage: $0 <build_name> <docker_base> <pip_dependencies> [<special_pip_deps>]" >&2
+  echo "Example: $0 my-fastapi-app python:3.9-slim 'fastapi uvicorn' " >&2
   exit 1
 fi

+special_pip_deps="$5"
+
+set -euo pipefail
+
 build_name="$1"
 image_name="llamastack-$build_name"
 docker_base=$2
@@ -21,8 +25,6 @@ RED='\033[0;31m'
 GREEN='\033[0;32m'
 NC='\033[0m' # No Color

-set -euo pipefail
-
 SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
 REPO_DIR=$(dirname $(dirname "$SCRIPT_DIR"))
 DOCKER_BINARY=${DOCKER_BINARY:-docker}
@@ -85,6 +87,13 @@ if [ -n "$pip_dependencies" ]; then
   add_to_docker "RUN pip install $pip_dependencies"
 fi

+if [ -n "$special_pip_deps" ]; then
+  IFS='#' read -ra parts <<< "$special_pip_deps"
+  for part in "${parts[@]}"; do
+    add_to_docker "RUN pip install $part"
+  done
+fi
+
 add_to_docker <<EOF

 # This would be good in production but for debugging flexibility lets not add it right now
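Note how the Python and shell sides line up positionally: build.py passes the script path, the build name, the docker base image, the build file path, and the space-joined regular deps, then appends the `#`-joined special deps last, which is why this script reads them from `$5`. A tiny sketch of that argv layout follows; the concrete values are made up, and the `$3`/`$4` labels are inferred from build.py's argument order rather than shown in these hunks.

# Illustrative argv layout for the container build script (values are hypothetical).
args = [
    "build_container.sh",          # $0
    "all-remote",                  # $1 -> build_name
    "python:3.10-slim",            # $2 -> docker_base
    "/tmp/all-remote-build.yaml",  # $3 -> build file path (per build.py's argument order)
    "blobfile chardet pypdf",      # $4 -> pip_dependencies
]
special_deps = [
    "torch --index-url https://download.pytorch.org/whl/cpu",
    "sentence-transformers --no-deps",
]
if special_deps:
    args.append("#".join(special_deps))  # $5 -> special_pip_deps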
@@ -8,11 +8,25 @@ from typing import List

 from llama_stack.distribution.datatypes import * # noqa: F403


 EMBEDDING_DEPS = [
     "blobfile",
     "chardet",
     "pypdf",
-    "sentence-transformers",
+    "tqdm",
+    "numpy",
+    "scikit-learn",
+    "scipy",
+    "nltk",
+    "sentencepiece",
+    "transformers",
+    # this happens to work because special dependencies are always installed last
+    # so if there was a regular torch installed first, this would be ignored
+    # we need a better way to do this to identify potential conflicts, etc.
+    # for now, this lets us significantly reduce the size of the container which
+    # does not have any "local" inference code (and hence does not need GPU-enabled torch)
+    "torch --index-url https://download.pytorch.org/whl/cpu",
+    "sentence-transformers --no-deps",
 ]

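Tying the registry change back to the build flow: under the `--no-deps` / `--index-url` rule added in build.py, the last two entries above are exactly the ones that become special dependencies, so an all-remote image gets CPU-only torch from the PyTorch CPU wheel index plus a dependency-free sentence-transformers instead of the default GPU-enabled torch stack. A quick, self-contained check of that classification; the list is re-typed here purely for illustration.

# Sketch: apply the classification rule from build_image to the updated EMBEDDING_DEPS.
EMBEDDING_DEPS = [
    "blobfile", "chardet", "pypdf", "tqdm", "numpy", "scikit-learn", "scipy",
    "nltk", "sentencepiece", "transformers",
    "torch --index-url https://download.pytorch.org/whl/cpu",
    "sentence-transformers --no-deps",
]

special = [p for p in EMBEDDING_DEPS if "--no-deps" in p or "--index-url" in p]
regular = [p for p in EMBEDDING_DEPS if p not in special]

print(regular)  # plain package names -> one combined "pip install ..." call
print(special)  # the torch and sentence-transformers lines -> held back and installed after the regular deps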