From 66412b932bdffb48bdc95a293a1458369fc124e5 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Wed, 7 Aug 2024 13:58:13 -0700
Subject: [PATCH] Nuke fp8_requirements, fold fbgemm into common requirements

---
 README.md                                | 12 ----------
 fp8_requirements.txt                     | 28 ------------------------
 llama_toolchain/distribution/registry.py |  6 +++--
 requirements.txt                         |  1 +
 4 files changed, 5 insertions(+), 42 deletions(-)
 delete mode 100644 fp8_requirements.txt

diff --git a/README.md b/README.md
index 2054bf2e0..1cd0e58d4 100644
--- a/README.md
+++ b/README.md
@@ -28,15 +28,3 @@ pip install -e .
 ## The Llama CLI
 
 The `llama` CLI makes it easy to configure and run the Llama toolchain. Read the [CLI reference](docs/cli_reference.md) for details.
-
-## Appendix: Running FP8
-
-If you want to run FP8, you need the `fbgemm-gpu` package which requires `torch >= 2.4.0` (currently only in nightly, but releasing shortly...)
-
-```bash
-ENV=fp8_env
-conda create -n $ENV python=3.10
-conda activate $ENV
-
-pip3 install -r fp8_requirements.txt
-```
diff --git a/fp8_requirements.txt b/fp8_requirements.txt
deleted file mode 100644
index 8a58cff62..000000000
--- a/fp8_requirements.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-torch>=2.4.0
-accelerate
-black==24.4.2
-codeshield
-fairscale
-fastapi
-fire
-flake8
-huggingface-hub
-httpx
-json-strong-typing
-matplotlib
-omegaconf
-pandas
-Pillow
-pre-commit
-pydantic==1.10.13
-pydantic_core==2.18.2
-python-dotenv
-python-openapi
-requests
-tiktoken
-transformers
-ufmt==2.7.0
-usort==1.0.8
-uvicorn
-zmq
-fbgemm-gpu==0.8.0
diff --git a/llama_toolchain/distribution/registry.py b/llama_toolchain/distribution/registry.py
index a89bd9b7d..2aff35407 100644
--- a/llama_toolchain/distribution/registry.py
+++ b/llama_toolchain/distribution/registry.py
@@ -26,7 +26,6 @@ COMMON_DEPENDENCIES = [
     "huggingface-hub",
     "json-strong-typing",
     "llama-models",
-    "omegaconf",
     "pandas",
     "Pillow",
     "pydantic==1.10.13",
@@ -60,7 +59,10 @@ def available_distribution_specs() -> List[DistributionSpec]:
         DistributionSpec(
             spec_id="inline",
             description="Use code from `llama_toolchain` itself to serve all llama stack APIs",
-            additional_pip_packages=COMMON_DEPENDENCIES,
+            additional_pip_packages=COMMON_DEPENDENCIES
+            + [
+                "fbgemm-gpu==0.8.0",
+            ],
             provider_specs={
                 Api.inference: providers[Api.inference]["meta-reference"],
                 Api.safety: providers[Api.safety]["meta-reference"],
diff --git a/requirements.txt b/requirements.txt
index 726233036..cda7964c7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 black==24.4.2
 fastapi
+fbgemm-gpu==0.8.0
 fire
 flake8
 httpx
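
Note (not part of the patch): the registry change above composes the "inline" distribution's pip dependencies by appending `fbgemm-gpu` to the shared list instead of shipping a separate fp8_requirements.txt. The sketch below illustrates that composition; `COMMON_DEPENDENCIES` mirrors the entries visible in the diff context, while `render_pip_install` is a hypothetical helper added only for demonstration.

```python
from typing import List

# Mirrors the COMMON_DEPENDENCIES entries visible in the diff context;
# the full list in registry.py contains more packages than shown here.
COMMON_DEPENDENCIES: List[str] = [
    "huggingface-hub",
    "json-strong-typing",
    "llama-models",
    "pandas",
    "Pillow",
    "pydantic==1.10.13",
]

# After this patch, the "inline" distribution appends fbgemm-gpu on top of
# the shared list, as registry.py now does for additional_pip_packages.
inline_pip_packages = COMMON_DEPENDENCIES + [
    "fbgemm-gpu==0.8.0",
]

def render_pip_install(packages: List[str]) -> str:
    """Hypothetical helper: render the pip command a distribution would run."""
    return "pip install " + " ".join(packages)

if __name__ == "__main__":
    print(render_pip_install(inline_pip_packages))
```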