prototype: use pyproject and uv to build distribution

Goals:

* remove the need for a custom tool to install a collection of Python
  packages, i.e. `llama stack build`
* use the power of `uv`, which is designed to manage dependencies
* `llama stack build` can "probably" go away and be replaced with uv

How-to: with the pyproject, you can install an Ollama distribution in a
virtual env like so:

```
uv venv --python 3.10 ollama-distro
source ollama-distro/bin/activate
uv sync --extra ollama
llama stack run llama_stack/templates/ollama/run.yaml
```

Caveats:

* external providers: we could still use a build file, or add the known
  external providers to the pyproject so they become extras? (a sketch follows below)
* growth of the uv.lock file?
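
If known external providers were declared as extras as well, installing one would
hypothetically look the same as installing any distribution; a sketch only, with a
made-up extra name:

```
uv sync --extra my-external-provider  # hypothetical extra declared in pyproject
```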

We create a requirements.txt for convenience, as some users are more
familiar with this format than with reading the pyproject.
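
For reference, the checked-in requirements-ollama.txt is generated with `uv export`
(this is the command recorded at the top of that file):

```
uv export --frozen --no-hashes --no-emit-project --output-file=requirements-ollama.txt --no-annotate --no-default-groups --extra ollama
```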

Signed-off-by: Sébastien Han <seb@redhat.com>
Sébastien Han, 2025-05-27 20:31:57 +02:00
parent 6832e8a658
commit b6ebbe1bc0
13 changed files with 5579 additions and 679 deletions

View file

@@ -5,6 +5,10 @@ inputs:
     description: The Python version to use
     required: false
     default: "3.11"
+  install-ollama:
+    description: Install ollama
+    required: false
+    default: true
 runs:
   using: "composite"
   steps:
@@ -17,11 +21,13 @@ runs:
     - name: Install dependencies
      shell: bash
+      env:
+        INSTALL_OLLAMA: ${{ inputs.install-ollama }}
+      if: ${{ env.INSTALL_OLLAMA == 'true' }}
       run: |
-        uv sync --all-groups
-        uv pip install ollama faiss-cpu
+        uv sync --all-groups --extra ollama
         # always test against the latest version of the client
         # TODO: this is not necessarily a good idea. we need to test against both published and latest
         # to find out backwards compatibility issues.
         uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main
+        uv pip install -e .

View file

@@ -33,9 +33,6 @@ jobs:
       - name: Install dependencies
         uses: ./.github/actions/setup-runner
-      - name: Build Llama Stack
-        run: |
-          llama stack build --template ollama --image-type venv
       - name: Install minikube
         if: ${{ matrix.auth-provider == 'kubernetes' }}
View file

@@ -41,16 +41,12 @@ jobs:
       - name: Setup ollama
         uses: ./.github/actions/setup-ollama
-      - name: Build Llama Stack
-        run: |
-          uv run llama stack build --template ollama --image-type venv
       - name: Start Llama Stack server in background
         if: matrix.client-type == 'http'
         env:
           INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
         run: |
-          LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv --env OLLAMA_URL="http://0.0.0.0:11434" &
+          LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --env OLLAMA_URL="http://0.0.0.0:11434" &
       - name: Wait for Llama Stack server to be ready
         if: matrix.client-type == 'http'

View file

@@ -36,7 +36,7 @@ jobs:
       - name: Generate Template List
         id: set-matrix
         run: |
-          templates=$(ls llama_stack/templates/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
+          templates=$(ls llama_stack/templates/*/*build.yaml | grep -v "experimental-post-training" | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
           echo "templates=$templates" >> "$GITHUB_OUTPUT"

   build:
@@ -54,16 +54,42 @@ jobs:
       - name: Install dependencies
         uses: ./.github/actions/setup-runner
+        with:
+          install-ollama: false
-      - name: Print build dependencies
+      - name: Print dependencies in the image
+        if: matrix.image-type == 'venv'
         run: |
-          uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test --print-deps-only
+          uv pip list
-      - name: Run Llama Stack Build
+      - name: Run Llama Stack Build - VENV
+        if: matrix.image-type == 'venv'
         run: |
-          # USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead
-          # LLAMA_STACK_DIR is set to the current directory so we are building from the source
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test
+          uv sync --no-default-groups --extra ${{ matrix.template }}
+          # TODO
+      - name: Run Llama Stack Build - CONTAINER
+        if: matrix.image-type == 'container'
+        run: |
+          # TODO: use llama_stack/templates/Containerfile when we have a new release!
+          cat << 'EOF' > Containerfile
+          FROM registry.access.redhat.com/ubi9
+          WORKDIR /app
+          ARG TEMPLATE
+          RUN dnf -y update \
+              && dnf install -y python3.11 python3.11-pip python3.11-wheel python3.11-setuptools python3.11-devel gcc make \
+              && ln -s /bin/pip3.11 /bin/pip \
+              && ln -s /bin/python3.11 /bin/python \
+              && dnf clean all
+          RUN mkdir -p /.llama/providers.d /.cache
+          COPY . /app/llama-stack
+          RUN cd llama-stack && pip install --no-cache .[${TEMPLATE}]
+          RUN chmod -R g+rw /app /.llama /.cache
+          ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "/app/llama-stack/templates/${TEMPLATE}/run.yaml"]
+          EOF
+          docker build --build-arg TEMPLATE=${{ matrix.template }} -f Containerfile -t ${{ matrix.template }} .
       - name: Print dependencies in the image
         if: matrix.image-type == 'venv'
View file

@@ -43,7 +43,7 @@ jobs:
       - name: Build HTML
         run: |
           cd docs
-          uv run make html
+          uv run --group docs make html
       - name: Trigger ReadTheDocs build
         if: github.event_name != 'pull_request'

View file

@@ -40,7 +40,7 @@ def available_providers() -> list[ProviderSpec]:
         api=Api.inference,
         provider_type="inline::vllm",
         pip_packages=[
-            "vllm",
+            "vllm; sys_platform == 'linux'",
         ],
         module="llama_stack.providers.inline.inference.vllm",
         config_class="llama_stack.providers.inline.inference.vllm.VLLMConfig",
@@ -49,8 +49,9 @@ def available_providers() -> list[ProviderSpec]:
         api=Api.inference,
         provider_type="inline::sentence-transformers",
         pip_packages=[
-            "torch torchvision --index-url https://download.pytorch.org/whl/cpu",
-            "sentence-transformers --no-deps",
+            "torch",
+            "torchvision",
+            "sentence-transformers",
         ],
         module="llama_stack.providers.inline.inference.sentence_transformers",
         config_class="llama_stack.providers.inline.inference.sentence_transformers.config.SentenceTransformersInferenceConfig",

View file

@@ -0,0 +1,15 @@
# Usage:
# podman build --build-arg TEMPLATE={TEMPLATE_NAME} -f llama_stack/templates/Containerfile -t TEMPLATE_NAME .
FROM registry.access.redhat.com/ubi9
WORKDIR /app
ARG TEMPLATE
RUN dnf -y update \
&& dnf install -y python3.11 python3.11-pip python3.11-wheel python3.11-setuptools python3.11-devel gcc make \
&& ln -s /bin/pip3.11 /bin/pip \
&& ln -s /bin/python3.11 /bin/python \
&& dnf clean all
RUN mkdir -p /.llama/providers.d /.cache
RUN pip install --no-cache llama-stack[${TEMPLATE}]
RUN chmod -R g+rw /app /.llama /.cache
ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "/app/llama-stack/templates/${TEMPLATE}/run.yaml"]
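
As a rough usage sketch only: the build invocation matches the usage comment at the top
of this file; the run invocation is an assumption (port 8321 is the usual Llama Stack
default, and OLLAMA_URL mirrors the variable used in the CI workflow above):

```
podman build --build-arg TEMPLATE=ollama -f llama_stack/templates/Containerfile -t ollama .
# hypothetical run: the ENTRYPOINT starts the server with the template's run.yaml
podman run --rm -p 8321:8321 -e OLLAMA_URL=http://host.containers.internal:11434 ollama
```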

View file

@@ -1,5 +1,5 @@
 [build-system]
-requires = ["setuptools>=61.0"]
+requires = ["setuptools>=80.0"]
 build-backend = "setuptools.build_meta"

 [project]
@@ -53,6 +53,866 @@ ui = [
     "streamlit-option-menu",
 ]
#################
# DISTRIBUTIONS #
#################
bedrock = [
"aiosqlite",
"autoevals",
"boto3",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
cerebras = [
"aiosqlite",
"autoevals",
"cerebras_cloud_sdk",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
ci-tests = [
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"fastapi",
"fire",
"fireworks-ai",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"sqlite-vec",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
dell = [
"aiohttp",
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"langdetect",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
fireworks = [
"aiosqlite",
"asyncpg",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"fireworks-ai",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
groq = [
"aiosqlite",
"autoevals",
"chardet",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"litellm",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
hf-endpoint = [
"aiohttp",
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
hf-serverless = [
"aiohttp",
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
llama_api = [
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"fastapi",
"fire",
"httpx",
"langdetect",
"litellm",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"sqlite-vec",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
meta-reference-gpu = [
"accelerate",
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"fairscale",
"faiss-cpu",
"fastapi",
"fbgemm-gpu-genai==1.1.2",
"fire",
"httpx",
"langdetect",
"lm-format-enforcer",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchao==0.8.0",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
"zmq",
]
nvidia = [
"aiohttp",
"aiosqlite",
"chardet",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"uvicorn",
]
ollama = [
"aiohttp",
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"ollama",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"peft",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"tqdm",
"transformers",
"tree_sitter",
"trl",
"uvicorn",
]
open-benchmark = [
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"fastapi",
"fire",
"httpx",
"langdetect",
"litellm",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"sqlite-vec",
"together",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
passthrough = [
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
postgres-demo = [
"aiosqlite",
"asyncpg",
"chardet",
"chromadb-client",
"fastapi",
"fire",
"httpx",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchvision",
"tqdm",
"transformers",
"uvicorn",
]
remote-vllm = [
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
sambanova = [
"aiosqlite",
"chardet",
"chromadb-client",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"litellm",
"matplotlib",
"mcp",
"nltk",
"numpy",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchvision",
"tqdm",
"transformers",
"uvicorn",
]
starter = [
"aiohttp",
"aiosqlite",
"asyncpg",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"fastapi",
"fire",
"fireworks-ai",
"httpx",
"langdetect",
"litellm",
"matplotlib",
"mcp",
"nltk",
"numpy",
"ollama",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"sqlite-vec",
"together",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
tgi = [
"aiohttp",
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
together = [
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"together",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
vllm-gpu = [
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
"vllm; sys_platform == 'linux'",
]
watsonx = [
"aiosqlite",
"autoevals",
"chardet",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"ibm_watson_machine_learning",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
 [dependency-groups]
 dev = [
     "pytest",
@@ -123,7 +983,7 @@ docs = [
     "linkify",
     "sphinxcontrib.openapi",
 ]
-codegen = ["rich", "pydantic", "jinja2>=3.1.6"]
+codegen = ["rich", "pydantic", "jinja2>=3.1.6", "tomlkit"]

 [project.urls]
 Homepage = "https://github.com/meta-llama/llama-stack"
@@ -145,6 +1005,11 @@ explicit = true
 torch = [{ index = "pytorch-cpu" }]
 torchvision = [{ index = "pytorch-cpu" }]

+[[tool.uv.dependency-metadata]]
+name = "sentence-transformers"
+requires-dist = [
+] # This instructs UV to not install any dependencies for this package (torch is installed by default)
+
 [tool.ruff]
 line-length = 120
 exclude = [

requirements-ollama.txt (new file, 159 lines)

@@ -0,0 +1,159 @@
# This file was autogenerated by uv via the following command:
# uv export --frozen --no-hashes --no-emit-project --output-file=requirements-ollama.txt --no-annotate --no-default-groups --extra ollama
accelerate==1.7.0
aiohappyeyeballs==2.5.0
aiohttp==3.11.13
aiosignal==1.3.2
aiosqlite==0.21.0
annotated-types==0.7.0
anyio==4.8.0
async-timeout==5.0.1 ; python_full_version < '3.11.3'
attrs==25.1.0
autoevals==0.0.122
backoff==2.2.1
braintrust-core==0.0.58
certifi==2025.1.31
chardet==5.2.0
charset-normalizer==3.4.1
chevron==0.14.0
chromadb-client==1.0.12
click==8.1.8
colorama==0.4.6 ; sys_platform == 'win32'
contourpy==1.3.2
cycler==0.12.1
datasets==3.3.2
deprecated==1.2.18
dill==0.3.8
distro==1.9.0
dnspython==2.7.0
ecdsa==0.19.1
emoji==2.14.1
exceptiongroup==1.2.2 ; python_full_version < '3.11'
faiss-cpu==1.11.0
fastapi==0.115.8
filelock==3.17.0
fire==0.7.0
fonttools==4.58.1
frozenlist==1.5.0
fsspec==2024.12.0
googleapis-common-protos==1.67.0
greenlet==3.2.2
grpcio==1.71.0
h11==0.16.0
hf-xet==1.1.2 ; (platform_machine == 'aarch64' and sys_platform != 'darwin') or (platform_machine == 'amd64' and sys_platform != 'darwin') or (platform_machine == 'arm64' and sys_platform != 'darwin') or (platform_machine == 'x86_64' and sys_platform != 'darwin')
httpcore==1.0.9
httpx==0.28.1
httpx-sse==0.4.0
huggingface-hub==0.29.0 ; sys_platform == 'darwin'
huggingface-hub==0.32.3 ; sys_platform != 'darwin'
idna==3.10
importlib-metadata==8.0.0 ; sys_platform != 'darwin'
importlib-metadata==8.5.0 ; sys_platform == 'darwin'
jinja2==3.1.6
jiter==0.8.2
joblib==1.5.1
jsonschema==4.23.0
jsonschema-specifications==2024.10.1
kiwisolver==1.4.8
langdetect==1.0.9
levenshtein==0.27.1
llama-stack-client==0.2.9
markdown-it-py==3.0.0
markupsafe==3.0.2
matplotlib==3.10.3
mcp==1.3.0
mdurl==0.1.2
mpmath==1.3.0
multidict==6.1.0
multiprocess==0.70.16
networkx==3.4.2
nltk==3.9.1
numpy==1.26.4
ollama==0.5.1
openai==1.71.0
opentelemetry-api==1.26.0 ; sys_platform != 'darwin'
opentelemetry-api==1.30.0 ; sys_platform == 'darwin'
opentelemetry-exporter-otlp-proto-common==1.26.0 ; sys_platform != 'darwin'
opentelemetry-exporter-otlp-proto-common==1.30.0 ; sys_platform == 'darwin'
opentelemetry-exporter-otlp-proto-grpc==1.26.0 ; sys_platform != 'darwin'
opentelemetry-exporter-otlp-proto-grpc==1.30.0 ; sys_platform == 'darwin'
opentelemetry-exporter-otlp-proto-http==1.26.0 ; sys_platform != 'darwin'
opentelemetry-exporter-otlp-proto-http==1.30.0 ; sys_platform == 'darwin'
opentelemetry-proto==1.26.0 ; sys_platform != 'darwin'
opentelemetry-proto==1.30.0 ; sys_platform == 'darwin'
opentelemetry-sdk==1.26.0 ; sys_platform != 'darwin'
opentelemetry-sdk==1.30.0 ; sys_platform == 'darwin'
opentelemetry-semantic-conventions==0.47b0 ; sys_platform != 'darwin'
opentelemetry-semantic-conventions==0.51b0 ; sys_platform == 'darwin'
orjson==3.10.18
overrides==7.7.0
packaging==24.2
pandas==2.1.4
peft==0.15.2
pillow==11.1.0
posthog==4.2.0
prompt-toolkit==3.0.50
propcache==0.3.0
protobuf==4.25.8 ; sys_platform != 'darwin'
protobuf==5.29.3 ; sys_platform == 'darwin'
psutil==7.0.0
psycopg2-binary==2.9.10
pyaml==25.1.0
pyarrow==19.0.1
pyasn1==0.4.8
pydantic==2.10.6
pydantic-core==2.27.2
pydantic-settings==2.8.1
pygments==2.19.1
pymongo==4.13.0
pyparsing==3.2.3
pypdf==5.3.1
pythainlp==5.1.2
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
python-jose==3.4.0
python-multipart==0.0.20
pytz==2025.1
pyyaml==6.0.2
rapidfuzz==3.12.2
redis==6.2.0
referencing==0.36.2
regex==2024.11.6
requests==2.32.2 ; (python_full_version < '3.11' and sys_platform == 'darwin') or (python_full_version >= '3.11' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')
requests==2.32.3 ; (python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.11' and sys_platform == 'darwin')
rich==13.9.4
rpds-py==0.22.3
rsa==4.9
safetensors==0.5.3
scikit-learn==1.6.1
scipy==1.15.3
sentencepiece==0.2.0
setuptools==80.8.0
six==1.17.0
sniffio==1.3.1
sqlalchemy==2.0.41
sse-starlette==2.2.1
starlette==0.45.3
sympy==1.13.1
tenacity==9.1.2
termcolor==2.5.0
threadpoolctl==3.6.0
tiktoken==0.9.0
tokenizers==0.21.1
torch==2.6.0 ; sys_platform == 'darwin'
torch==2.6.0+cpu ; sys_platform != 'darwin'
tqdm==4.67.1
transformers==4.50.3 ; sys_platform == 'darwin'
transformers==4.52.4 ; sys_platform != 'darwin'
tree-sitter==0.24.0
trl==0.18.1
typing-extensions==4.12.2
tzdata==2025.1
urllib3==2.1.0 ; (python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.11' and sys_platform == 'darwin')
urllib3==2.3.0 ; (python_full_version < '3.11' and sys_platform == 'darwin') or (python_full_version >= '3.11' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')
uvicorn==0.34.0
wcwidth==0.2.13
wrapt==1.17.2
xxhash==3.5.0
yarl==1.18.3
zipp==3.21.0
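
Users who prefer the requirements format can build an environment straight from this
file; note that the export uses --no-emit-project, so llama-stack itself is not listed
and still needs to be installed separately, for example:

```
uv venv ollama-distro && source ollama-distro/bin/activate
uv pip install -r requirements-ollama.txt
uv pip install llama-stack  # or "uv pip install -e ." from a source checkout
```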

View file

@@ -14,8 +14,6 @@ anyio==4.8.0
     #   llama-stack-client
     #   openai
     #   starlette
-async-timeout==5.0.1 ; python_full_version < '3.11'
-    # via aiohttp
 attrs==25.1.0
     # via
     #   aiohttp
@@ -40,8 +38,6 @@ distro==1.9.0
     #   openai
 ecdsa==0.19.1
     # via python-jose
-exceptiongroup==1.2.2 ; python_full_version < '3.11'
-    # via anyio
 fastapi==0.115.8
     # via llama-stack
 filelock==3.17.0
@@ -58,6 +54,8 @@ h11==0.16.0
     # via
     #   httpcore
     #   llama-stack
+hf-xet==1.1.4 ; (platform_machine == 'aarch64' and sys_platform != 'darwin') or (platform_machine == 'amd64' and sys_platform != 'darwin') or (platform_machine == 'arm64' and sys_platform != 'darwin') or (platform_machine == 'x86_64' and sys_platform != 'darwin')
+    # via huggingface-hub
 httpcore==1.0.9
     # via httpx
 httpx==0.28.1
@@ -65,7 +63,9 @@ httpx==0.28.1
     #   llama-stack
     #   llama-stack-client
     #   openai
-huggingface-hub==0.29.0
+huggingface-hub==0.29.0 ; sys_platform == 'darwin'
+    # via llama-stack
+huggingface-hub==0.33.0 ; sys_platform != 'darwin'
     # via llama-stack
 idna==3.10
     # via
@@ -99,7 +99,7 @@ openai==1.71.0
     # via llama-stack
 packaging==24.2
     # via huggingface-hub
-pandas==2.2.3
+pandas==2.1.1
     # via llama-stack-client
 pillow==11.1.0
     # via llama-stack
@@ -147,7 +147,12 @@ referencing==0.36.2
     #   jsonschema-specifications
 regex==2024.11.6
     # via tiktoken
-requests==2.32.4
+requests==2.32.2 ; (python_full_version < '3.12' and sys_platform == 'darwin') or (python_full_version >= '3.12' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')
+    # via
+    #   huggingface-hub
+    #   llama-stack
+    #   tiktoken
+requests==2.32.4 ; (python_full_version < '3.12' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'darwin')
     # via
     #   huggingface-hub
     #   llama-stack
@@ -195,15 +200,15 @@ typing-extensions==4.12.2
     #   fastapi
     #   huggingface-hub
     #   llama-stack-client
+    #   multidict
     #   openai
     #   pydantic
     #   pydantic-core
     #   referencing
-    #   rich
 tzdata==2025.1
     # via pandas
-urllib3==2.3.0
+urllib3==2.1.0 ; (python_full_version < '3.12' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'darwin')
+    # via requests
+urllib3==2.3.0 ; (python_full_version < '3.12' and sys_platform == 'darwin') or (python_full_version >= '3.12' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')
     # via requests
 wcwidth==0.2.13
     # via prompt-toolkit

View file

@@ -13,8 +13,14 @@ from collections.abc import Iterable
 from functools import partial
 from pathlib import Path

+import tomlkit
 from rich.progress import Progress, SpinnerColumn, TextColumn

+from llama_stack.distribution.build import (
+    SERVER_DEPENDENCIES,
+    get_provider_dependencies,
+)
+
 REPO_ROOT = Path(__file__).parent.parent
@@ -85,6 +91,24 @@ def check_for_changes(change_tracker: ChangedPathTracker) -> bool:
     return has_changes


+def collect_template_dependencies(template_dir: Path) -> tuple[str | None, list[str]]:
+    try:
+        module_name = f"llama_stack.templates.{template_dir.name}"
+        module = importlib.import_module(module_name)
+
+        if template_func := getattr(module, "get_distribution_template", None):
+            template = template_func()
+            normal_deps, special_deps = get_provider_dependencies(template)
+            # Combine all dependencies in order: normal deps, special deps, server deps
+            all_deps = sorted(set(normal_deps + SERVER_DEPENDENCIES)) + sorted(set(special_deps))
+            return template.name, all_deps
+    except Exception as e:
+        print("Error collecting template dependencies for", template_dir, e)
+        return None, []
+
+    return None, []
+
+
 def pre_import_templates(template_dirs: list[Path]) -> None:
     # Pre-import all template modules to avoid deadlocks.
     for template_dir in template_dirs:
@@ -92,6 +116,53 @@ def pre_import_templates(template_dirs: list[Path]) -> None:
         importlib.import_module(module_name)


+def generate_dependencies_files(change_tracker: ChangedPathTracker):
+    templates_dir = REPO_ROOT / "llama_stack" / "templates"
+    distribution_deps = {}
+
+    for template_dir in find_template_dirs(templates_dir):
+        print("template_dir", template_dir)
+        name, deps = collect_template_dependencies(template_dir)
+        if name:
+            distribution_deps[name] = deps
+        else:
+            print("No template function found for", template_dir)
+
+    # First, remove any distributions that are no longer present
+    pyproject_file = REPO_ROOT / "pyproject.toml"
+    change_tracker.add_paths(pyproject_file)
+
+    # Read and parse the current pyproject.toml content
+    with open(pyproject_file) as fp:
+        pyproject = tomlkit.load(fp)
+
+    # Get current optional dependencies
+    current_deps = pyproject["project"]["optional-dependencies"]
+
+    # Store ui dependencies if they exist
+    ui_deps = current_deps.get("ui")
+
+    # Remove distributions that are no longer present
+    for name in list(current_deps.keys()):
+        if name not in distribution_deps.keys() and name != "ui":
+            del current_deps[name]
+
+    # Now add/update the remaining distributions
+    for name, deps in distribution_deps.items():
+        deps_array = tomlkit.array()
+        for dep in sorted(deps):
+            deps_array.append(dep)
+        current_deps[name] = deps_array.multiline(True)
+
+    # Restore ui dependencies if they existed
+    if ui_deps is not None:
+        current_deps["ui"] = ui_deps
+
+    # Write back to pyproject.toml
+    with open(pyproject_file, "w") as fp:
+        tomlkit.dump(pyproject, fp)
+
+
 def main():
     templates_dir = REPO_ROOT / "llama_stack" / "templates"
     change_tracker = ChangedPathTracker()
@@ -114,6 +185,9 @@ def main():
             list(executor.map(process_func, template_dirs))
             progress.update(task, advance=len(template_dirs))

+    # TODO: generate a Containerfile for each distribution as well?
+    generate_dependencies_files(change_tracker)
+
     if check_for_changes(change_tracker):
         print(
             "Distribution template changes detected. Please commit the changes.",

View file

@@ -16,4 +16,4 @@ if [ $FOUND_PYTHON -ne 0 ]; then
     uv python install "$PYTHON_VERSION"
 fi

-uv run --python "$PYTHON_VERSION" --with-editable . --group unit pytest --asyncio-mode=auto -s -v tests/unit/ $@
+uv run --python "$PYTHON_VERSION" --with-editable . --group dev --group unit pytest --asyncio-mode=auto -s -v tests/unit/ $@

uv.lock (generated, 5044 lines)

File diff suppressed because it is too large.