From 98033293503dbce8e9f53c47759879a9bc63ff35 Mon Sep 17 00:00:00 2001
From: Artemy
Date: Tue, 12 Aug 2025 12:50:50 +0100
Subject: [PATCH] RHAIENG-565: purge the midstream repo content to only host
 the build artifacts, so only the redhat-distribution should remain

---
 redhat-distribution/Containerfile    |  48 ++++++
 redhat-distribution/Containerfile.in |  11 ++
 redhat-distribution/README.md        |  48 ++++++
 redhat-distribution/build.py         | 153 ++++++++++++++++++
 redhat-distribution/build.yaml       |  36 +++++
 .../remote/eval/trustyai_lmeval.yaml |   7 +
 .../remote/safety/trustyai_fms.yaml  |   7 +
 redhat-distribution/run.yaml         | 138 ++++++++++++++++
 8 files changed, 448 insertions(+)
 create mode 100644 redhat-distribution/Containerfile
 create mode 100644 redhat-distribution/Containerfile.in
 create mode 100644 redhat-distribution/README.md
 create mode 100755 redhat-distribution/build.py
 create mode 100644 redhat-distribution/build.yaml
 create mode 100644 redhat-distribution/providers.d/remote/eval/trustyai_lmeval.yaml
 create mode 100644 redhat-distribution/providers.d/remote/safety/trustyai_fms.yaml
 create mode 100644 redhat-distribution/run.yaml

diff --git a/redhat-distribution/Containerfile b/redhat-distribution/Containerfile
new file mode 100644
index 000000000..e2cde2c36
--- /dev/null
+++ b/redhat-distribution/Containerfile
@@ -0,0 +1,48 @@
+# WARNING: This file is auto-generated. Do not modify it manually.
+# Generated by: redhat-distribution/build.py
+
+FROM registry.access.redhat.com/ubi9/python-312@sha256:95ec8d3ee9f875da011639213fd254256c29bc58861ac0b11f290a291fa04435
+WORKDIR /opt/app-root
+
+RUN pip install sqlalchemy # somehow sqlalchemy[asyncio] is not sufficient
+RUN pip install \
+    aiosqlite \
+    autoevals \
+    chardet \
+    datasets \
+    fastapi \
+    fire \
+    httpx \
+    kubernetes \
+    llama_stack_provider_lmeval==0.1.7 \
+    llama_stack_provider_trustyai_fms==0.1.2 \
+    matplotlib \
+    mcp \
+    nltk \
+    numpy \
+    openai \
+    opentelemetry-exporter-otlp-proto-http \
+    opentelemetry-sdk \
+    pandas \
+    pillow \
+    psycopg2-binary \
+    pymilvus>=2.4.10 \
+    pymongo \
+    pypdf \
+    redis \
+    requests \
+    scikit-learn \
+    scipy \
+    sentencepiece \
+    sqlalchemy[asyncio] \
+    tqdm \
+    transformers \
+    uvicorn
+RUN pip install --index-url https://download.pytorch.org/whl/cpu torch torchvision
+RUN pip install --no-deps sentence-transformers
+RUN pip install --no-cache llama-stack==0.2.14
+RUN mkdir -p ${HOME}/.llama/providers.d ${HOME}/.cache
+COPY redhat-distribution/run.yaml ${APP_ROOT}/run.yaml
+COPY redhat-distribution/providers.d/ ${HOME}/.llama/providers.d/
+
+ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "/opt/app-root/run.yaml"]
diff --git a/redhat-distribution/Containerfile.in b/redhat-distribution/Containerfile.in
new file mode 100644
index 000000000..d3305c032
--- /dev/null
+++ b/redhat-distribution/Containerfile.in
@@ -0,0 +1,11 @@
+FROM registry.access.redhat.com/ubi9/python-312@sha256:95ec8d3ee9f875da011639213fd254256c29bc58861ac0b11f290a291fa04435
+WORKDIR /opt/app-root
+
+RUN pip install sqlalchemy # somehow sqlalchemy[asyncio] is not sufficient
+{dependencies}
+RUN pip install --no-cache llama-stack==0.2.14
+RUN mkdir -p ${{HOME}}/.llama/providers.d ${{HOME}}/.cache
+COPY redhat-distribution/run.yaml ${{APP_ROOT}}/run.yaml
+COPY redhat-distribution/providers.d/ ${{HOME}}/.llama/providers.d/
+
+ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "/opt/app-root/run.yaml"]
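A note on the two files above: `build.py` fills the `{dependencies}` placeholder in `Containerfile.in` with Python's `str.format`, which is why the literal shell variables are written with doubled braces (`${{HOME}}`, `${{APP_ROOT}}`); formatting collapses them back to `${HOME}` and `${APP_ROOT}` in the generated Containerfile. A minimal sketch of that substitution, with the template string shortened for illustration:

```python
# Shortened stand-in for Containerfile.in; only the placeholder handling matters here.
template = (
    "RUN pip install sqlalchemy\n"
    "{dependencies}\n"
    "RUN mkdir -p ${{HOME}}/.llama/providers.d ${{HOME}}/.cache\n"
)

# A tiny stand-in for the RUN block that build.py generates.
dependencies = "RUN pip install \\\n    aiosqlite \\\n    fastapi"

# str.format substitutes {dependencies} and turns ${{HOME}} into ${HOME},
# which is exactly the text the generated Containerfile needs.
print(template.format(dependencies=dependencies))
```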
diff --git a/redhat-distribution/README.md b/redhat-distribution/README.md
new file mode 100644
index 000000000..742c01673
--- /dev/null
+++ b/redhat-distribution/README.md
@@ -0,0 +1,48 @@
+# Red Hat Distribution Build Instructions
+
+This directory contains the files needed to build a Red Hat-compatible container image for llama-stack.
+
+## Prerequisites
+
+- Python >=3.11
+- `llama` CLI tool installed: `pip install llama-stack`
+- Podman or Docker installed
+
+## Generating the Containerfile
+
+The Containerfile is auto-generated from a template. To generate it:
+
+1. Make sure you have the `llama` CLI tool installed
+2. Run the build script from the root of this git repo:
+   ```bash
+   ./redhat-distribution/build.py
+   ```
+
+This will:
+- Check for the llama CLI installation
+- Generate dependencies using `llama stack build`
+- Create a new `Containerfile` with the required dependencies
+
+## Editing the Containerfile
+
+The Containerfile is auto-generated from a template. To change it, edit the template `redhat-distribution/Containerfile.in` and run the build script again.
+NEVER edit the generated `Containerfile` manually.
+
+## Building the Container Image
+
+Once the Containerfile is generated, you can build the image with either Podman or Docker:
+
+### Using Podman to build the image for x86_64
+```bash
+podman build --platform linux/amd64 -f redhat-distribution/Containerfile -t rh .
+```
+
+## Notes
+
+- The generated Containerfile should not be modified manually, as it will be overwritten the next time the build script runs
+
+## Pushing the Image to a Registry
+
+```bash
+podman push rh quay.io/opendatahub/llama-stack:rh-distribution
+```
diff --git a/redhat-distribution/build.py b/redhat-distribution/build.py
new file mode 100755
index 000000000..85675b5ea
--- /dev/null
+++ b/redhat-distribution/build.py
@@ -0,0 +1,153 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+# Usage: ./redhat-distribution/build.py
+
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+BASE_REQUIREMENTS = [
+    "llama-stack==0.2.14",
+]
+
+
+def check_llama_installed():
+    """Check if llama binary is installed and accessible."""
+    if not shutil.which("llama"):
+        print("Error: llama binary not found. Please install it first.")
+        sys.exit(1)
+
+
+def check_llama_stack_version():
+    """Check if the llama-stack version in BASE_REQUIREMENTS matches the installed version."""
+    try:
+        result = subprocess.run(
+            "llama stack --version",
+            shell=True,
+            capture_output=True,
+            text=True,
+            check=True,
+        )
+        installed_version = result.stdout.strip()
+
+        # Extract version from BASE_REQUIREMENTS
+        expected_version = None
+        for req in BASE_REQUIREMENTS:
+            if req.startswith("llama-stack=="):
+                expected_version = req.split("==")[1]
+                break
+
+        if expected_version and installed_version != expected_version:
+            print("Error: llama-stack version mismatch!")
+            print(f"  Expected: {expected_version}")
+            print(f"  Installed: {installed_version}")
+            print(
+                "  If you just bumped the llama-stack version in BASE_REQUIREMENTS, you must also update the pinned version in .pre-commit-config.yaml"
+            )
+            sys.exit(1)
+
+    except subprocess.CalledProcessError as e:
+        print(f"Warning: Could not check llama-stack version: {e}")
+        print("Continuing without version validation...")
+
+
+def get_dependencies():
+    """Execute the llama stack build command and capture dependencies."""
+    cmd = "llama stack build --config redhat-distribution/build.yaml --print-deps-only"
+    try:
+        result = subprocess.run(cmd, shell=True, capture_output=True, text=True, check=True)
+        # Categorize and sort different types of pip install commands
+        standard_deps = []
+        torch_deps = []
+        no_deps = []
+        no_cache = []
+
+        for line in result.stdout.splitlines():
+            if line.strip().startswith("uv pip"):
+                # Split the line into command and packages
+                parts = line.replace("uv ", "RUN ", 1).split(" ", 3)
+                if len(parts) >= 4:  # We have packages to sort
+                    cmd_parts = parts[:3]  # "RUN pip install"
+                    packages = sorted(set(parts[3].split()))  # Sort the package names and remove duplicates
+
+                    # Determine command type and format accordingly
+                    if "--index-url" in line:
+                        full_cmd = " ".join(cmd_parts + [" ".join(packages)])
+                        torch_deps.append(full_cmd)
+                    elif "--no-deps" in line:
+                        full_cmd = " ".join(cmd_parts + [" ".join(packages)])
+                        no_deps.append(full_cmd)
+                    elif "--no-cache" in line:
+                        full_cmd = " ".join(cmd_parts + [" ".join(packages)])
+                        no_cache.append(full_cmd)
+                    else:
+                        formatted_packages = " \\\n    ".join(packages)
+                        full_cmd = f"{' '.join(cmd_parts)} \\\n    {formatted_packages}"
+                        standard_deps.append(full_cmd)
+                else:
+                    standard_deps.append(" ".join(parts))
+
+        # Combine all dependencies in specific order
+        all_deps = []
+        all_deps.extend(sorted(standard_deps))  # Regular pip installs first
+        all_deps.extend(sorted(torch_deps))  # PyTorch specific installs
+        all_deps.extend(sorted(no_deps))  # No-deps installs
+        all_deps.extend(sorted(no_cache))  # No-cache installs
+
+        return "\n".join(all_deps)
+    except subprocess.CalledProcessError as e:
+        print(f"Error executing command: {e}")
+        print(f"Command output: {e.output}")
+        sys.exit(1)
+
+
+def generate_containerfile(dependencies):
+    """Generate Containerfile from template with dependencies."""
+    template_path = Path("redhat-distribution/Containerfile.in")
+    output_path = Path("redhat-distribution/Containerfile")
+
+    if not template_path.exists():
+        print(f"Error: Template file {template_path} not found")
+        sys.exit(1)
+
+    # Read template
+    with open(template_path) as f:
+        template_content = f.read()
+
+    # Add warning message at the top
+    warning = "# WARNING: This file is auto-generated. Do not modify it manually.\n# Generated by: redhat-distribution/build.py\n\n"
+
+    # Process template using string formatting
+    containerfile_content = warning + template_content.format(dependencies=dependencies.rstrip())
+
+    # Write output
+    with open(output_path, "w") as f:
+        f.write(containerfile_content)
+
+    print(f"Successfully generated {output_path}")
+
+
+def main():
+    print("Checking llama installation...")
+    check_llama_installed()
+
+    print("Checking llama-stack version...")
+    check_llama_stack_version()
+
+    print("Getting dependencies...")
+    dependencies = get_dependencies()
+
+    print("Generating Containerfile...")
+    generate_containerfile(dependencies)
+
+    print("Done!")
+
+
+if __name__ == "__main__":
+    main()
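To make the transformation in `get_dependencies()` concrete, here is a self-contained sketch of the same parsing applied to one hypothetical `uv pip install` line; the package names are placeholders, not real `llama stack build --print-deps-only` output:

```python
# Hypothetical line from `llama stack build --print-deps-only`.
line = "uv pip install fastapi httpx fastapi aiosqlite"

# Same steps as build.py: rewrite the prefix, split off the package list,
# then sort and de-duplicate the package names.
parts = line.replace("uv ", "RUN ", 1).split(" ", 3)  # ['RUN', 'pip', 'install', 'fastapi httpx fastapi aiosqlite']
cmd_parts = parts[:3]                                 # ['RUN', 'pip', 'install']
packages = sorted(set(parts[3].split()))              # ['aiosqlite', 'fastapi', 'httpx']

formatted_packages = " \\\n    ".join(packages)
full_cmd = f"{' '.join(cmd_parts)} \\\n    {formatted_packages}"
print(full_cmd)
# RUN pip install \
#     aiosqlite \
#     fastapi \
#     httpx
```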
diff --git a/redhat-distribution/build.yaml b/redhat-distribution/build.yaml
new file mode 100644
index 000000000..663fb002f
--- /dev/null
+++ b/redhat-distribution/build.yaml
@@ -0,0 +1,36 @@
+version: '2'
+distribution_spec:
+  description: Red Hat distribution of Llama Stack
+  providers:
+    inference:
+    - remote::vllm
+    - inline::sentence-transformers
+    vector_io:
+    - inline::milvus
+    safety:
+    - remote::trustyai_fms
+    agents:
+    - inline::meta-reference
+    eval:
+    - remote::trustyai_lmeval
+    datasetio:
+    - remote::huggingface
+    - inline::localfs
+    scoring:
+    - inline::basic
+    - inline::llm-as-judge
+    - inline::braintrust
+    telemetry:
+    - inline::meta-reference
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::rag-runtime
+    - remote::model-context-protocol
+  container_image: registry.redhat.io/ubi9/python-311:9.6-1749631027
+additional_pip_packages:
+- aiosqlite
+- sqlalchemy[asyncio]
+image_type: container
+image_name: llama-stack-rh
+external_providers_dir: redhat-distribution/providers.d
diff --git a/redhat-distribution/providers.d/remote/eval/trustyai_lmeval.yaml b/redhat-distribution/providers.d/remote/eval/trustyai_lmeval.yaml
new file mode 100644
index 000000000..c3b83beef
--- /dev/null
+++ b/redhat-distribution/providers.d/remote/eval/trustyai_lmeval.yaml
@@ -0,0 +1,7 @@
+adapter:
+  adapter_type: trustyai_lmeval
+  pip_packages: ["kubernetes", "llama_stack_provider_lmeval==0.1.7"]
+  config_class: llama_stack_provider_lmeval.config.LMEvalEvalProviderConfig
+  module: llama_stack_provider_lmeval
+api_dependencies: ["inference"]
+optional_api_dependencies: []
diff --git a/redhat-distribution/providers.d/remote/safety/trustyai_fms.yaml b/redhat-distribution/providers.d/remote/safety/trustyai_fms.yaml
new file mode 100644
index 000000000..4694f761e
--- /dev/null
+++ b/redhat-distribution/providers.d/remote/safety/trustyai_fms.yaml
@@ -0,0 +1,7 @@
+adapter:
+  adapter_type: trustyai_fms
+  pip_packages: ["llama_stack_provider_trustyai_fms==0.1.2"]
+  config_class: llama_stack_provider_trustyai_fms.config.FMSSafetyProviderConfig
+  module: llama_stack_provider_trustyai_fms
+api_dependencies: ["safety"]
+optional_api_dependencies: ["shields"]
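The `run.yaml` added below leans heavily on `${env.VAR:=default}` references, which llama-stack resolves from the container environment at startup, falling back to the value after `:=` when the variable is unset. As a reading aid only (this is not llama-stack's actual resolver), the notation behaves roughly like this sketch:

```python
import os
import re

# Illustrative approximation of the ${env.NAME:=default} notation used in run.yaml.
_ENV_REF = re.compile(r"\$\{env\.(?P<name>[A-Z0-9_]+)(?::=(?P<default>[^}]*))?\}")


def expand(value: str) -> str:
    """Replace ${env.NAME:=default} references with the environment value or the default."""
    return _ENV_REF.sub(
        lambda m: os.environ.get(m.group("name"), m.group("default") or ""),
        value,
    )


print(expand("${env.VLLM_URL:=http://localhost:8000/v1}"))  # prints the default unless VLLM_URL is set
```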
diff --git a/redhat-distribution/run.yaml b/redhat-distribution/run.yaml
new file mode 100644
index 000000000..b58f04d31
--- /dev/null
+++ b/redhat-distribution/run.yaml
@@ -0,0 +1,138 @@
+version: 2
+image_name: rh
+apis:
+- agents
+- datasetio
+- eval
+- inference
+- safety
+- scoring
+- telemetry
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: vllm-inference
+    provider_type: remote::vllm
+    config:
+      url: ${env.VLLM_URL:=http://localhost:8000/v1}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+    config: {}
+  vector_io:
+  - provider_id: milvus
+    provider_type: inline::milvus
+    config:
+      db_path: /opt/app-root/src/.llama/distributions/rh/milvus.db
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: /opt/app-root/src/.llama/distributions/rh/milvus_registry.db
+  safety:
+  - provider_id: trustyai_fms
+    provider_type: remote::trustyai_fms
+    config:
+      orchestrator_url: ${env.FMS_ORCHESTRATOR_URL:=}
+      ssl_cert_path: ${env.FMS_SSL_CERT_PATH:=}
+      shields: {}
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: /opt/app-root/src/.llama/distributions/rh/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: /opt/app-root/src/.llama/distributions/rh/responses_store.db
+  eval:
+  - provider_id: trustyai_lmeval
+    provider_type: remote::trustyai_lmeval
+    config:
+      use_k8s: True
+      base_url: ${env.VLLM_URL:=http://localhost:8000/v1}
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: /opt/app-root/src/.llama/distributions/rh/huggingface_datasetio.db
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: /opt/app-root/src/.llama/distributions/rh/localfs_datasetio.db
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
+      sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
+      sqlite_db_path: /opt/app-root/src/.llama/distributions/rh/trace_store.db
+      otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
+metadata_store:
+  type: sqlite
+  db_path: /opt/app-root/src/.llama/distributions/rh/registry.db
+inference_store:
+  type: sqlite
+  db_path: /opt/app-root/src/.llama/distributions/rh/inference_store.db
+models:
+- metadata: {}
+  model_id: ${env.INFERENCE_MODEL}
+  provider_id: vllm-inference
+  model_type: llm
+- metadata:
+    embedding_dimension: 768
+  model_id: granite-embedding-125m
+  provider_id: sentence-transformers
+  provider_model_id: ibm-granite/granite-embedding-125m-english
+  model_type: embedding
+shields: []
+vector_dbs: []
+datasets: []
+scoring_fns: []
+benchmarks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::rag
+  provider_id: rag-runtime
+server:
+  port: 8321
+external_providers_dir: /opt/app-root/src/.llama/providers.d
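Because the generated Containerfile must never be edited by hand, a repository-level check can keep it in sync with `build.py`. The helper below is hypothetical and not part of this patch; it assumes it runs from the repo root (with the `llama` CLI available) and regenerates the file in place before comparing:

```python
#!/usr/bin/env python3
"""Hypothetical drift check: fail if the committed Containerfile no longer matches build.py's output."""

import subprocess
import sys
from pathlib import Path

containerfile = Path("redhat-distribution/Containerfile")
before = containerfile.read_text()

# Regenerate the Containerfile in place using the build script from this patch.
subprocess.run([sys.executable, "redhat-distribution/build.py"], check=True)

if containerfile.read_text() != before:
    print("Containerfile is stale: re-run ./redhat-distribution/build.py and commit the result.")
    sys.exit(1)
print("Containerfile is up to date.")
```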