RHAIENG-565: purge the midstream repo content to only host the build artifacts, so only the redhat-distribution should remain

Artemy 2025-08-12 12:50:50 +01:00
parent 5d65c017b0
commit 9803329350
8 changed files with 448 additions and 0 deletions


@@ -0,0 +1,48 @@
# WARNING: This file is auto-generated. Do not modify it manually.
# Generated by: redhat-distribution/build.py
FROM registry.access.redhat.com/ubi9/python-312@sha256:95ec8d3ee9f875da011639213fd254256c29bc58861ac0b11f290a291fa04435
WORKDIR /opt/app-root
RUN pip install sqlalchemy # somehow sqlalchemy[asyncio] is not sufficient
RUN pip install \
aiosqlite \
autoevals \
chardet \
datasets \
fastapi \
fire \
httpx \
kubernetes \
llama_stack_provider_lmeval==0.1.7 \
llama_stack_provider_trustyai_fms==0.1.2 \
matplotlib \
mcp \
nltk \
numpy \
openai \
opentelemetry-exporter-otlp-proto-http \
opentelemetry-sdk \
pandas \
pillow \
psycopg2-binary \
    "pymilvus>=2.4.10" \
pymongo \
pypdf \
redis \
requests \
scikit-learn \
scipy \
sentencepiece \
sqlalchemy[asyncio] \
tqdm \
transformers \
uvicorn
RUN pip install --index-url https://download.pytorch.org/whl/cpu torch torchvision
RUN pip install --no-deps sentence-transformers
RUN pip install --no-cache llama-stack==0.2.14
RUN mkdir -p ${HOME}/.llama/providers.d ${HOME}/.cache
COPY redhat-distribution/run.yaml ${APP_ROOT}/run.yaml
COPY redhat-distribution/providers.d/ ${HOME}/.llama/providers.d/
ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "/opt/app-root/run.yaml"]


@@ -0,0 +1,11 @@
FROM registry.access.redhat.com/ubi9/python-312@sha256:95ec8d3ee9f875da011639213fd254256c29bc58861ac0b11f290a291fa04435
WORKDIR /opt/app-root
RUN pip install sqlalchemy # somehow sqlalchemy[asyncio] is not sufficient
{dependencies}
RUN pip install --no-cache llama-stack==0.2.14
RUN mkdir -p ${{HOME}}/.llama/providers.d ${{HOME}}/.cache
COPY redhat-distribution/run.yaml ${{APP_ROOT}}/run.yaml
COPY redhat-distribution/providers.d/ ${{HOME}}/.llama/providers.d/
ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "/opt/app-root/run.yaml"]


@@ -0,0 +1,48 @@
# Red Hat Distribution Build Instructions
This directory contains the files needed to build a Red Hat-compatible container image for llama-stack.
## Prerequisites
- Python >=3.11
- `llama` CLI tool installed: `pip install llama-stack`
- Podman or Docker installed
## Generating the Containerfile
The Containerfile is auto-generated from a template. To generate it:
1. Make sure you have the `llama` CLI tool installed
2. Run the build script from the root of this git repo:
```bash
./redhat-distribution/build.py
```
This will:
- Check for the llama CLI installation
- Generate dependencies using `llama stack build`
- Create a new `Containerfile` with the required dependencies
## Editing the Containerfile
The Containerfile is generated from the template in `redhat-distribution/Containerfile.in`. To change it, modify the template and run the build script again.
NEVER edit the generated `Containerfile` manually.
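If you want to confirm that the committed `Containerfile` is still in sync with the template, one possible check (a suggested local sanity check or CI step, not part of the build script) is to regenerate it and diff against the working tree:
```bash
# Regenerate the Containerfile from the template, then fail if it differs
# from what is committed.
./redhat-distribution/build.py
git diff --exit-code redhat-distribution/Containerfile
```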
## Building the Container Image
Once the Containerfile is generated, you can build the image using either Podman or Docker:
### Using Podman to build the image for x86_64
```bash
podman build --platform linux/amd64 -f redhat-distribution/Containerfile -t rh .
```
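After the build completes, a quick local smoke test could look like the following. This is only a sketch: the image tag `rh` matches the build command above, the port and environment variable names (`INFERENCE_MODEL`, `VLLM_URL`) come from `run.yaml`, and the model name and vLLM endpoint shown are placeholders:
```bash
# Serve the distribution locally; run.yaml listens on port 8321 and reads
# INFERENCE_MODEL and VLLM_URL via ${env.*} substitution.
podman run --rm -p 8321:8321 \
  -e INFERENCE_MODEL=my-served-model \
  -e VLLM_URL=http://vllm.example.com:8000/v1 \
  rh
```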
## Notes
- The generated Containerfile should not be modified manually, as it will be overwritten the next time you run the build script.
## Pushing the Image to a Registry
```bash
podman push <build-ID> quay.io/opendatahub/llama-stack:rh-distribution
```
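A couple of helper commands that may be needed around the push (illustrative; they assume you have push access to the `quay.io/opendatahub` organization):
```bash
# Log in to the registry once before pushing.
podman login quay.io
# List local images to find the <build-ID> used in the push command above.
podman images
```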

redhat-distribution/build.py Executable file

@@ -0,0 +1,153 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
# Usage: ./redhat-distribution/build.py
import shutil
import subprocess
import sys
from pathlib import Path

BASE_REQUIREMENTS = [
"llama-stack==0.2.14",
]


def check_llama_installed():
"""Check if llama binary is installed and accessible."""
if not shutil.which("llama"):
print("Error: llama binary not found. Please install it first.")
        sys.exit(1)


def check_llama_stack_version():
"""Check if the llama-stack version in BASE_REQUIREMENTS matches the installed version."""
try:
result = subprocess.run(
["llama stack --version"],
shell=True,
capture_output=True,
text=True,
check=True,
)
installed_version = result.stdout.strip()
# Extract version from BASE_REQUIREMENTS
expected_version = None
for req in BASE_REQUIREMENTS:
if req.startswith("llama-stack=="):
expected_version = req.split("==")[1]
break
if expected_version and installed_version != expected_version:
print("Error: llama-stack version mismatch!")
print(f" Expected: {expected_version}")
print(f" Installed: {installed_version}")
print(
" If you just bumped the llama-stack version in BASE_REQUIREMENTS, you must update the version from .pre-commit-config.yaml"
)
sys.exit(1)
except subprocess.CalledProcessError as e:
print(f"Warning: Could not check llama-stack version: {e}")
print("Continuing without version validation...")
def get_dependencies():
"""Execute the llama stack build command and capture dependencies."""
cmd = "llama stack build --config redhat-distribution/build.yaml --print-deps-only"
try:
result = subprocess.run(cmd, shell=True, capture_output=True, text=True, check=True)
# Categorize and sort different types of pip install commands
standard_deps = []
torch_deps = []
no_deps = []
no_cache = []
for line in result.stdout.splitlines():
if line.strip().startswith("uv pip"):
# Split the line into command and packages
parts = line.replace("uv ", "RUN ", 1).split(" ", 3)
if len(parts) >= 4: # We have packages to sort
cmd_parts = parts[:3] # "RUN pip install"
packages = sorted(set(parts[3].split())) # Sort the package names and remove duplicates
# Determine command type and format accordingly
if "--index-url" in line:
full_cmd = " ".join(cmd_parts + [" ".join(packages)])
torch_deps.append(full_cmd)
elif "--no-deps" in line:
full_cmd = " ".join(cmd_parts + [" ".join(packages)])
no_deps.append(full_cmd)
elif "--no-cache" in line:
full_cmd = " ".join(cmd_parts + [" ".join(packages)])
no_cache.append(full_cmd)
else:
formatted_packages = " \\\n ".join(packages)
full_cmd = f"{' '.join(cmd_parts)} \\\n {formatted_packages}"
standard_deps.append(full_cmd)
else:
standard_deps.append(" ".join(parts))
# Combine all dependencies in specific order
all_deps = []
all_deps.extend(sorted(standard_deps)) # Regular pip installs first
all_deps.extend(sorted(torch_deps)) # PyTorch specific installs
all_deps.extend(sorted(no_deps)) # No-deps installs
all_deps.extend(sorted(no_cache)) # No-cache installs
return "\n".join(all_deps)
except subprocess.CalledProcessError as e:
print(f"Error executing command: {e}")
print(f"Command output: {e.output}")
        sys.exit(1)


def generate_containerfile(dependencies):
"""Generate Containerfile from template with dependencies."""
template_path = Path("redhat-distribution/Containerfile.in")
output_path = Path("redhat-distribution/Containerfile")
if not template_path.exists():
print(f"Error: Template file {template_path} not found")
sys.exit(1)
# Read template
with open(template_path) as f:
template_content = f.read()
# Add warning message at the top
warning = "# WARNING: This file is auto-generated. Do not modify it manually.\n# Generated by: redhat-distribution/build.py\n\n"
# Process template using string formatting
containerfile_content = warning + template_content.format(dependencies=dependencies.rstrip())
# Write output
with open(output_path, "w") as f:
f.write(containerfile_content)
print(f"Successfully generated {output_path}")
def main():
print("Checking llama installation...")
check_llama_installed()
print("Checking llama-stack version...")
check_llama_stack_version()
print("Getting dependencies...")
dependencies = get_dependencies()
print("Generating Containerfile...")
generate_containerfile(dependencies)
print("Done!")
if __name__ == "__main__":
main()


@@ -0,0 +1,36 @@
version: '2'
distribution_spec:
description: Red Hat distribution of Llama Stack
providers:
inference:
- remote::vllm
- inline::sentence-transformers
vector_io:
- inline::milvus
safety:
- remote::trustyai_fms
agents:
- inline::meta-reference
eval:
- remote::trustyai_lmeval
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
telemetry:
- inline::meta-reference
tool_runtime:
- remote::brave-search
- remote::tavily-search
- inline::rag-runtime
- remote::model-context-protocol
container_image: registry.redhat.io/ubi9/python-311:9.6-1749631027
additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio]
image_type: container
image_name: llama-stack-rh
external_providers_dir: redhat-distribution/providers.d


@@ -0,0 +1,7 @@
adapter:
adapter_type: trustyai_lmeval
pip_packages: ["kubernetes", "llama_stack_provider_lmeval==0.1.7"]
config_class: llama_stack_provider_lmeval.config.LMEvalEvalProviderConfig
module: llama_stack_provider_lmeval
api_dependencies: ["inference"]
optional_api_dependencies: []


@@ -0,0 +1,7 @@
adapter:
adapter_type: trustyai_fms
pip_packages: ["llama_stack_provider_trustyai_fms==0.1.2"]
config_class: llama_stack_provider_trustyai_fms.config.FMSSafetyProviderConfig
module: llama_stack_provider_trustyai_fms
api_dependencies: ["safety"]
optional_api_dependencies: ["shields"]


@@ -0,0 +1,138 @@
version: 2
image_name: rh
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
inference:
- provider_id: vllm-inference
provider_type: remote::vllm
config:
url: ${env.VLLM_URL:=http://localhost:8000/v1}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
config: {}
vector_io:
- provider_id: milvus
provider_type: inline::milvus
config:
db_path: /opt/app-root/src/.llama/distributions/rh/milvus.db
kvstore:
type: sqlite
namespace: null
db_path: /opt/app-root/src/.llama/distributions/rh/milvus_registry.db
safety:
- provider_id: trustyai_fms
provider_type: remote::trustyai_fms
config:
orchestrator_url: ${env.FMS_ORCHESTRATOR_URL:=}
ssl_cert_path: ${env.FMS_SSL_CERT_PATH:=}
shields: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: /opt/app-root/src/.llama/distributions/rh/agents_store.db
responses_store:
type: sqlite
db_path: /opt/app-root/src/.llama/distributions/rh/responses_store.db
eval:
- provider_id: trustyai_lmeval
provider_type: remote::trustyai_lmeval
config:
use_k8s: True
base_url: ${env.VLLM_URL:=http://localhost:8000/v1}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
type: sqlite
namespace: null
db_path: /opt/app-root/src/.llama/distributions/rh/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: /opt/app-root/src/.llama/distributions/rh/localfs_datasetio.db
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:=}
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
sqlite_db_path: /opt/app-root/src/.llama/distributions/rh/trace_store.db
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: /opt/app-root/src/.llama/distributions/rh/registry.db
inference_store:
type: sqlite
db_path: /opt/app-root/src/.llama/distributions/rh/inference_store.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: vllm-inference
model_type: llm
- metadata:
embedding_dimension: 768
model_id: granite-embedding-125m
provider_id: sentence-transformers
provider_model_id: ibm-granite/granite-embedding-125m-english
model_type: embedding
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
external_providers_dir: /opt/app-root/src/.llama/providers.d