From 98033293503dbce8e9f53c47759879a9bc63ff35 Mon Sep 17 00:00:00 2001
From: Artemy
Date: Tue, 12 Aug 2025 12:50:50 +0100
Subject: [PATCH] RHAIENG-565: purge the midstream repo content to only host
 the build artifacts, so only the redhat-distribution should remain

---
 redhat-distribution/Containerfile    |  48 ++++++
 redhat-distribution/Containerfile.in |  11 ++
 redhat-distribution/README.md        |  48 ++++++
 redhat-distribution/build.py         | 153 ++++++++++++++++++
 redhat-distribution/build.yaml       |  36 +++++
 .../remote/eval/trustyai_lmeval.yaml |   7 +
 .../remote/safety/trustyai_fms.yaml  |   7 +
 redhat-distribution/run.yaml         | 138 ++++++++++++++++
 8 files changed, 448 insertions(+)
 create mode 100644 redhat-distribution/Containerfile
 create mode 100644 redhat-distribution/Containerfile.in
 create mode 100644 redhat-distribution/README.md
 create mode 100755 redhat-distribution/build.py
 create mode 100644 redhat-distribution/build.yaml
 create mode 100644 redhat-distribution/providers.d/remote/eval/trustyai_lmeval.yaml
 create mode 100644 redhat-distribution/providers.d/remote/safety/trustyai_fms.yaml
 create mode 100644 redhat-distribution/run.yaml

diff --git a/redhat-distribution/Containerfile b/redhat-distribution/Containerfile
new file mode 100644
index 000000000..e2cde2c36
--- /dev/null
+++ b/redhat-distribution/Containerfile
@@ -0,0 +1,48 @@
+# WARNING: This file is auto-generated. Do not modify it manually.
+# Generated by: redhat-distribution/build.py
+
+FROM registry.access.redhat.com/ubi9/python-312@sha256:95ec8d3ee9f875da011639213fd254256c29bc58861ac0b11f290a291fa04435
+WORKDIR /opt/app-root
+
+RUN pip install sqlalchemy # somehow sqlalchemy[asyncio] is not sufficient
+RUN pip install \
+    aiosqlite \
+    autoevals \
+    chardet \
+    datasets \
+    fastapi \
+    fire \
+    httpx \
+    kubernetes \
+    llama_stack_provider_lmeval==0.1.7 \
+    llama_stack_provider_trustyai_fms==0.1.2 \
+    matplotlib \
+    mcp \
+    nltk \
+    numpy \
+    openai \
+    opentelemetry-exporter-otlp-proto-http \
+    opentelemetry-sdk \
+    pandas \
+    pillow \
+    psycopg2-binary \
+    pymilvus>=2.4.10 \
+    pymongo \
+    pypdf \
+    redis \
+    requests \
+    scikit-learn \
+    scipy \
+    sentencepiece \
+    sqlalchemy[asyncio] \
+    tqdm \
+    transformers \
+    uvicorn
+RUN pip install --index-url https://download.pytorch.org/whl/cpu torch torchvision
+RUN pip install --no-deps sentence-transformers
+RUN pip install --no-cache llama-stack==0.2.14
+RUN mkdir -p ${HOME}/.llama/providers.d ${HOME}/.cache
+COPY redhat-distribution/run.yaml ${APP_ROOT}/run.yaml
+COPY redhat-distribution/providers.d/ ${HOME}/.llama/providers.d/
+
+ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "/opt/app-root/run.yaml"]
diff --git a/redhat-distribution/Containerfile.in b/redhat-distribution/Containerfile.in
new file mode 100644
index 000000000..d3305c032
--- /dev/null
+++ b/redhat-distribution/Containerfile.in
@@ -0,0 +1,11 @@
+FROM registry.access.redhat.com/ubi9/python-312@sha256:95ec8d3ee9f875da011639213fd254256c29bc58861ac0b11f290a291fa04435
+WORKDIR /opt/app-root
+
+RUN pip install sqlalchemy # somehow sqlalchemy[asyncio] is not sufficient
+{dependencies}
+RUN pip install --no-cache llama-stack==0.2.14
+RUN mkdir -p ${{HOME}}/.llama/providers.d ${{HOME}}/.cache
+COPY redhat-distribution/run.yaml ${{APP_ROOT}}/run.yaml
+COPY redhat-distribution/providers.d/ ${{HOME}}/.llama/providers.d/
+
+ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "/opt/app-root/run.yaml"]
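A note on the two files above: `build.py` fills the `{dependencies}` placeholder in `Containerfile.in` with Python's `str.format`, which is why the literal shell variables are written with doubled braces (`${{HOME}}`, `${{APP_ROOT}}`); formatting collapses them back to `${HOME}` and `${APP_ROOT}` in the generated Containerfile. A minimal sketch of that substitution, with the template string shortened for illustration:

```python
# Shortened stand-in for Containerfile.in; only the placeholder handling matters here.
template = (
    "RUN pip install sqlalchemy\n"
    "{dependencies}\n"
    "RUN mkdir -p ${{HOME}}/.llama/providers.d ${{HOME}}/.cache\n"
)

# A tiny stand-in for the RUN block that build.py generates.
dependencies = "RUN pip install \\\n    aiosqlite \\\n    fastapi"

# str.format substitutes {dependencies} and turns ${{HOME}} into ${HOME},
# which is exactly the text the generated Containerfile needs.
print(template.format(dependencies=dependencies))
```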
diff --git a/redhat-distribution/README.md b/redhat-distribution/README.md
new file mode 100644
index 000000000..742c01673
--- /dev/null
+++ b/redhat-distribution/README.md
@@ -0,0 +1,48 @@
+# Red Hat Distribution Build Instructions
+
+This directory contains the files needed to build a Red Hat-compatible container image for llama-stack.
+
+## Prerequisites
+
+- Python >=3.11
+- `llama` CLI tool installed: `pip install llama-stack`
+- Podman or Docker installed
+
+## Generating the Containerfile
+
+The Containerfile is auto-generated from a template. To generate it:
+
+1. Make sure you have the `llama` CLI tool installed
+2. Run the build script from the root of this git repo:
+   ```bash
+   ./redhat-distribution/build.py
+   ```
+
+This will:
+- Check for the llama CLI installation
+- Generate dependencies using `llama stack build`
+- Create a new `Containerfile` with the required dependencies
+
+## Editing the Containerfile
+
+The Containerfile is auto-generated from a template. To change it, edit the template `redhat-distribution/Containerfile.in` and run the build script again.
+NEVER edit the generated `Containerfile` manually.
+
+## Building the Container Image
+
+Once the Containerfile is generated, you can build the image with either Podman or Docker:
+
+### Using Podman to build the image for x86_64
+```bash
+podman build --platform linux/amd64 -f redhat-distribution/Containerfile -t rh .
+```
+
+## Notes
+
+- The generated Containerfile should not be modified manually, as it will be overwritten the next time the build script runs
+
+## Pushing the Image to a Registry
+
+```bash
+podman push rh quay.io/opendatahub/llama-stack:rh-distribution
+```
diff --git a/redhat-distribution/build.py b/redhat-distribution/build.py
new file mode 100755
index 000000000..85675b5ea
--- /dev/null
+++ b/redhat-distribution/build.py
@@ -0,0 +1,153 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+# Usage: ./redhat-distribution/build.py
+
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+BASE_REQUIREMENTS = [
+    "llama-stack==0.2.14",
+]
+
+
+def check_llama_installed():
+    """Check if llama binary is installed and accessible."""
+    if not shutil.which("llama"):
+        print("Error: llama binary not found. Please install it first.")
+        sys.exit(1)
+
+
+def check_llama_stack_version():
+    """Check if the llama-stack version in BASE_REQUIREMENTS matches the installed version."""
+    try:
+        result = subprocess.run(
+            "llama stack --version",
+            shell=True,
+            capture_output=True,
+            text=True,
+            check=True,
+        )
+        installed_version = result.stdout.strip()
+
+        # Extract version from BASE_REQUIREMENTS
+        expected_version = None
+        for req in BASE_REQUIREMENTS:
+            if req.startswith("llama-stack=="):
+                expected_version = req.split("==")[1]
+                break
+
+        if expected_version and installed_version != expected_version:
+            print("Error: llama-stack version mismatch!")
+            print(f"  Expected: {expected_version}")
+            print(f"  Installed: {installed_version}")
+            print(
+                "  If you just bumped the llama-stack version in BASE_REQUIREMENTS, you must also update the pinned version in .pre-commit-config.yaml"
+            )
+            sys.exit(1)
+
+    except subprocess.CalledProcessError as e:
+        print(f"Warning: Could not check llama-stack version: {e}")
+        print("Continuing without version validation...")
+
+
+def get_dependencies():
+    """Execute the llama stack build command and capture dependencies."""
+    cmd = "llama stack build --config redhat-distribution/build.yaml --print-deps-only"
+    try:
+        result = subprocess.run(cmd, shell=True, capture_output=True, text=True, check=True)
+        # Categorize and sort different types of pip install commands
+        standard_deps = []
+        torch_deps = []
+        no_deps = []
+        no_cache = []
+
+        for line in result.stdout.splitlines():
+            if line.strip().startswith("uv pip"):
+                # Split the line into command and packages
+                parts = line.replace("uv ", "RUN ", 1).split(" ", 3)
+                if len(parts) >= 4:  # We have packages to sort
+                    cmd_parts = parts[:3]  # "RUN pip install"
+                    packages = sorted(set(parts[3].split()))  # Sort the package names and remove duplicates
+
+                    # Determine command type and format accordingly
+                    if "--index-url" in line:
+                        full_cmd = " ".join(cmd_parts + [" ".join(packages)])
+                        torch_deps.append(full_cmd)
+                    elif "--no-deps" in line:
+                        full_cmd = " ".join(cmd_parts + [" ".join(packages)])
+                        no_deps.append(full_cmd)
+                    elif "--no-cache" in line:
+                        full_cmd = " ".join(cmd_parts + [" ".join(packages)])
+                        no_cache.append(full_cmd)
+                    else:
+                        formatted_packages = " \\\n    ".join(packages)
+                        full_cmd = f"{' '.join(cmd_parts)} \\\n    {formatted_packages}"
+                        standard_deps.append(full_cmd)
+                else:
+                    standard_deps.append(" ".join(parts))
+
+        # Combine all dependencies in specific order
+        all_deps = []
+        all_deps.extend(sorted(standard_deps))  # Regular pip installs first
+        all_deps.extend(sorted(torch_deps))  # PyTorch specific installs
+        all_deps.extend(sorted(no_deps))  # No-deps installs
+        all_deps.extend(sorted(no_cache))  # No-cache installs
+
+        return "\n".join(all_deps)
+    except subprocess.CalledProcessError as e:
+        print(f"Error executing command: {e}")
+        print(f"Command output: {e.output}")
+        sys.exit(1)
+
+
+def generate_containerfile(dependencies):
+    """Generate Containerfile from template with dependencies."""
+    template_path = Path("redhat-distribution/Containerfile.in")
+    output_path = Path("redhat-distribution/Containerfile")
+
+    if not template_path.exists():
+        print(f"Error: Template file {template_path} not found")
+        sys.exit(1)
+
+    # Read template
+    with open(template_path) as f:
+        template_content = f.read()
+
+    # Add warning message at the top
+    warning = "# WARNING: This file is auto-generated. Do not modify it manually.\n# Generated by: redhat-distribution/build.py\n\n"
+
+    # Process template using string formatting
+    containerfile_content = warning + template_content.format(dependencies=dependencies.rstrip())
+
+    # Write output
+    with open(output_path, "w") as f:
+        f.write(containerfile_content)
+
+    print(f"Successfully generated {output_path}")
+
+
+def main():
+    print("Checking llama installation...")
+    check_llama_installed()
+
+    print("Checking llama-stack version...")
+    check_llama_stack_version()
+
+    print("Getting dependencies...")
+    dependencies = get_dependencies()
+
+    print("Generating Containerfile...")
+    generate_containerfile(dependencies)
+
+    print("Done!")
+
+
+if __name__ == "__main__":
+    main()
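To make the transformation in `get_dependencies()` concrete, here is a self-contained sketch of the same parsing applied to one hypothetical `uv pip install` line; the package names are placeholders, not real `llama stack build --print-deps-only` output:

```python
# Hypothetical line from `llama stack build --print-deps-only`.
line = "uv pip install fastapi httpx fastapi aiosqlite"

# Same steps as build.py: rewrite the prefix, split off the package list,
# then sort and de-duplicate the package names.
parts = line.replace("uv ", "RUN ", 1).split(" ", 3)  # ['RUN', 'pip', 'install', 'fastapi httpx fastapi aiosqlite']
cmd_parts = parts[:3]                                 # ['RUN', 'pip', 'install']
packages = sorted(set(parts[3].split()))              # ['aiosqlite', 'fastapi', 'httpx']

formatted_packages = " \\\n    ".join(packages)
full_cmd = f"{' '.join(cmd_parts)} \\\n    {formatted_packages}"
print(full_cmd)
# RUN pip install \
#     aiosqlite \
#     fastapi \
#     httpx
```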
diff --git a/redhat-distribution/build.yaml b/redhat-distribution/build.yaml
new file mode 100644
index 000000000..663fb002f
--- /dev/null
+++ b/redhat-distribution/build.yaml
@@ -0,0 +1,36 @@
+version: '2'
+distribution_spec:
+  description: Red Hat distribution of Llama Stack
+  providers:
+    inference:
+    - remote::vllm
+    - inline::sentence-transformers
+    vector_io:
+    - inline::milvus
+    safety:
+    - remote::trustyai_fms
+    agents:
+    - inline::meta-reference
+    eval:
+    - remote::trustyai_lmeval
+    datasetio:
+    - remote::huggingface
+    - inline::localfs
+    scoring:
+    - inline::basic
+    - inline::llm-as-judge
+    - inline::braintrust
+    telemetry:
+    - inline::meta-reference
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::rag-runtime
+    - remote::model-context-protocol
+  container_image: registry.redhat.io/ubi9/python-311:9.6-1749631027
+additional_pip_packages:
+- aiosqlite
+- sqlalchemy[asyncio]
+image_type: container
+image_name: llama-stack-rh
+external_providers_dir: redhat-distribution/providers.d
diff --git a/redhat-distribution/providers.d/remote/eval/trustyai_lmeval.yaml b/redhat-distribution/providers.d/remote/eval/trustyai_lmeval.yaml
new file mode 100644
index 000000000..c3b83beef
--- /dev/null
+++ b/redhat-distribution/providers.d/remote/eval/trustyai_lmeval.yaml
@@ -0,0 +1,7 @@
+adapter:
+  adapter_type: trustyai_lmeval
+  pip_packages: ["kubernetes", "llama_stack_provider_lmeval==0.1.7"]
+  config_class: llama_stack_provider_lmeval.config.LMEvalEvalProviderConfig
+  module: llama_stack_provider_lmeval
+api_dependencies: ["inference"]
+optional_api_dependencies: []
diff --git a/redhat-distribution/providers.d/remote/safety/trustyai_fms.yaml b/redhat-distribution/providers.d/remote/safety/trustyai_fms.yaml
new file mode 100644
index 000000000..4694f761e
--- /dev/null
+++ b/redhat-distribution/providers.d/remote/safety/trustyai_fms.yaml
@@ -0,0 +1,7 @@
+adapter:
+  adapter_type: trustyai_fms
+  pip_packages: ["llama_stack_provider_trustyai_fms==0.1.2"]
+  config_class: llama_stack_provider_trustyai_fms.config.FMSSafetyProviderConfig
+  module: llama_stack_provider_trustyai_fms
+api_dependencies: ["safety"]
+optional_api_dependencies: ["shields"]
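The `run.yaml` added below leans heavily on `${env.VAR:=default}` references, which llama-stack resolves from the container environment at startup, falling back to the value after `:=` when the variable is unset. As a reading aid only (this is not llama-stack's actual resolver), the notation behaves roughly like this sketch:

```python
import os
import re

# Illustrative approximation of the ${env.NAME:=default} notation used in run.yaml.
_ENV_REF = re.compile(r"\$\{env\.(?P<name>[A-Z0-9_]+)(?::=(?P<default>[^}]*))?\}")


def expand(value: str) -> str:
    """Replace ${env.NAME:=default} references with the environment value or the default."""
    return _ENV_REF.sub(
        lambda m: os.environ.get(m.group("name"), m.group("default") or ""),
        value,
    )


print(expand("${env.VLLM_URL:=http://localhost:8000/v1}"))  # prints the default unless VLLM_URL is set
```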
diff --git a/redhat-distribution/run.yaml b/redhat-distribution/run.yaml
new file mode 100644
index 000000000..b58f04d31
--- /dev/null
+++ b/redhat-distribution/run.yaml
@@ -0,0 +1,138 @@
+version: 2
+image_name: rh
+apis:
+- agents
+- datasetio
+- eval
+- inference
+- safety
+- scoring
+- telemetry
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: vllm-inference
+    provider_type: remote::vllm
+    config:
+      url: ${env.VLLM_URL:=http://localhost:8000/v1}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+    config: {}
+  vector_io:
+  - provider_id: milvus
+    provider_type: inline::milvus
+    config:
+      db_path: /opt/app-root/src/.llama/distributions/rh/milvus.db
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: /opt/app-root/src/.llama/distributions/rh/milvus_registry.db
+  safety:
+  - provider_id: trustyai_fms
+    provider_type: remote::trustyai_fms
+    config:
+      orchestrator_url: ${env.FMS_ORCHESTRATOR_URL:=}
+      ssl_cert_path: ${env.FMS_SSL_CERT_PATH:=}
+      shields: {}
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: /opt/app-root/src/.llama/distributions/rh/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: /opt/app-root/src/.llama/distributions/rh/responses_store.db
+  eval:
+  - provider_id: trustyai_lmeval
+    provider_type: remote::trustyai_lmeval
+    config:
+      use_k8s: True
+      base_url: ${env.VLLM_URL:=http://localhost:8000/v1}
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: /opt/app-root/src/.llama/distributions/rh/huggingface_datasetio.db
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: /opt/app-root/src/.llama/distributions/rh/localfs_datasetio.db
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
+      sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
+      sqlite_db_path: /opt/app-root/src/.llama/distributions/rh/trace_store.db
+      otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
+metadata_store:
+  type: sqlite
+  db_path: /opt/app-root/src/.llama/distributions/rh/registry.db
+inference_store:
+  type: sqlite
+  db_path: /opt/app-root/src/.llama/distributions/rh/inference_store.db
+models:
+- metadata: {}
+  model_id: ${env.INFERENCE_MODEL}
+  provider_id: vllm-inference
+  model_type: llm
+- metadata:
+    embedding_dimension: 768
+  model_id: granite-embedding-125m
+  provider_id: sentence-transformers
+  provider_model_id: ibm-granite/granite-embedding-125m-english
+  model_type: embedding
+shields: []
+vector_dbs: []
+datasets: []
+scoring_fns: []
+benchmarks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::rag
+  provider_id: rag-runtime
+server:
+  port: 8321
+external_providers_dir: /opt/app-root/src/.llama/providers.d
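Because the generated Containerfile must never be edited by hand, a repository-level check can keep it in sync with `build.py`. The helper below is hypothetical and not part of this patch; it assumes it runs from the repo root (with the `llama` CLI available) and regenerates the file in place before comparing:

```python
#!/usr/bin/env python3
"""Hypothetical drift check: fail if the committed Containerfile no longer matches build.py's output."""

import subprocess
import sys
from pathlib import Path

containerfile = Path("redhat-distribution/Containerfile")
before = containerfile.read_text()

# Regenerate the Containerfile in place using the build script from this patch.
subprocess.run([sys.executable, "redhat-distribution/build.py"], check=True)

if containerfile.read_text() != before:
    print("Containerfile is stale: re-run ./redhat-distribution/build.py and commit the result.")
    sys.exit(1)
print("Containerfile is up to date.")
```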