RHAIENG-565: purge the midstream repo content to only host the build artifacts, so only the redhat-distribution should remain

Artemy 2025-08-12 12:50:50 +01:00
parent 5d65c017b0
commit 9803329350
8 changed files with 448 additions and 0 deletions


@@ -0,0 +1,48 @@
# WARNING: This file is auto-generated. Do not modify it manually.
# Generated by: redhat-distribution/build.py
FROM registry.access.redhat.com/ubi9/python-312@sha256:95ec8d3ee9f875da011639213fd254256c29bc58861ac0b11f290a291fa04435
WORKDIR /opt/app-root
RUN pip install sqlalchemy # somehow sqlalchemy[asyncio] is not sufficient
RUN pip install \
aiosqlite \
autoevals \
chardet \
datasets \
fastapi \
fire \
httpx \
kubernetes \
llama_stack_provider_lmeval==0.1.7 \
llama_stack_provider_trustyai_fms==0.1.2 \
matplotlib \
mcp \
nltk \
numpy \
openai \
opentelemetry-exporter-otlp-proto-http \
opentelemetry-sdk \
pandas \
pillow \
psycopg2-binary \
    "pymilvus>=2.4.10" \
pymongo \
pypdf \
redis \
requests \
scikit-learn \
scipy \
sentencepiece \
sqlalchemy[asyncio] \
tqdm \
transformers \
uvicorn
RUN pip install --index-url https://download.pytorch.org/whl/cpu torch torchvision
RUN pip install --no-deps sentence-transformers
RUN pip install --no-cache llama-stack==0.2.14
RUN mkdir -p ${HOME}/.llama/providers.d ${HOME}/.cache
COPY redhat-distribution/run.yaml ${APP_ROOT}/run.yaml
COPY redhat-distribution/providers.d/ ${HOME}/.llama/providers.d/
ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "/opt/app-root/run.yaml"]


@@ -0,0 +1,11 @@
FROM registry.access.redhat.com/ubi9/python-312@sha256:95ec8d3ee9f875da011639213fd254256c29bc58861ac0b11f290a291fa04435
WORKDIR /opt/app-root
RUN pip install sqlalchemy # somehow sqlalchemy[asyncio] is not sufficient
{dependencies}
RUN pip install --no-cache llama-stack==0.2.14
RUN mkdir -p ${{HOME}}/.llama/providers.d ${{HOME}}/.cache
COPY redhat-distribution/run.yaml ${{APP_ROOT}}/run.yaml
COPY redhat-distribution/providers.d/ ${{HOME}}/.llama/providers.d/
ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "/opt/app-root/run.yaml"]


@@ -0,0 +1,48 @@
# Red Hat Distribution Build Instructions
This directory contains the files needed to build a Red Hat-compatible container image for llama-stack.
## Prerequisites
- Python >=3.11
- `llama` CLI tool installed: `pip install llama-stack`
- Podman or Docker installed
## Generating the Containerfile
The Containerfile is auto-generated from a template. To generate it:
1. Make sure you have the `llama` CLI tool installed
2. Run the build script from the root of this git repo:
```bash
./redhat-distribution/build.py
```
This will:
- Check for the llama CLI installation
- Generate dependencies using `llama stack build`
- Create a new `Containerfile` with the required dependencies
## Editing the Containerfile
The Containerfile is generated from the template in `redhat-distribution/Containerfile.in`. To change it, modify the template and run the build script again.
NEVER edit the generated `Containerfile` manually.
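If you want to confirm that the committed `Containerfile` is still in sync with the template, one possible check (a suggested local sanity check or CI step, not part of the build script) is to regenerate it and diff against the working tree:
```bash
# Regenerate the Containerfile from the template, then fail if it differs
# from what is committed.
./redhat-distribution/build.py
git diff --exit-code redhat-distribution/Containerfile
```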
## Building the Container Image
Once the Containerfile is generated, you can build the image using either Podman or Docker:
### Using Podman to build the image for x86_64
```bash
podman build --platform linux/amd64 -f redhat-distribution/Containerfile -t rh .
```
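After the build completes, a quick local smoke test could look like the following. This is only a sketch: the image tag `rh` matches the build command above, the port and environment variable names (`INFERENCE_MODEL`, `VLLM_URL`) come from `run.yaml`, and the model name and vLLM endpoint shown are placeholders:
```bash
# Serve the distribution locally; run.yaml listens on port 8321 and reads
# INFERENCE_MODEL and VLLM_URL via ${env.*} substitution.
podman run --rm -p 8321:8321 \
  -e INFERENCE_MODEL=my-served-model \
  -e VLLM_URL=http://vllm.example.com:8000/v1 \
  rh
```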
## Notes
- The generated Containerfile should not be modified manually, as it will be overwritten the next time you run the build script.
## Pushing the Image to a Registry
```bash
podman push <build-ID> quay.io/opendatahub/llama-stack:rh-distribution
```
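A couple of helper commands that may be needed around the push (illustrative; they assume you have push access to the `quay.io/opendatahub` organization):
```bash
# Log in to the registry once before pushing.
podman login quay.io
# List local images to find the <build-ID> used in the push command above.
podman images
```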

redhat-distribution/build.py Executable file

@@ -0,0 +1,153 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
# Usage: ./redhat-distribution/build.py
import shutil
import subprocess
import sys
from pathlib import Path

BASE_REQUIREMENTS = [
"llama-stack==0.2.14",
]


def check_llama_installed():
"""Check if llama binary is installed and accessible."""
if not shutil.which("llama"):
print("Error: llama binary not found. Please install it first.")
        sys.exit(1)


def check_llama_stack_version():
"""Check if the llama-stack version in BASE_REQUIREMENTS matches the installed version."""
try:
result = subprocess.run(
["llama stack --version"],
shell=True,
capture_output=True,
text=True,
check=True,
)
installed_version = result.stdout.strip()
# Extract version from BASE_REQUIREMENTS
expected_version = None
for req in BASE_REQUIREMENTS:
if req.startswith("llama-stack=="):
expected_version = req.split("==")[1]
break
if expected_version and installed_version != expected_version:
print("Error: llama-stack version mismatch!")
print(f" Expected: {expected_version}")
print(f" Installed: {installed_version}")
print(
" If you just bumped the llama-stack version in BASE_REQUIREMENTS, you must update the version from .pre-commit-config.yaml"
)
sys.exit(1)
except subprocess.CalledProcessError as e:
print(f"Warning: Could not check llama-stack version: {e}")
print("Continuing without version validation...")
def get_dependencies():
"""Execute the llama stack build command and capture dependencies."""
cmd = "llama stack build --config redhat-distribution/build.yaml --print-deps-only"
try:
result = subprocess.run(cmd, shell=True, capture_output=True, text=True, check=True)
# Categorize and sort different types of pip install commands
standard_deps = []
torch_deps = []
no_deps = []
no_cache = []
for line in result.stdout.splitlines():
if line.strip().startswith("uv pip"):
# Split the line into command and packages
parts = line.replace("uv ", "RUN ", 1).split(" ", 3)
if len(parts) >= 4: # We have packages to sort
cmd_parts = parts[:3] # "RUN pip install"
packages = sorted(set(parts[3].split())) # Sort the package names and remove duplicates
# Determine command type and format accordingly
if "--index-url" in line:
full_cmd = " ".join(cmd_parts + [" ".join(packages)])
torch_deps.append(full_cmd)
elif "--no-deps" in line:
full_cmd = " ".join(cmd_parts + [" ".join(packages)])
no_deps.append(full_cmd)
elif "--no-cache" in line:
full_cmd = " ".join(cmd_parts + [" ".join(packages)])
no_cache.append(full_cmd)
else:
formatted_packages = " \\\n ".join(packages)
full_cmd = f"{' '.join(cmd_parts)} \\\n {formatted_packages}"
standard_deps.append(full_cmd)
else:
standard_deps.append(" ".join(parts))
# Combine all dependencies in specific order
all_deps = []
all_deps.extend(sorted(standard_deps)) # Regular pip installs first
all_deps.extend(sorted(torch_deps)) # PyTorch specific installs
all_deps.extend(sorted(no_deps)) # No-deps installs
all_deps.extend(sorted(no_cache)) # No-cache installs
return "\n".join(all_deps)
except subprocess.CalledProcessError as e:
print(f"Error executing command: {e}")
print(f"Command output: {e.output}")
        sys.exit(1)


def generate_containerfile(dependencies):
"""Generate Containerfile from template with dependencies."""
template_path = Path("redhat-distribution/Containerfile.in")
output_path = Path("redhat-distribution/Containerfile")
if not template_path.exists():
print(f"Error: Template file {template_path} not found")
sys.exit(1)
# Read template
with open(template_path) as f:
template_content = f.read()
# Add warning message at the top
warning = "# WARNING: This file is auto-generated. Do not modify it manually.\n# Generated by: redhat-distribution/build.py\n\n"
# Process template using string formatting
containerfile_content = warning + template_content.format(dependencies=dependencies.rstrip())
# Write output
with open(output_path, "w") as f:
f.write(containerfile_content)
print(f"Successfully generated {output_path}")
def main():
print("Checking llama installation...")
check_llama_installed()
print("Checking llama-stack version...")
check_llama_stack_version()
print("Getting dependencies...")
dependencies = get_dependencies()
print("Generating Containerfile...")
generate_containerfile(dependencies)
print("Done!")
if __name__ == "__main__":
main()


@@ -0,0 +1,36 @@
version: '2'
distribution_spec:
description: Red Hat distribution of Llama Stack
providers:
inference:
- remote::vllm
- inline::sentence-transformers
vector_io:
- inline::milvus
safety:
- remote::trustyai_fms
agents:
- inline::meta-reference
eval:
- remote::trustyai_lmeval
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
telemetry:
- inline::meta-reference
tool_runtime:
- remote::brave-search
- remote::tavily-search
- inline::rag-runtime
- remote::model-context-protocol
container_image: registry.redhat.io/ubi9/python-311:9.6-1749631027
additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio]
image_type: container
image_name: llama-stack-rh
external_providers_dir: redhat-distribution/providers.d


@@ -0,0 +1,7 @@
adapter:
adapter_type: trustyai_lmeval
pip_packages: ["kubernetes", "llama_stack_provider_lmeval==0.1.7"]
config_class: llama_stack_provider_lmeval.config.LMEvalEvalProviderConfig
module: llama_stack_provider_lmeval
api_dependencies: ["inference"]
optional_api_dependencies: []


@@ -0,0 +1,7 @@
adapter:
adapter_type: trustyai_fms
pip_packages: ["llama_stack_provider_trustyai_fms==0.1.2"]
config_class: llama_stack_provider_trustyai_fms.config.FMSSafetyProviderConfig
module: llama_stack_provider_trustyai_fms
api_dependencies: ["safety"]
optional_api_dependencies: ["shields"]


@@ -0,0 +1,138 @@
version: 2
image_name: rh
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
inference:
- provider_id: vllm-inference
provider_type: remote::vllm
config:
url: ${env.VLLM_URL:=http://localhost:8000/v1}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
config: {}
vector_io:
- provider_id: milvus
provider_type: inline::milvus
config:
db_path: /opt/app-root/src/.llama/distributions/rh/milvus.db
kvstore:
type: sqlite
namespace: null
db_path: /opt/app-root/src/.llama/distributions/rh/milvus_registry.db
safety:
- provider_id: trustyai_fms
provider_type: remote::trustyai_fms
config:
orchestrator_url: ${env.FMS_ORCHESTRATOR_URL:=}
ssl_cert_path: ${env.FMS_SSL_CERT_PATH:=}
shields: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: /opt/app-root/src/.llama/distributions/rh/agents_store.db
responses_store:
type: sqlite
db_path: /opt/app-root/src/.llama/distributions/rh/responses_store.db
eval:
- provider_id: trustyai_lmeval
provider_type: remote::trustyai_lmeval
config:
use_k8s: True
base_url: ${env.VLLM_URL:=http://localhost:8000/v1}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
type: sqlite
namespace: null
db_path: /opt/app-root/src/.llama/distributions/rh/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: /opt/app-root/src/.llama/distributions/rh/localfs_datasetio.db
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:=}
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
sqlite_db_path: /opt/app-root/src/.llama/distributions/rh/trace_store.db
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: /opt/app-root/src/.llama/distributions/rh/registry.db
inference_store:
type: sqlite
db_path: /opt/app-root/src/.llama/distributions/rh/inference_store.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: vllm-inference
model_type: llm
- metadata:
embedding_dimension: 768
model_id: granite-embedding-125m
provider_id: sentence-transformers
provider_model_id: ibm-granite/granite-embedding-125m-english
model_type: embedding
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
external_providers_dir: /opt/app-root/src/.llama/providers.d