Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-17 21:29:48 +00:00)
RHAIENG-565: purge the midstream repo content to only host the build artifacts, so only the redhat-distribution should remain
parent 5d65c017b0
commit 9803329350

8 changed files with 448 additions and 0 deletions
redhat-distribution/Containerfile (Normal file, 48 lines added)
@@ -0,0 +1,48 @@
# WARNING: This file is auto-generated. Do not modify it manually.
# Generated by: redhat-distribution/build.py

FROM registry.access.redhat.com/ubi9/python-312@sha256:95ec8d3ee9f875da011639213fd254256c29bc58861ac0b11f290a291fa04435
WORKDIR /opt/app-root

RUN pip install sqlalchemy # somehow sqlalchemy[asyncio] is not sufficient
RUN pip install \
    aiosqlite \
    autoevals \
    chardet \
    datasets \
    fastapi \
    fire \
    httpx \
    kubernetes \
    llama_stack_provider_lmeval==0.1.7 \
    llama_stack_provider_trustyai_fms==0.1.2 \
    matplotlib \
    mcp \
    nltk \
    numpy \
    openai \
    opentelemetry-exporter-otlp-proto-http \
    opentelemetry-sdk \
    pandas \
    pillow \
    psycopg2-binary \
    pymilvus>=2.4.10 \
    pymongo \
    pypdf \
    redis \
    requests \
    scikit-learn \
    scipy \
    sentencepiece \
    sqlalchemy[asyncio] \
    tqdm \
    transformers \
    uvicorn
RUN pip install --index-url https://download.pytorch.org/whl/cpu torch torchvision
RUN pip install --no-deps sentence-transformers
RUN pip install --no-cache llama-stack==0.2.14
RUN mkdir -p ${HOME}/.llama/providers.d ${HOME}/.cache
COPY redhat-distribution/run.yaml ${APP_ROOT}/run.yaml
COPY redhat-distribution/providers.d/ ${HOME}/.llama/providers.d/

ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "/opt/app-root/run.yaml"]
redhat-distribution/Containerfile.in (Normal file, 11 lines added)
@@ -0,0 +1,11 @@
FROM registry.access.redhat.com/ubi9/python-312@sha256:95ec8d3ee9f875da011639213fd254256c29bc58861ac0b11f290a291fa04435
WORKDIR /opt/app-root

RUN pip install sqlalchemy # somehow sqlalchemy[asyncio] is not sufficient
{dependencies}
RUN pip install --no-cache llama-stack==0.2.14
RUN mkdir -p ${{HOME}}/.llama/providers.d ${{HOME}}/.cache
COPY redhat-distribution/run.yaml ${{APP_ROOT}}/run.yaml
COPY redhat-distribution/providers.d/ ${{HOME}}/.llama/providers.d/

ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "/opt/app-root/run.yaml"]
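The doubled braces in the template (`${{HOME}}`, `${{APP_ROOT}}`) exist because build.py expands the template with Python's `str.format`: single braces mark the `{dependencies}` placeholder, while `{{`/`}}` escape literal braces so the shell-style `${HOME}` survives into the generated Containerfile. A minimal sketch of that expansion (the template text here is shortened for illustration):

```python
# Minimal sketch of the str.format expansion performed by redhat-distribution/build.py.
# The template text is abbreviated; only the brace handling matters here.
template = (
    "RUN pip install sqlalchemy\n"
    "{dependencies}\n"
    "RUN mkdir -p ${{HOME}}/.llama/providers.d ${{HOME}}/.cache\n"
)

deps = "RUN pip install \\\n    aiosqlite \\\n    fastapi"

rendered = template.format(dependencies=deps)
print(rendered)
# {dependencies} is replaced by the generated pip install commands,
# and ${{HOME}} collapses to the literal ${HOME} expected at container build time.
```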
redhat-distribution/README.md (Normal file, 48 lines added)
@@ -0,0 +1,48 @@
# Red Hat Distribution Build Instructions

This directory contains the files needed to build a Red Hat-compatible container image for llama-stack.

## Prerequisites

- Python >=3.11
- `llama` CLI tool installed: `pip install llama-stack`
- Podman or Docker installed

## Generating the Containerfile

The Containerfile is auto-generated from a template. To generate it:

1. Make sure you have the `llama` CLI tool installed
2. Run the build script from the root of this git repo:
```bash
./redhat-distribution/build.py
```

This will:
- Check for the llama CLI installation
- Generate dependencies using `llama stack build`
- Create a new `Containerfile` with the required dependencies

## Editing the Containerfile

The Containerfile is auto-generated from a template. To change it, modify the template in `redhat-distribution/Containerfile.in` and run the build script again.
NEVER edit the generated `Containerfile` manually.

## Building the Container Image

Once the Containerfile is generated, you can build the image using either Podman or Docker:

### Using Podman to build the image for x86_64
```bash
podman build --platform linux/amd64 -f redhat-distribution/Containerfile -t rh .
```

## Notes

- The generated Containerfile should not be modified manually, as it will be overwritten the next time you run the build script

## Push the image to a registry

```bash
podman push <build-ID> quay.io/opendatahub/llama-stack:rh-distribution
```
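The README stops at pushing the image; once a container built from it is running, a quick way to verify the stack came up is to poll the server port configured later in this commit's run.yaml (8321). This is a hedged sketch, not part of the commit: it assumes the container has already been started with the environment variables referenced in run.yaml set, and that this llama-stack version serves the usual /v1/health route; adjust if your deployment differs.

```python
# Hedged smoke test for a locally running container built from this Containerfile.
# Assumes the server listens on the run.yaml port (8321) and exposes /v1/health.
import time

import requests

BASE_URL = "http://localhost:8321"


def wait_for_server(timeout_s: float = 60.0) -> bool:
    """Return True once the health endpoint answers, False on timeout."""
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        try:
            resp = requests.get(f"{BASE_URL}/v1/health", timeout=5)
            if resp.ok:
                print("server is up:", resp.json())
                return True
        except requests.ConnectionError:
            pass  # server not accepting connections yet
        time.sleep(2)
    return False


if __name__ == "__main__":
    raise SystemExit(0 if wait_for_server() else 1)
```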
redhat-distribution/build.py (Executable file, 153 lines added)
@@ -0,0 +1,153 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

# Usage: ./redhat-distribution/build.py

import shutil
import subprocess
import sys
from pathlib import Path

BASE_REQUIREMENTS = [
    "llama-stack==0.2.14",
]


def check_llama_installed():
    """Check if llama binary is installed and accessible."""
    if not shutil.which("llama"):
        print("Error: llama binary not found. Please install it first.")
        sys.exit(1)


def check_llama_stack_version():
    """Check if the llama-stack version in BASE_REQUIREMENTS matches the installed version."""
    try:
        result = subprocess.run(
            ["llama stack --version"],
            shell=True,
            capture_output=True,
            text=True,
            check=True,
        )
        installed_version = result.stdout.strip()

        # Extract version from BASE_REQUIREMENTS
        expected_version = None
        for req in BASE_REQUIREMENTS:
            if req.startswith("llama-stack=="):
                expected_version = req.split("==")[1]
                break

        if expected_version and installed_version != expected_version:
            print("Error: llama-stack version mismatch!")
            print(f"  Expected: {expected_version}")
            print(f"  Installed: {installed_version}")
            print(
                "  If you just bumped the llama-stack version in BASE_REQUIREMENTS, you must update the version in .pre-commit-config.yaml"
            )
            sys.exit(1)

    except subprocess.CalledProcessError as e:
        print(f"Warning: Could not check llama-stack version: {e}")
        print("Continuing without version validation...")


def get_dependencies():
    """Execute the llama stack build command and capture dependencies."""
    cmd = "llama stack build --config redhat-distribution/build.yaml --print-deps-only"
    try:
        result = subprocess.run(cmd, shell=True, capture_output=True, text=True, check=True)
        # Categorize and sort different types of pip install commands
        standard_deps = []
        torch_deps = []
        no_deps = []
        no_cache = []

        for line in result.stdout.splitlines():
            if line.strip().startswith("uv pip"):
                # Split the line into command and packages
                parts = line.replace("uv ", "RUN ", 1).split(" ", 3)
                if len(parts) >= 4:  # We have packages to sort
                    cmd_parts = parts[:3]  # "RUN pip install"
                    packages = sorted(set(parts[3].split()))  # Sort the package names and remove duplicates

                    # Determine command type and format accordingly
                    if "--index-url" in line:
                        full_cmd = " ".join(cmd_parts + [" ".join(packages)])
                        torch_deps.append(full_cmd)
                    elif "--no-deps" in line:
                        full_cmd = " ".join(cmd_parts + [" ".join(packages)])
                        no_deps.append(full_cmd)
                    elif "--no-cache" in line:
                        full_cmd = " ".join(cmd_parts + [" ".join(packages)])
                        no_cache.append(full_cmd)
                    else:
                        formatted_packages = " \\\n    ".join(packages)
                        full_cmd = f"{' '.join(cmd_parts)} \\\n    {formatted_packages}"
                        standard_deps.append(full_cmd)
                else:
                    standard_deps.append(" ".join(parts))

        # Combine all dependencies in specific order
        all_deps = []
        all_deps.extend(sorted(standard_deps))  # Regular pip installs first
        all_deps.extend(sorted(torch_deps))  # PyTorch specific installs
        all_deps.extend(sorted(no_deps))  # No-deps installs
        all_deps.extend(sorted(no_cache))  # No-cache installs

        return "\n".join(all_deps)
    except subprocess.CalledProcessError as e:
        print(f"Error executing command: {e}")
        print(f"Command output: {e.output}")
        sys.exit(1)


def generate_containerfile(dependencies):
    """Generate Containerfile from template with dependencies."""
    template_path = Path("redhat-distribution/Containerfile.in")
    output_path = Path("redhat-distribution/Containerfile")

    if not template_path.exists():
        print(f"Error: Template file {template_path} not found")
        sys.exit(1)

    # Read template
    with open(template_path) as f:
        template_content = f.read()

    # Add warning message at the top
    warning = "# WARNING: This file is auto-generated. Do not modify it manually.\n# Generated by: redhat-distribution/build.py\n\n"

    # Process template using string formatting
    containerfile_content = warning + template_content.format(dependencies=dependencies.rstrip())

    # Write output
    with open(output_path, "w") as f:
        f.write(containerfile_content)

    print(f"Successfully generated {output_path}")


def main():
    print("Checking llama installation...")
    check_llama_installed()

    print("Checking llama-stack version...")
    check_llama_stack_version()

    print("Getting dependencies...")
    dependencies = get_dependencies()

    print("Generating Containerfile...")
    generate_containerfile(dependencies)

    print("Done!")


if __name__ == "__main__":
    main()
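get_dependencies() turns each `uv pip install ...` line emitted by `llama stack build --print-deps-only` into a `RUN pip install ...` instruction with its package list de-duplicated and sorted. Below is a self-contained sketch of that rewrite applied to a single hypothetical input line (the package names are invented for illustration; only the `uv pip install` shape is assumed):

```python
# Sketch of the line rewriting done in get_dependencies(), for one hypothetical line.
line = "uv pip install fastapi aiosqlite fastapi httpx"  # hypothetical deps-only output

parts = line.replace("uv ", "RUN ", 1).split(" ", 3)
cmd_parts = parts[:3]                     # ["RUN", "pip", "install"]
packages = sorted(set(parts[3].split()))  # de-duplicate and sort package names

formatted_packages = " \\\n    ".join(packages)
full_cmd = f"{' '.join(cmd_parts)} \\\n    {formatted_packages}"

print(full_cmd)
# RUN pip install \
#     aiosqlite \
#     fastapi \
#     httpx
```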
redhat-distribution/build.yaml (Normal file, 36 lines added)
@@ -0,0 +1,36 @@
version: '2'
distribution_spec:
  description: Red Hat distribution of Llama Stack
  providers:
    inference:
    - remote::vllm
    - inline::sentence-transformers
    vector_io:
    - inline::milvus
    safety:
    - remote::trustyai_fms
    agents:
    - inline::meta-reference
    eval:
    - remote::trustyai_lmeval
    datasetio:
    - remote::huggingface
    - inline::localfs
    scoring:
    - inline::basic
    - inline::llm-as-judge
    - inline::braintrust
    telemetry:
    - inline::meta-reference
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
    - inline::rag-runtime
    - remote::model-context-protocol
  container_image: registry.redhat.io/ubi9/python-311:9.6-1749631027
additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio]
image_type: container
image_name: llama-stack-rh
external_providers_dir: redhat-distribution/providers.d
@@ -0,0 +1,7 @@
adapter:
  adapter_type: trustyai_lmeval
  pip_packages: ["kubernetes", "llama_stack_provider_lmeval==0.1.7"]
  config_class: llama_stack_provider_lmeval.config.LMEvalEvalProviderConfig
  module: llama_stack_provider_lmeval
api_dependencies: ["inference"]
optional_api_dependencies: []
@@ -0,0 +1,7 @@
adapter:
  adapter_type: trustyai_fms
  pip_packages: ["llama_stack_provider_trustyai_fms==0.1.2"]
  config_class: llama_stack_provider_trustyai_fms.config.FMSSafetyProviderConfig
  module: llama_stack_provider_trustyai_fms
api_dependencies: ["safety"]
optional_api_dependencies: ["shields"]
redhat-distribution/run.yaml (Normal file, 138 lines added)
@@ -0,0 +1,138 @@
version: 2
image_name: rh
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
  inference:
  - provider_id: vllm-inference
    provider_type: remote::vllm
    config:
      url: ${env.VLLM_URL:=http://localhost:8000/v1}
      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
      api_token: ${env.VLLM_API_TOKEN:=fake}
      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
    config: {}
  vector_io:
  - provider_id: milvus
    provider_type: inline::milvus
    config:
      db_path: /opt/app-root/src/.llama/distributions/rh/milvus.db
      kvstore:
        type: sqlite
        namespace: null
        db_path: /opt/app-root/src/.llama/distributions/rh/milvus_registry.db
  safety:
  - provider_id: trustyai_fms
    provider_type: remote::trustyai_fms
    config:
      orchestrator_url: ${env.FMS_ORCHESTRATOR_URL:=}
      ssl_cert_path: ${env.FMS_SSL_CERT_PATH:=}
      shields: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: /opt/app-root/src/.llama/distributions/rh/agents_store.db
      responses_store:
        type: sqlite
        db_path: /opt/app-root/src/.llama/distributions/rh/responses_store.db
  eval:
  - provider_id: trustyai_lmeval
    provider_type: remote::trustyai_lmeval
    config:
      use_k8s: True
      base_url: ${env.VLLM_URL:=http://localhost:8000/v1}
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: /opt/app-root/src/.llama/distributions/rh/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: /opt/app-root/src/.llama/distributions/rh/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
    config: {}
  - provider_id: llm-as-judge
    provider_type: inline::llm-as-judge
    config: {}
  - provider_id: braintrust
    provider_type: inline::braintrust
    config:
      openai_api_key: ${env.OPENAI_API_KEY:=}
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
      sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
      sqlite_db_path: /opt/app-root/src/.llama/distributions/rh/trace_store.db
      otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
    config:
      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
      max_results: 3
  - provider_id: tavily-search
    provider_type: remote::tavily-search
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
      max_results: 3
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
  - provider_id: model-context-protocol
    provider_type: remote::model-context-protocol
    config: {}
metadata_store:
  type: sqlite
  db_path: /opt/app-root/src/.llama/distributions/rh/registry.db
inference_store:
  type: sqlite
  db_path: /opt/app-root/src/.llama/distributions/rh/inference_store.db
models:
- metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: vllm-inference
  model_type: llm
- metadata:
    embedding_dimension: 768
  model_id: granite-embedding-125m
  provider_id: sentence-transformers
  provider_model_id: ibm-granite/granite-embedding-125m-english
  model_type: embedding
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
  provider_id: tavily-search
- toolgroup_id: builtin::rag
  provider_id: rag-runtime
server:
  port: 8321
external_providers_dir: /opt/app-root/src/.llama/providers.d
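run.yaml relies throughout on `${env.VAR:=default}` placeholders, which are resolved from the container environment at startup, with the text after `:=` used when the variable is unset. The sketch below only illustrates that substitution convention; it is not the llama-stack implementation, and the regex is an assumption about the placeholder shape used in this file.

```python
# Rough sketch of the `${env.VAR:=default}` convention used throughout run.yaml:
# take the value from the environment if set, otherwise fall back to the default.
import os
import re

_PLACEHOLDER = re.compile(
    r"\$\{env\.(?P<name>[A-Za-z_][A-Za-z0-9_]*)(?::=(?P<default>[^}]*))?\}"
)


def resolve_env_placeholders(text: str) -> str:
    """Replace ${env.VAR:=default} placeholders with environment values."""
    def _sub(match: re.Match) -> str:
        value = os.environ.get(match.group("name"))
        if value is not None:
            return value
        return match.group("default") or ""

    return _PLACEHOLDER.sub(_sub, text)


# Example: without VLLM_URL set, the default URL from run.yaml is used.
print(resolve_env_placeholders("url: ${env.VLLM_URL:=http://localhost:8000/v1}"))
```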