Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-17 15:52:39 +00:00)

RHAIENG-565: purge the midstream repo content so that it hosts only the build artifacts; only the redhat-distribution directory should remain

This commit is contained in:
parent 5d65c017b0
commit 9803329350

8 changed files with 448 additions and 0 deletions
redhat-distribution/Containerfile — new file (+48)
@@ -0,0 +1,48 @@
# WARNING: This file is auto-generated. Do not modify it manually.
# Generated by: redhat-distribution/build.py

FROM registry.access.redhat.com/ubi9/python-312@sha256:95ec8d3ee9f875da011639213fd254256c29bc58861ac0b11f290a291fa04435
WORKDIR /opt/app-root

RUN pip install sqlalchemy # somehow sqlalchemy[asyncio] is not sufficient
RUN pip install \
    aiosqlite \
    autoevals \
    chardet \
    datasets \
    fastapi \
    fire \
    httpx \
    kubernetes \
    llama_stack_provider_lmeval==0.1.7 \
    llama_stack_provider_trustyai_fms==0.1.2 \
    matplotlib \
    mcp \
    nltk \
    numpy \
    openai \
    opentelemetry-exporter-otlp-proto-http \
    opentelemetry-sdk \
    pandas \
    pillow \
    psycopg2-binary \
    pymilvus>=2.4.10 \
    pymongo \
    pypdf \
    redis \
    requests \
    scikit-learn \
    scipy \
    sentencepiece \
    sqlalchemy[asyncio] \
    tqdm \
    transformers \
    uvicorn
RUN pip install --index-url https://download.pytorch.org/whl/cpu torch torchvision
RUN pip install --no-deps sentence-transformers
RUN pip install --no-cache llama-stack==0.2.14
RUN mkdir -p ${HOME}/.llama/providers.d ${HOME}/.cache
COPY redhat-distribution/run.yaml ${APP_ROOT}/run.yaml
COPY redhat-distribution/providers.d/ ${HOME}/.llama/providers.d/

ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "/opt/app-root/run.yaml"]
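The entrypoint starts the Llama Stack server against the baked-in run.yaml, which listens on port 8321 (see redhat-distribution/run.yaml below). As a quick smoke test once a container is running, something like the following should work — a sketch that assumes the separate llama-stack-client package and a published server port, neither of which is part of this commit:

```python
# Sketch of a smoke test against a running container.
# Assumes: `pip install llama-stack-client` and the container started with
# the server port published, e.g. `-p 8321:8321` (both are assumptions).
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")
for model in client.models.list():
    print(model.identifier)
```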
redhat-distribution/Containerfile.in — new file (+11)
@@ -0,0 +1,11 @@
FROM registry.access.redhat.com/ubi9/python-312@sha256:95ec8d3ee9f875da011639213fd254256c29bc58861ac0b11f290a291fa04435
WORKDIR /opt/app-root

RUN pip install sqlalchemy # somehow sqlalchemy[asyncio] is not sufficient
{dependencies}
RUN pip install --no-cache llama-stack==0.2.14
RUN mkdir -p ${{HOME}}/.llama/providers.d ${{HOME}}/.cache
COPY redhat-distribution/run.yaml ${{APP_ROOT}}/run.yaml
COPY redhat-distribution/providers.d/ ${{HOME}}/.llama/providers.d/

ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "/opt/app-root/run.yaml"]
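The doubled braces in this template are deliberate: build.py expands it with Python's `str.format`, so `{dependencies}` is a substitution field while literal shell variables such as `${HOME}` must be escaped as `${{HOME}}`. A minimal sketch of that expansion:

```python
# Minimal sketch of the str.format expansion build.py applies to this template.
# `{{` / `}}` collapse to literal braces; {dependencies} is substituted.
template = "RUN mkdir -p ${{HOME}}/.llama/providers.d\n{dependencies}"
print(template.format(dependencies="RUN pip install aiosqlite"))
# Output:
# RUN mkdir -p ${HOME}/.llama/providers.d
# RUN pip install aiosqlite
```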
redhat-distribution/README.md — new file (+48)
@@ -0,0 +1,48 @@
# Red Hat Distribution Build Instructions

This directory contains the necessary files to build a Red Hat-compatible container image for llama-stack.

## Prerequisites

- Python >= 3.11
- `llama` CLI tool installed: `pip install llama-stack`
- Podman or Docker installed

## Generating the Containerfile

The Containerfile is auto-generated from a template. To generate it:

1. Make sure you have the `llama` CLI tool installed
2. Run the build script from the root of this git repo:
```bash
./redhat-distribution/build.py
```

This will:
- Check for the llama CLI installation
- Generate dependencies using `llama stack build`
- Create a new `Containerfile` with the required dependencies

## Editing the Containerfile

To edit the Containerfile, modify the template in `redhat-distribution/Containerfile.in` and run the build script again.
NEVER edit the generated `Containerfile` manually.

## Building the Container Image

Once the Containerfile is generated, you can build the image using either Podman or Docker.

### Using Podman to build the image for x86_64

```bash
podman build --platform linux/amd64 -f redhat-distribution/Containerfile -t rh .
```

## Notes

- The generated Containerfile should not be modified manually, as it will be overwritten the next time you run the build script.

## Push the image to a registry

```bash
podman push <build-ID> quay.io/opendatahub/llama-stack:rh-distribution
```
redhat-distribution/build.py — new executable file (+153)
@@ -0,0 +1,153 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

# Usage: ./redhat-distribution/build.py

import shutil
import subprocess
import sys
from pathlib import Path

BASE_REQUIREMENTS = [
    "llama-stack==0.2.14",
]


def check_llama_installed():
    """Check if llama binary is installed and accessible."""
    if not shutil.which("llama"):
        print("Error: llama binary not found. Please install it first.")
        sys.exit(1)


def check_llama_stack_version():
    """Check if the llama-stack version in BASE_REQUIREMENTS matches the installed version."""
    try:
        result = subprocess.run(
            ["llama stack --version"],
            shell=True,
            capture_output=True,
            text=True,
            check=True,
        )
        installed_version = result.stdout.strip()

        # Extract version from BASE_REQUIREMENTS
        expected_version = None
        for req in BASE_REQUIREMENTS:
            if req.startswith("llama-stack=="):
                expected_version = req.split("==")[1]
                break

        if expected_version and installed_version != expected_version:
            print("Error: llama-stack version mismatch!")
            print(f"  Expected: {expected_version}")
            print(f"  Installed: {installed_version}")
            print(
                "  If you just bumped the llama-stack version in BASE_REQUIREMENTS, you must update the version in .pre-commit-config.yaml"
            )
            sys.exit(1)

    except subprocess.CalledProcessError as e:
        print(f"Warning: Could not check llama-stack version: {e}")
        print("Continuing without version validation...")


def get_dependencies():
    """Execute the llama stack build command and capture dependencies."""
    cmd = "llama stack build --config redhat-distribution/build.yaml --print-deps-only"
    try:
        result = subprocess.run(cmd, shell=True, capture_output=True, text=True, check=True)
        # Categorize and sort different types of pip install commands
        standard_deps = []
        torch_deps = []
        no_deps = []
        no_cache = []

        for line in result.stdout.splitlines():
            if line.strip().startswith("uv pip"):
                # Split the line into command and packages
                parts = line.replace("uv ", "RUN ", 1).split(" ", 3)
                if len(parts) >= 4:  # We have packages to sort
                    cmd_parts = parts[:3]  # "RUN pip install"
                    packages = sorted(set(parts[3].split()))  # Sort the package names and remove duplicates

                    # Determine command type and format accordingly
                    if "--index-url" in line:
                        full_cmd = " ".join(cmd_parts + [" ".join(packages)])
                        torch_deps.append(full_cmd)
                    elif "--no-deps" in line:
                        full_cmd = " ".join(cmd_parts + [" ".join(packages)])
                        no_deps.append(full_cmd)
                    elif "--no-cache" in line:
                        full_cmd = " ".join(cmd_parts + [" ".join(packages)])
                        no_cache.append(full_cmd)
                    else:
                        formatted_packages = " \\\n    ".join(packages)
                        full_cmd = f"{' '.join(cmd_parts)} \\\n    {formatted_packages}"
                        standard_deps.append(full_cmd)
                else:
                    standard_deps.append(" ".join(parts))

        # Combine all dependencies in specific order
        all_deps = []
        all_deps.extend(sorted(standard_deps))  # Regular pip installs first
        all_deps.extend(sorted(torch_deps))  # PyTorch specific installs
        all_deps.extend(sorted(no_deps))  # No-deps installs
        all_deps.extend(sorted(no_cache))  # No-cache installs

        return "\n".join(all_deps)
    except subprocess.CalledProcessError as e:
        print(f"Error executing command: {e}")
        print(f"Command output: {e.output}")
        sys.exit(1)


def generate_containerfile(dependencies):
    """Generate Containerfile from template with dependencies."""
    template_path = Path("redhat-distribution/Containerfile.in")
    output_path = Path("redhat-distribution/Containerfile")

    if not template_path.exists():
        print(f"Error: Template file {template_path} not found")
        sys.exit(1)

    # Read template
    with open(template_path) as f:
        template_content = f.read()

    # Add warning message at the top
    warning = "# WARNING: This file is auto-generated. Do not modify it manually.\n# Generated by: redhat-distribution/build.py\n\n"

    # Process template using string formatting
    containerfile_content = warning + template_content.format(dependencies=dependencies.rstrip())

    # Write output
    with open(output_path, "w") as f:
        f.write(containerfile_content)

    print(f"Successfully generated {output_path}")


def main():
    print("Checking llama installation...")
    check_llama_installed()

    print("Checking llama-stack version...")
    check_llama_stack_version()

    print("Getting dependencies...")
    dependencies = get_dependencies()

    print("Generating Containerfile...")
    generate_containerfile(dependencies)

    print("Done!")


if __name__ == "__main__":
    main()
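To make the rewriting in `get_dependencies` concrete, here is the transformation applied to a single hypothetical `uv pip install` line (the input line is invented for illustration; real lines come from `llama stack build --print-deps-only`):

```python
# Illustrative only: the per-line rewrite performed by get_dependencies(),
# applied to a hypothetical "uv pip install ..." output line.
line = "uv pip install fastapi aiosqlite fastapi"
parts = line.replace("uv ", "RUN ", 1).split(" ", 3)
packages = sorted(set(parts[3].split()))  # dedupe + sort -> ['aiosqlite', 'fastapi']
print(f"{' '.join(parts[:3])} \\\n    " + " \\\n    ".join(packages))
# RUN pip install \
#     aiosqlite \
#     fastapi
```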
redhat-distribution/build.yaml — new file (+36)
@@ -0,0 +1,36 @@
version: '2'
distribution_spec:
  description: Red Hat distribution of Llama Stack
  providers:
    inference:
    - remote::vllm
    - inline::sentence-transformers
    vector_io:
    - inline::milvus
    safety:
    - remote::trustyai_fms
    agents:
    - inline::meta-reference
    eval:
    - remote::trustyai_lmeval
    datasetio:
    - remote::huggingface
    - inline::localfs
    scoring:
    - inline::basic
    - inline::llm-as-judge
    - inline::braintrust
    telemetry:
    - inline::meta-reference
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
    - inline::rag-runtime
    - remote::model-context-protocol
  container_image: registry.redhat.io/ubi9/python-311:9.6-1749631027
additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio]
image_type: container
image_name: llama-stack-rh
external_providers_dir: redhat-distribution/providers.d
New file under redhat-distribution/providers.d/ — trustyai_lmeval adapter spec (+7)
@@ -0,0 +1,7 @@
adapter:
  adapter_type: trustyai_lmeval
  pip_packages: ["kubernetes", "llama_stack_provider_lmeval==0.1.7"]
  config_class: llama_stack_provider_lmeval.config.LMEvalEvalProviderConfig
  module: llama_stack_provider_lmeval
api_dependencies: ["inference"]
optional_api_dependencies: []
New file under redhat-distribution/providers.d/ — trustyai_fms adapter spec (+7)
@@ -0,0 +1,7 @@
adapter:
  adapter_type: trustyai_fms
  pip_packages: ["llama_stack_provider_trustyai_fms==0.1.2"]
  config_class: llama_stack_provider_trustyai_fms.config.FMSSafetyProviderConfig
  module: llama_stack_provider_trustyai_fms
api_dependencies: ["safety"]
optional_api_dependencies: ["shields"]
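Both specs follow the external-provider layout referenced by `external_providers_dir`: `module` names an importable package, and `config_class` gives a dotted path to its config type. A rough sketch of what those two fields resolve to at load time — an illustration, not llama-stack's actual loader:

```python
import importlib

import yaml  # assumes PyYAML is installed


def resolve_adapter(spec_path: str):
    """Illustrative only: import the module and config class named by a spec."""
    with open(spec_path) as f:
        adapter = yaml.safe_load(f)["adapter"]
    module = importlib.import_module(adapter["module"])
    # config_class is a dotted path such as
    # "llama_stack_provider_lmeval.config.LMEvalEvalProviderConfig"
    cfg_module, _, cfg_name = adapter["config_class"].rpartition(".")
    config_class = getattr(importlib.import_module(cfg_module), cfg_name)
    return module, config_class
```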
redhat-distribution/run.yaml — new file (+138)
@@ -0,0 +1,138 @@
version: 2
image_name: rh
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
  inference:
  - provider_id: vllm-inference
    provider_type: remote::vllm
    config:
      url: ${env.VLLM_URL:=http://localhost:8000/v1}
      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
      api_token: ${env.VLLM_API_TOKEN:=fake}
      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
    config: {}
  vector_io:
  - provider_id: milvus
    provider_type: inline::milvus
    config:
      db_path: /opt/app-root/src/.llama/distributions/rh/milvus.db
      kvstore:
        type: sqlite
        namespace: null
        db_path: /opt/app-root/src/.llama/distributions/rh/milvus_registry.db
  safety:
  - provider_id: trustyai_fms
    provider_type: remote::trustyai_fms
    config:
      orchestrator_url: ${env.FMS_ORCHESTRATOR_URL:=}
      ssl_cert_path: ${env.FMS_SSL_CERT_PATH:=}
      shields: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: /opt/app-root/src/.llama/distributions/rh/agents_store.db
      responses_store:
        type: sqlite
        db_path: /opt/app-root/src/.llama/distributions/rh/responses_store.db
  eval:
  - provider_id: trustyai_lmeval
    provider_type: remote::trustyai_lmeval
    config:
      use_k8s: True
      base_url: ${env.VLLM_URL:=http://localhost:8000/v1}
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: /opt/app-root/src/.llama/distributions/rh/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: /opt/app-root/src/.llama/distributions/rh/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
    config: {}
  - provider_id: llm-as-judge
    provider_type: inline::llm-as-judge
    config: {}
  - provider_id: braintrust
    provider_type: inline::braintrust
    config:
      openai_api_key: ${env.OPENAI_API_KEY:=}
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
      sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
      sqlite_db_path: /opt/app-root/src/.llama/distributions/rh/trace_store.db
      otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
    config:
      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
      max_results: 3
  - provider_id: tavily-search
    provider_type: remote::tavily-search
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
      max_results: 3
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
  - provider_id: model-context-protocol
    provider_type: remote::model-context-protocol
    config: {}
metadata_store:
  type: sqlite
  db_path: /opt/app-root/src/.llama/distributions/rh/registry.db
inference_store:
  type: sqlite
  db_path: /opt/app-root/src/.llama/distributions/rh/inference_store.db
models:
- metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: vllm-inference
  model_type: llm
- metadata:
    embedding_dimension: 768
  model_id: granite-embedding-125m
  provider_id: sentence-transformers
  provider_model_id: ibm-granite/granite-embedding-125m-english
  model_type: embedding
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
  provider_id: tavily-search
- toolgroup_id: builtin::rag
  provider_id: rag-runtime
server:
  port: 8321
external_providers_dir: /opt/app-root/src/.llama/providers.d
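Throughout this file, `${env.VAR:=default}` resolves to the environment variable `VAR` if it is set, otherwise to the default after `:=` (which may be empty, as in `${env.OPENAI_API_KEY:=}`). A small sketch of those semantics — illustrative, not llama-stack's actual substitution code:

```python
import os
import re

# Illustrative only: expand ${env.VAR:=default} the way this run.yaml uses it.
_ENV_REF = re.compile(r"\$\{env\.(\w+):=([^}]*)\}")


def expand_env(text: str) -> str:
    return _ENV_REF.sub(lambda m: os.environ.get(m.group(1), m.group(2)), text)


print(expand_env("url: ${env.VLLM_URL:=http://localhost:8000/v1}"))
# With VLLM_URL unset: "url: http://localhost:8000/v1"
```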