Add a pre-commit hook for distro_codegen, but it does not work yet

Ashwin Bharambe 2024-11-18 15:20:49 -08:00
parent 57a9b4d57f
commit 3aedde2ab4
8 changed files with 66 additions and 4 deletions


@@ -57,3 +57,17 @@ repos:
# hooks:
# - id: markdown-link-check
# args: ['--quiet']
# - repo: local
# hooks:
# - id: distro-codegen
# name: Distribution Template Codegen
# additional_dependencies:
# - rich
# - pydantic
# entry: python -m llama_stack.scripts.distro_codegen
# language: python
# pass_filenames: false
# require_serial: true
# files: ^llama_stack/templates/.*$
# stages: [manual]
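The hook is left commented out and gated behind `stages: [manual]`, so nothing runs on commit yet. A minimal sketch of exercising its entry point by hand from the repo root (assuming `rich` and `pydantic` are installed in the active environment):

```python
# Sketch only: run the same module the disabled hook points at and surface its
# exit code. A non-zero code means the regenerated templates differ from what
# is committed (see check_for_changes below).
import subprocess
import sys

result = subprocess.run([sys.executable, "-m", "llama_stack.scripts.distro_codegen"])
print(f"distro_codegen exited with {result.returncode}")
```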


@@ -13,6 +13,15 @@ The `llamastack/distribution-meta-reference-gpu` distribution consists of the fo
Note that you need access to nvidia GPUs to run this distribution. This distribution is not compatible with CPU-only machines or machines with AMD GPUs.
### Environment Variables
The following environment variables can be configured:
- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
- `INFERENCE_MODEL`: Inference model loaded into the Meta Reference server (default: `meta-llama/Llama-3.2-3B-Instruct`)
- `INFERENCE_CHECKPOINT_DIR`: Directory containing the Meta Reference model checkpoint (default: `null`)
- `SAFETY_MODEL`: Name of the safety (Llama-Guard) model to use (default: `meta-llama/Llama-Guard-3-1B`)
- `SAFETY_CHECKPOINT_DIR`: Directory containing the Llama-Guard model checkpoint (default: `null`)
## Prerequisite: Downloading Models
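These are plain environment variables with the defaults listed above; a minimal illustrative sketch (not part of the distribution) of resolving them in Python:

```python
# Illustrative only: resolve the documented variables, falling back to the
# defaults from the list above. A missing checkpoint dir stays None (null).
import os

port = int(os.getenv("LLAMASTACK_PORT", "5001"))
inference_model = os.getenv("INFERENCE_MODEL", "meta-llama/Llama-3.2-3B-Instruct")
inference_ckpt_dir = os.getenv("INFERENCE_CHECKPOINT_DIR")
safety_model = os.getenv("SAFETY_MODEL", "meta-llama/Llama-Guard-3-1B")
safety_ckpt_dir = os.getenv("SAFETY_CHECKPOINT_DIR")
print(port, inference_model, inference_ckpt_dir, safety_model, safety_ckpt_dir)
```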


@@ -11,7 +11,15 @@ The `llamastack/distribution-ollama` distribution consists of the following prov
| telemetry | `inline::meta-reference` |
You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.
### Environment Variables
The following environment variables can be configured:
- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
- `OLLAMA_URL`: URL of the Ollama server (default: `http://127.0.0.1:11434`)
- `INFERENCE_MODEL`: Inference model loaded into the Ollama server (default: `meta-llama/Llama-3.2-3B-Instruct`)
- `SAFETY_MODEL`: Safety model loaded into the Ollama server (default: `meta-llama/Llama-Guard-3-1B`)
## Setting up Ollama server
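Since `OLLAMA_URL` just points at a running Ollama server, a quick reachability check before starting the stack can save a confusing failure later. A hedged sketch (not part of the distribution):

```python
# Illustrative only: confirm the server named by OLLAMA_URL answers HTTP at all.
import os
import urllib.request

ollama_url = os.getenv("OLLAMA_URL", "http://127.0.0.1:11434")
try:
    with urllib.request.urlopen(ollama_url, timeout=5) as resp:
        print(f"Ollama reachable at {ollama_url} (HTTP {resp.status})")
except OSError as err:
    print(f"Could not reach Ollama at {ollama_url}: {err}")
```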


@@ -13,6 +13,16 @@ The `llamastack/distribution-remote-vllm` distribution consists of the following
You can use this distribution if you have GPUs and want to run an independent vLLM server container for running inference.
### Environment Variables
The following environment variables can be configured:
- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
- `INFERENCE_MODEL`: Inference model loaded into the vLLM server (default: `meta-llama/Llama-3.2-3B-Instruct`)
- `VLLM_URL`: URL of the vLLM server with the main inference model (default: `http://host.docker.internal:5100/v1`)
- `MAX_TOKENS`: Maximum number of tokens for generation (default: `4096`)
- `SAFETY_VLLM_URL`: URL of the vLLM server with the safety model (default: `http://host.docker.internal:5101/v1`)
- `SAFETY_MODEL`: Name of the safety (Llama-Guard) model to use (default: `meta-llama/Llama-Guard-3-1B`)
## Setting up vLLM server
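`VLLM_URL` (and `SAFETY_VLLM_URL`) already include the `/v1` prefix of vLLM's OpenAI-compatible API, so a quick sanity check is to list the served models. A sketch, assuming the standard `/v1/models` route is exposed:

```python
# Illustrative only: query the OpenAI-compatible model listing on the vLLM server.
import json
import os
import urllib.request

vllm_url = os.getenv("VLLM_URL", "http://host.docker.internal:5100/v1")
with urllib.request.urlopen(f"{vllm_url}/models", timeout=10) as resp:
    print(json.loads(resp.read().decode("utf-8")))
```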


@@ -6,6 +6,8 @@
import concurrent.futures
import importlib
import subprocess
import sys
from functools import partial
from pathlib import Path
from typing import Iterator
@@ -55,6 +57,16 @@ def process_template(template_dir: Path, progress) -> None:
        raise e

def check_for_changes() -> bool:
    """Check if there are any uncommitted changes."""
    result = subprocess.run(
        ["git", "diff", "--exit-code"],
        cwd=REPO_ROOT,
        capture_output=True,
    )
    return result.returncode != 0

def main():
    templates_dir = REPO_ROOT / "llama_stack" / "templates"
@@ -76,6 +88,15 @@ def main():
            list(executor.map(process_func, template_dirs))
            progress.update(task, advance=len(template_dirs))

    if check_for_changes():
        print(
            "Distribution template changes detected. Please commit the changes.",
            file=sys.stderr,
        )
        sys.exit(1)
    sys.exit(0)

if __name__ == "__main__":
    main()


@@ -75,7 +75,7 @@ def get_distribution_template() -> DistributionTemplate:
default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
),
},
- docker_compose_env_vars={
+ run_config_env_vars={
"LLAMASTACK_PORT": (
"5001",
"Port for the Llama Stack distribution server",


@@ -63,7 +63,7 @@ def get_distribution_template() -> DistributionTemplate:
default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
),
},
- docker_compose_env_vars={
+ run_config_env_vars={
"LLAMASTACK_PORT": (
"5001",
"Port for the Llama Stack distribution server",


@@ -71,7 +71,7 @@ def get_distribution_template() -> DistributionTemplate:
default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
),
},
- docker_compose_env_vars={
+ run_config_env_vars={
"LLAMASTACK_PORT": (
"5001",
"Port for the Llama Stack distribution server",