forked from phoenix-oss/llama-stack-mirror
fix broken --list-templates with adding build.yaml files for packaging (#327)
* add build files to templates * fix templates * manifest * symlink * symlink * precommit * change everything to docker build.yaml * remove image_type in templates * fix build from templates CLI * fix readmes
This commit is contained in:
parent
afae4e3d8e
commit
07f9bf723f
32 changed files with 161 additions and 158 deletions
|
@ -1,4 +1,4 @@
|
|||
include requirements.txt
|
||||
include llama_stack/distribution/*.sh
|
||||
include llama_stack/cli/scripts/*.sh
|
||||
include distributions/*/build.yaml
|
||||
include llama_stack/templates/*/build.yaml
|
||||
|
|
|
@ -1,10 +0,0 @@
|
|||
name: bedrock
|
||||
distribution_spec:
|
||||
description: Use Amazon Bedrock APIs.
|
||||
providers:
|
||||
inference: remote::bedrock
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: conda
|
1
distributions/bedrock/build.yaml
Symbolic link
1
distributions/bedrock/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../llama_stack/templates/bedrock/build.yaml
|
|
@ -1,10 +0,0 @@
|
|||
name: databricks
|
||||
distribution_spec:
|
||||
description: Use Databricks for running LLM inference
|
||||
providers:
|
||||
inference: remote::databricks
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: conda
|
1
distributions/databricks/build.yaml
Symbolic link
1
distributions/databricks/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../llama_stack/templates/databricks/build.yaml
|
|
@ -49,7 +49,7 @@ inference:
|
|||
**Via Conda**
|
||||
|
||||
```bash
|
||||
llama stack build --config ./build.yaml
|
||||
llama stack build --template fireworks --image-type conda
|
||||
# -- modify run.yaml to a valid Fireworks server endpoint
|
||||
llama stack run ./run.yaml
|
||||
```
|
||||
|
|
|
@ -1,10 +0,0 @@
|
|||
name: fireworks
|
||||
distribution_spec:
|
||||
description: Use Fireworks.ai for running LLM inference
|
||||
providers:
|
||||
inference: remote::fireworks
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: docker
|
1
distributions/fireworks/build.yaml
Symbolic link
1
distributions/fireworks/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../llama_stack/templates/fireworks/build.yaml
|
|
@ -1,10 +0,0 @@
|
|||
name: hf-endpoint
|
||||
distribution_spec:
|
||||
description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints."
|
||||
providers:
|
||||
inference: remote::hf::endpoint
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: conda
|
1
distributions/hf-endpoint/build.yaml
Symbolic link
1
distributions/hf-endpoint/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../llama_stack/templates/hf-endpoint/build.yaml
|
|
@ -1,10 +0,0 @@
|
|||
name: hf-serverless
|
||||
distribution_spec:
|
||||
description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference."
|
||||
providers:
|
||||
inference: remote::hf::serverless
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: conda
|
1
distributions/hf-serverless/build.yaml
Symbolic link
1
distributions/hf-serverless/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../llama_stack/templates/hf-serverless/build.yaml
|
|
@ -1,14 +0,0 @@
|
|||
name: meta-reference-gpu
|
||||
distribution_spec:
|
||||
docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
|
||||
description: Use code from `llama_stack` itself to serve all llama stack APIs
|
||||
providers:
|
||||
inference: meta-reference
|
||||
memory:
|
||||
- meta-reference
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: docker
|
1
distributions/meta-reference-gpu/build.yaml
Symbolic link
1
distributions/meta-reference-gpu/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../llama_stack/templates/meta-reference-gpu/build.yaml
|
|
@ -1,14 +0,0 @@
|
|||
name: meta-reference-quantized-gpu
|
||||
distribution_spec:
|
||||
docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
|
||||
description: Use code from `llama_stack` itself to serve all llama stack APIs
|
||||
providers:
|
||||
inference: meta-reference-quantized
|
||||
memory:
|
||||
- meta-reference
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: docker
|
1
distributions/meta-reference-quantized-gpu/build.yaml
Symbolic link
1
distributions/meta-reference-quantized-gpu/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../llama_stack/templates/meta-reference-quantized-gpu/build.yaml
|
|
@ -86,6 +86,6 @@ inference:
|
|||
**Via Conda**
|
||||
|
||||
```
|
||||
llama stack build --config ./build.yaml
|
||||
llama stack build --template ollama --image-type conda
|
||||
llama stack run ./gpu/run.yaml
|
||||
```
|
||||
|
|
|
@ -1,13 +0,0 @@
|
|||
name: ollama
|
||||
distribution_spec:
|
||||
description: Use ollama for running LLM inference
|
||||
providers:
|
||||
inference: remote::ollama
|
||||
memory:
|
||||
- meta-reference
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: docker
|
1
distributions/ollama/build.yaml
Symbolic link
1
distributions/ollama/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../llama_stack/templates/ollama/build.yaml
|
|
@ -88,7 +88,7 @@ inference:
|
|||
**Via Conda**
|
||||
|
||||
```bash
|
||||
llama stack build --config ./build.yaml
|
||||
llama stack build --template tgi --image-type conda
|
||||
# -- start a TGI server endpoint
|
||||
llama stack run ./gpu/run.yaml
|
||||
```
|
||||
|
|
|
@ -1,13 +0,0 @@
|
|||
name: tgi
|
||||
distribution_spec:
|
||||
description: Use TGI for running LLM inference
|
||||
providers:
|
||||
inference: remote::tgi
|
||||
memory:
|
||||
- meta-reference
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: docker
|
1
distributions/tgi/build.yaml
Symbolic link
1
distributions/tgi/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../llama_stack/templates/tgi/build.yaml
|
|
@ -62,7 +62,7 @@ memory:
|
|||
**Via Conda**
|
||||
|
||||
```bash
|
||||
llama stack build --config ./build.yaml
|
||||
llama stack build --template together --image-type conda
|
||||
# -- modify run.yaml to a valid Together server endpoint
|
||||
llama stack run ./run.yaml
|
||||
```
|
||||
|
|
|
@ -1,10 +0,0 @@
|
|||
name: together
|
||||
distribution_spec:
|
||||
description: Use Together.ai for running LLM inference
|
||||
providers:
|
||||
inference: remote::together
|
||||
memory: remote::weaviate
|
||||
safety: remote::together
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: docker
|
1
distributions/together/build.yaml
Symbolic link
1
distributions/together/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../llama_stack/templates/together/build.yaml
|
|
@ -1,10 +0,0 @@
|
|||
name: vllm
|
||||
distribution_spec:
|
||||
description: Like local, but use vLLM for running LLM inference
|
||||
providers:
|
||||
inference: vllm
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: conda
|
1
distributions/vllm/build.yaml
Symbolic link
1
distributions/vllm/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../llama_stack/templates/vllm/build.yaml
|
|
@ -279,11 +279,11 @@ llama stack build --list-templates
|
|||
You may then pick a template to build your distribution with providers fitted to your liking.
|
||||
|
||||
```
|
||||
llama stack build --template local-tgi --name my-tgi-stack
|
||||
llama stack build --template local-tgi --name my-tgi-stack --image-type conda
|
||||
```
|
||||
|
||||
```
|
||||
$ llama stack build --template local-tgi --name my-tgi-stack
|
||||
$ llama stack build --template local-tgi --name my-tgi-stack --image-type conda
|
||||
...
|
||||
...
|
||||
Build spec configuration saved at ~/.conda/envs/llamastack-my-tgi-stack/my-tgi-stack-build.yaml
|
||||
|
@ -293,10 +293,10 @@ You may now run `llama stack configure my-tgi-stack` or `llama stack configure ~
|
|||
#### Building from config file
|
||||
- In addition to templates, you may customize the build to your liking through editing config files and build from config files with the following command.
|
||||
|
||||
- The config file will be of contents like the ones in `llama_stack/distributions/templates/`.
|
||||
- The config file will be of contents like the ones in `llama_stack/templates/`.
|
||||
|
||||
```
|
||||
$ cat llama_stack/distribution/templates/local-ollama-build.yaml
|
||||
$ cat build.yaml
|
||||
|
||||
name: local-ollama
|
||||
distribution_spec:
|
||||
|
@ -311,7 +311,7 @@ image_type: conda
|
|||
```
|
||||
|
||||
```
|
||||
llama stack build --config llama_stack/distribution/templates/local-ollama-build.yaml
|
||||
llama stack build --config build.yaml
|
||||
```
|
||||
|
||||
#### How to build distribution with Docker image
|
||||
|
|
|
@ -35,11 +35,7 @@ You have two ways to start up Llama stack server:
|
|||
|
||||
1. **Starting up server via docker**:
|
||||
|
||||
We provide 2 pre-built Docker image of Llama Stack distribution, which can be found in the following links.
|
||||
- [llamastack-local-gpu](https://hub.docker.com/repository/docker/llamastack/llamastack-local-gpu/general)
|
||||
- This is a packaged version with our local meta-reference implementations, where you will be running inference locally with downloaded Llama model checkpoints.
|
||||
- [llamastack-local-cpu](https://hub.docker.com/repository/docker/llamastack/llamastack-local-cpu/general)
|
||||
- This is a lite version with remote inference where you can hook up to your favourite remote inference framework (e.g. ollama, fireworks, together, tgi) for running inference without GPU.
|
||||
We provide pre-built Docker image of Llama Stack distribution, which can be found in the following links in the [distributions](../distributions/) folder.
|
||||
|
||||
> [!NOTE]
|
||||
> For GPU inference, you need to set these environment variables for specifying local directory containing your model checkpoints, and enable GPU inference to start running docker container.
|
||||
|
|
|
@ -12,9 +12,7 @@ import os
|
|||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
|
||||
TEMPLATES_PATH = (
|
||||
Path(os.path.relpath(__file__)).parent.parent.parent.parent / "distributions"
|
||||
)
|
||||
TEMPLATES_PATH = Path(os.path.relpath(__file__)).parent.parent.parent / "templates"
|
||||
|
||||
|
||||
@lru_cache()
|
||||
|
@ -26,7 +24,6 @@ def available_templates_specs() -> List[BuildConfig]:
|
|||
with open(p, "r") as f:
|
||||
build_config = BuildConfig(**yaml.safe_load(f))
|
||||
template_specs.append(build_config)
|
||||
|
||||
return template_specs
|
||||
|
||||
|
||||
|
@ -99,19 +96,22 @@ class StackBuild(Subcommand):
|
|||
"You must specify a name for the build using --name when using a template"
|
||||
)
|
||||
return
|
||||
build_path = TEMPLATES_PATH / f"{args.template}-build.yaml"
|
||||
if not build_path.exists():
|
||||
self.parser.error(
|
||||
f"Could not find template {args.template}. Please run `llama stack build --list-templates` to check out the available templates"
|
||||
)
|
||||
return
|
||||
with open(build_path, "r") as f:
|
||||
build_config = BuildConfig(**yaml.safe_load(f))
|
||||
build_config.name = args.name
|
||||
if args.image_type:
|
||||
build_config.image_type = args.image_type
|
||||
self._run_stack_build_command_from_build_config(build_config)
|
||||
available_templates = available_templates_specs()
|
||||
for build_config in available_templates:
|
||||
if build_config.name == args.template:
|
||||
build_config.name = args.name
|
||||
if args.image_type:
|
||||
build_config.image_type = args.image_type
|
||||
else:
|
||||
self.parser.error(
|
||||
f"Please specify a image-type (docker | conda) for {args.template}"
|
||||
)
|
||||
self._run_stack_build_command_from_build_config(build_config)
|
||||
return
|
||||
|
||||
self.parser.error(
|
||||
f"Could not find template {args.template}. Please run `llama stack build --list-templates` to check out the available templates"
|
||||
)
|
||||
return
|
||||
|
||||
# try to see if we can find a pre-existing build config file through name
|
||||
|
|
|
@ -8,18 +8,19 @@ from enum import Enum
|
|||
from typing import List, Optional
|
||||
|
||||
import pkg_resources
|
||||
|
||||
from llama_stack.distribution.utils.exec import run_with_pty
|
||||
from pydantic import BaseModel
|
||||
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.distribution.utils.exec import run_with_pty
|
||||
|
||||
from llama_stack.distribution.datatypes import * # noqa: F403
|
||||
from pathlib import Path
|
||||
|
||||
from llama_stack.distribution.utils.config_dirs import BUILDS_BASE_DIR
|
||||
from llama_stack.distribution.distribution import get_provider_registry
|
||||
|
||||
from llama_stack.distribution.utils.config_dirs import BUILDS_BASE_DIR
|
||||
|
||||
|
||||
# These are the dependencies needed by the distribution server.
|
||||
# `llama-stack` is automatically installed by the installation script.
|
||||
|
|
|
@ -1,5 +1,11 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
if [[ $# -ne 1 ]]; then
|
||||
echo "Error: Please provide the name of CONDA environment you wish to create"
|
||||
exit 1
|
||||
|
|
9
llama_stack/templates/bedrock/build.yaml
Normal file
9
llama_stack/templates/bedrock/build.yaml
Normal file
|
@ -0,0 +1,9 @@
|
|||
name: bedrock
|
||||
distribution_spec:
|
||||
description: Use Amazon Bedrock APIs.
|
||||
providers:
|
||||
inference: remote::bedrock
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
9
llama_stack/templates/databricks/build.yaml
Normal file
9
llama_stack/templates/databricks/build.yaml
Normal file
|
@ -0,0 +1,9 @@
|
|||
name: databricks
|
||||
distribution_spec:
|
||||
description: Use Databricks for running LLM inference
|
||||
providers:
|
||||
inference: remote::databricks
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
9
llama_stack/templates/fireworks/build.yaml
Normal file
9
llama_stack/templates/fireworks/build.yaml
Normal file
|
@ -0,0 +1,9 @@
|
|||
name: fireworks
|
||||
distribution_spec:
|
||||
description: Use Fireworks.ai for running LLM inference
|
||||
providers:
|
||||
inference: remote::fireworks
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
9
llama_stack/templates/hf-endpoint/build.yaml
Normal file
9
llama_stack/templates/hf-endpoint/build.yaml
Normal file
|
@ -0,0 +1,9 @@
|
|||
name: hf-endpoint
|
||||
distribution_spec:
|
||||
description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints."
|
||||
providers:
|
||||
inference: remote::hf::endpoint
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
9
llama_stack/templates/hf-serverless/build.yaml
Normal file
9
llama_stack/templates/hf-serverless/build.yaml
Normal file
|
@ -0,0 +1,9 @@
|
|||
name: hf-serverless
|
||||
distribution_spec:
|
||||
description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference."
|
||||
providers:
|
||||
inference: remote::hf::serverless
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
13
llama_stack/templates/meta-reference-gpu/build.yaml
Normal file
13
llama_stack/templates/meta-reference-gpu/build.yaml
Normal file
|
@ -0,0 +1,13 @@
|
|||
name: meta-reference-gpu
|
||||
distribution_spec:
|
||||
docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
|
||||
description: Use code from `llama_stack` itself to serve all llama stack APIs
|
||||
providers:
|
||||
inference: meta-reference
|
||||
memory:
|
||||
- meta-reference
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
|
@ -0,0 +1,13 @@
|
|||
name: meta-reference-quantized-gpu
|
||||
distribution_spec:
|
||||
docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
|
||||
description: Use code from `llama_stack` itself to serve all llama stack APIs
|
||||
providers:
|
||||
inference: meta-reference-quantized
|
||||
memory:
|
||||
- meta-reference
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
12
llama_stack/templates/ollama/build.yaml
Normal file
12
llama_stack/templates/ollama/build.yaml
Normal file
|
@ -0,0 +1,12 @@
|
|||
name: ollama
|
||||
distribution_spec:
|
||||
description: Use ollama for running LLM inference
|
||||
providers:
|
||||
inference: remote::ollama
|
||||
memory:
|
||||
- meta-reference
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
12
llama_stack/templates/tgi/build.yaml
Normal file
12
llama_stack/templates/tgi/build.yaml
Normal file
|
@ -0,0 +1,12 @@
|
|||
name: tgi
|
||||
distribution_spec:
|
||||
description: Use TGI for running LLM inference
|
||||
providers:
|
||||
inference: remote::tgi
|
||||
memory:
|
||||
- meta-reference
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
9
llama_stack/templates/together/build.yaml
Normal file
9
llama_stack/templates/together/build.yaml
Normal file
|
@ -0,0 +1,9 @@
|
|||
name: together
|
||||
distribution_spec:
|
||||
description: Use Together.ai for running LLM inference
|
||||
providers:
|
||||
inference: remote::together
|
||||
memory: remote::weaviate
|
||||
safety: remote::together
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
9
llama_stack/templates/vllm/build.yaml
Normal file
9
llama_stack/templates/vllm/build.yaml
Normal file
|
@ -0,0 +1,9 @@
|
|||
name: vllm
|
||||
distribution_spec:
|
||||
description: Like local, but use vLLM for running LLM inference
|
||||
providers:
|
||||
inference: vllm
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
Loading…
Add table
Add a link
Reference in a new issue