forked from phoenix-oss/llama-stack-mirror
fix broken --list-templates with adding build.yaml files for packaging (#327)
* add build files to templates
* fix templates
* manifest
* symlink
* symlink
* precommit
* change everything to docker build.yaml
* remove image_type in templates
* fix build from templates CLI
* fix readmes
This commit is contained in:
parent
afae4e3d8e
commit
07f9bf723f
32 changed files with 161 additions and 158 deletions
@@ -1,4 +1,4 @@
 include requirements.txt
 include llama_stack/distribution/*.sh
 include llama_stack/cli/scripts/*.sh
-include distributions/*/build.yaml
+include llama_stack/templates/*/build.yaml
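
Note: `llama stack build --list-templates` reads these build.yaml files from inside the installed `llama_stack` package, so the `include` directive has to follow them to `llama_stack/templates/`. A minimal sanity check, not part of the commit, assuming a pip-installed `llama_stack` with the standard package layout:

```python
# List the template build.yaml files that actually ship with the installed package.
from pathlib import Path

import llama_stack  # the installed package, not a source checkout

templates_dir = Path(llama_stack.__file__).parent / "templates"
print(sorted(p.parent.name for p in templates_dir.glob("*/build.yaml")))
# expected to include: bedrock, databricks, fireworks, hf-endpoint, hf-serverless, ...
```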

distributions/bedrock/build.yaml  (Symbolic link)
@@ -1,10 +0,0 @@
-name: bedrock
-distribution_spec:
-  description: Use Amazon Bedrock APIs.
-  providers:
-    inference: remote::bedrock
-    memory: meta-reference
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: conda
@@ -0,0 +1 @@
+../../llama_stack/templates/bedrock/build.yaml

distributions/databricks/build.yaml  (Symbolic link)
@@ -1,10 +0,0 @@
-name: databricks
-distribution_spec:
-  description: Use Databricks for running LLM inference
-  providers:
-    inference: remote::databricks
-    memory: meta-reference
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: conda
@@ -0,0 +1 @@
+../../llama_stack/templates/databricks/build.yaml

@@ -49,7 +49,7 @@ inference:
 **Via Conda**
 
 ```bash
-llama stack build --config ./build.yaml
+llama stack build --template fireworks --image-type conda
 # -- modify run.yaml to a valid Fireworks server endpoint
 llama stack run ./run.yaml
 ```

distributions/fireworks/build.yaml  (Symbolic link)
@@ -1,10 +0,0 @@
-name: fireworks
-distribution_spec:
-  description: Use Fireworks.ai for running LLM inference
-  providers:
-    inference: remote::fireworks
-    memory: meta-reference
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: docker
@@ -0,0 +1 @@
+../../llama_stack/templates/fireworks/build.yaml

distributions/hf-endpoint/build.yaml  (Symbolic link)
@@ -1,10 +0,0 @@
-name: hf-endpoint
-distribution_spec:
-  description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints."
-  providers:
-    inference: remote::hf::endpoint
-    memory: meta-reference
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: conda
@@ -0,0 +1 @@
+../../llama_stack/templates/hf-endpoint/build.yaml

distributions/hf-serverless/build.yaml  (Symbolic link)
@@ -1,10 +0,0 @@
-name: hf-serverless
-distribution_spec:
-  description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference."
-  providers:
-    inference: remote::hf::serverless
-    memory: meta-reference
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: conda
@@ -0,0 +1 @@
+../../llama_stack/templates/hf-serverless/build.yaml

distributions/meta-reference-gpu/build.yaml  (Symbolic link)
@@ -1,14 +0,0 @@
-name: meta-reference-gpu
-distribution_spec:
-  docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
-  description: Use code from `llama_stack` itself to serve all llama stack APIs
-  providers:
-    inference: meta-reference
-    memory:
-    - meta-reference
-    - remote::chromadb
-    - remote::pgvector
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: docker
@@ -0,0 +1 @@
+../../llama_stack/templates/meta-reference-gpu/build.yaml

distributions/meta-reference-quantized-gpu/build.yaml  (Symbolic link)
@@ -1,14 +0,0 @@
-name: meta-reference-quantized-gpu
-distribution_spec:
-  docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
-  description: Use code from `llama_stack` itself to serve all llama stack APIs
-  providers:
-    inference: meta-reference-quantized
-    memory:
-    - meta-reference
-    - remote::chromadb
-    - remote::pgvector
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: docker
@@ -0,0 +1 @@
+../../llama_stack/templates/meta-reference-quantized-gpu/build.yaml

@@ -86,6 +86,6 @@ inference:
 **Via Conda**
 
 ```
-llama stack build --config ./build.yaml
+llama stack build --template ollama --image-type conda
 llama stack run ./gpu/run.yaml
 ```

distributions/ollama/build.yaml  (Symbolic link)
@@ -1,13 +0,0 @@
-name: ollama
-distribution_spec:
-  description: Use ollama for running LLM inference
-  providers:
-    inference: remote::ollama
-    memory:
-    - meta-reference
-    - remote::chromadb
-    - remote::pgvector
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: docker
@@ -0,0 +1 @@
+../../llama_stack/templates/ollama/build.yaml

@@ -88,7 +88,7 @@ inference:
 **Via Conda**
 
 ```bash
-llama stack build --config ./build.yaml
+llama stack build --template tgi --image-type conda
 # -- start a TGI server endpoint
 llama stack run ./gpu/run.yaml
 ```

distributions/tgi/build.yaml  (Symbolic link)
@@ -1,13 +0,0 @@
-name: tgi
-distribution_spec:
-  description: Use TGI for running LLM inference
-  providers:
-    inference: remote::tgi
-    memory:
-    - meta-reference
-    - remote::chromadb
-    - remote::pgvector
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: docker
@@ -0,0 +1 @@
+../../llama_stack/templates/tgi/build.yaml

@@ -62,7 +62,7 @@ memory:
 **Via Conda**
 
 ```bash
-llama stack build --config ./build.yaml
+llama stack build --template together --image-type conda
 # -- modify run.yaml to a valid Together server endpoint
 llama stack run ./run.yaml
 ```

distributions/together/build.yaml  (Symbolic link)
@@ -1,10 +0,0 @@
-name: together
-distribution_spec:
-  description: Use Together.ai for running LLM inference
-  providers:
-    inference: remote::together
-    memory: remote::weaviate
-    safety: remote::together
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: docker
@@ -0,0 +1 @@
+../../llama_stack/templates/together/build.yaml

distributions/vllm/build.yaml  (Symbolic link)
@@ -1,10 +0,0 @@
-name: vllm
-distribution_spec:
-  description: Like local, but use vLLM for running LLM inference
-  providers:
-    inference: vllm
-    memory: meta-reference
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: conda
@@ -0,0 +1 @@
+../../llama_stack/templates/vllm/build.yaml
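
Note: every `distributions/<name>/build.yaml` above is now a relative symlink into `llama_stack/templates/`. A hypothetical helper that recreates those links from the repository root (template names and relative targets taken from the diffs above; not part of the commit):

```python
# Recreate the distributions/<name>/build.yaml symlinks introduced by this commit.
import os

TEMPLATE_NAMES = [
    "bedrock", "databricks", "fireworks", "hf-endpoint", "hf-serverless",
    "meta-reference-gpu", "meta-reference-quantized-gpu", "ollama", "tgi",
    "together", "vllm",
]

for name in TEMPLATE_NAMES:
    link = os.path.join("distributions", name, "build.yaml")
    # Relative target, exactly as committed: ../../llama_stack/templates/<name>/build.yaml
    target = os.path.join("..", "..", "llama_stack", "templates", name, "build.yaml")
    if os.path.lexists(link):
        os.remove(link)
    os.symlink(target, link)
```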

@@ -279,11 +279,11 @@ llama stack build --list-templates
 You may then pick a template to build your distribution with providers fitted to your liking.
 
 ```
-llama stack build --template local-tgi --name my-tgi-stack
+llama stack build --template local-tgi --name my-tgi-stack --image-type conda
 ```
 
 ```
-$ llama stack build --template local-tgi --name my-tgi-stack
+$ llama stack build --template local-tgi --name my-tgi-stack --image-type conda
 ...
 ...
 Build spec configuration saved at ~/.conda/envs/llamastack-my-tgi-stack/my-tgi-stack-build.yaml

@@ -293,10 +293,10 @@ You may now run `llama stack configure my-tgi-stack` or `llama stack configure ~
 #### Building from config file
 - In addition to templates, you may customize the build to your liking through editing config files and build from config files with the following command.
 
-- The config file will be of contents like the ones in `llama_stack/distributions/templates/`.
+- The config file will be of contents like the ones in `llama_stack/templates/`.
 
 ```
-$ cat llama_stack/distribution/templates/local-ollama-build.yaml
+$ cat build.yaml
 
 name: local-ollama
 distribution_spec:

@@ -311,7 +311,7 @@ image_type: conda
 ```
 
 ```
-llama stack build --config llama_stack/distribution/templates/local-ollama-build.yaml
+llama stack build --config build.yaml
 ```
 
 #### How to build distribution with Docker image

@@ -35,11 +35,7 @@ You have two ways to start up Llama stack server:
 
 1. **Starting up server via docker**:
 
-We provide 2 pre-built Docker image of Llama Stack distribution, which can be found in the following links.
-- [llamastack-local-gpu](https://hub.docker.com/repository/docker/llamastack/llamastack-local-gpu/general)
-  - This is a packaged version with our local meta-reference implementations, where you will be running inference locally with downloaded Llama model checkpoints.
-- [llamastack-local-cpu](https://hub.docker.com/repository/docker/llamastack/llamastack-local-cpu/general)
-  - This is a lite version with remote inference where you can hook up to your favourite remote inference framework (e.g. ollama, fireworks, together, tgi) for running inference without GPU.
+We provide pre-built Docker image of Llama Stack distribution, which can be found in the following links in the [distributions](../distributions/) folder.
 
 > [!NOTE]
 > For GPU inference, you need to set these environment variables for specifying local directory containing your model checkpoints, and enable GPU inference to start running docker container.

@@ -12,9 +12,7 @@ import os
 from functools import lru_cache
 from pathlib import Path
 
-TEMPLATES_PATH = (
-    Path(os.path.relpath(__file__)).parent.parent.parent.parent / "distributions"
-)
+TEMPLATES_PATH = Path(os.path.relpath(__file__)).parent.parent.parent / "templates"
 
 
 @lru_cache()
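
Note: why three `.parent` hops now land on the packaged templates — assuming this module sits under `llama_stack/cli/stack/` (the exact module path is not shown in this view), the path arithmetic works out as in the sketch below. The old expression climbed one level too far, out of the package and into the repo-root `distributions/` directory, which is not installed with the wheel — the packaging breakage the commit title points at.

```python
# Path arithmetic for the old vs. new TEMPLATES_PATH (module path is a hypothetical
# stand-in for __file__).
import os
from pathlib import Path

module_file = "llama_stack/cli/stack/stack_build.py"  # hypothetical

old = Path(os.path.relpath(module_file)).parent.parent.parent.parent / "distributions"
new = Path(os.path.relpath(module_file)).parent.parent.parent / "templates"

print(old)  # distributions           -- only exists in a source checkout
print(new)  # llama_stack/templates   -- ships inside the installed package
```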

@@ -26,7 +24,6 @@ def available_templates_specs() -> List[BuildConfig]:
         with open(p, "r") as f:
             build_config = BuildConfig(**yaml.safe_load(f))
             template_specs.append(build_config)
-
     return template_specs
 
 

@@ -99,19 +96,22 @@ class StackBuild(Subcommand):
                     "You must specify a name for the build using --name when using a template"
                 )
                 return
-            build_path = TEMPLATES_PATH / f"{args.template}-build.yaml"
-            if not build_path.exists():
-                self.parser.error(
-                    f"Could not find template {args.template}. Please run `llama stack build --list-templates` to check out the available templates"
-                )
-                return
-            with open(build_path, "r") as f:
-                build_config = BuildConfig(**yaml.safe_load(f))
-                build_config.name = args.name
-                if args.image_type:
-                    build_config.image_type = args.image_type
-                self._run_stack_build_command_from_build_config(build_config)
+            available_templates = available_templates_specs()
+            for build_config in available_templates:
+                if build_config.name == args.template:
+                    build_config.name = args.name
+                    if args.image_type:
+                        build_config.image_type = args.image_type
+                    else:
+                        self.parser.error(
+                            f"Please specify a image-type (docker | conda) for {args.template}"
+                        )
+                    self._run_stack_build_command_from_build_config(build_config)
+                    return
+
+            self.parser.error(
+                f"Could not find template {args.template}. Please run `llama stack build --list-templates` to check out the available templates"
+            )
             return
 
         # try to see if we can find a pre-existing build config file through name
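
Note: pulled together, the new template branch of the build command reads roughly like the sketch below, reconstructed from the `+` lines above; `args`, `self.parser`, and `_run_stack_build_command_from_build_config` belong to the surrounding `StackBuild` class, and `_build_from_template` is a hypothetical wrapper name — in the diff this logic sits inline in the command handler.

```python
# Condensed sketch of the new flow: match the requested template by name,
# apply --name and --image-type, and error out if nothing matches.
def _build_from_template(self, args) -> None:
    for build_config in available_templates_specs():
        if build_config.name == args.template:
            build_config.name = args.name
            if args.image_type:
                build_config.image_type = args.image_type
            else:
                self.parser.error(
                    f"Please specify a image-type (docker | conda) for {args.template}"
                )
            self._run_stack_build_command_from_build_config(build_config)
            return

    self.parser.error(
        f"Could not find template {args.template}. Please run "
        "`llama stack build --list-templates` to check out the available templates"
    )
```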

@@ -8,18 +8,19 @@ from enum import Enum
 from typing import List, Optional
 
 import pkg_resources
-
-from llama_stack.distribution.utils.exec import run_with_pty
 from pydantic import BaseModel
 
 from termcolor import cprint
 
+from llama_stack.distribution.utils.exec import run_with_pty
+
 from llama_stack.distribution.datatypes import *  # noqa: F403
 from pathlib import Path
 
-from llama_stack.distribution.utils.config_dirs import BUILDS_BASE_DIR
 from llama_stack.distribution.distribution import get_provider_registry
 
+from llama_stack.distribution.utils.config_dirs import BUILDS_BASE_DIR
+
 
 # These are the dependencies needed by the distribution server.
 # `llama-stack` is automatically installed by the installation script.

@@ -1,5 +1,11 @@
 #!/bin/bash
 
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
 if [[ $# -ne 1 ]]; then
   echo "Error: Please provide the name of CONDA environment you wish to create"
   exit 1

llama_stack/templates/bedrock/build.yaml  (Normal file)
@@ -0,0 +1,9 @@
+name: bedrock
+distribution_spec:
+  description: Use Amazon Bedrock APIs.
+  providers:
+    inference: remote::bedrock
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference

llama_stack/templates/databricks/build.yaml  (Normal file)
@@ -0,0 +1,9 @@
+name: databricks
+distribution_spec:
+  description: Use Databricks for running LLM inference
+  providers:
+    inference: remote::databricks
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference

llama_stack/templates/fireworks/build.yaml  (Normal file)
@@ -0,0 +1,9 @@
+name: fireworks
+distribution_spec:
+  description: Use Fireworks.ai for running LLM inference
+  providers:
+    inference: remote::fireworks
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference

llama_stack/templates/hf-endpoint/build.yaml  (Normal file)
@@ -0,0 +1,9 @@
+name: hf-endpoint
+distribution_spec:
+  description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints."
+  providers:
+    inference: remote::hf::endpoint
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference

llama_stack/templates/hf-serverless/build.yaml  (Normal file)
@@ -0,0 +1,9 @@
+name: hf-serverless
+distribution_spec:
+  description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference."
+  providers:
+    inference: remote::hf::serverless
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference

llama_stack/templates/meta-reference-gpu/build.yaml  (Normal file)
@@ -0,0 +1,13 @@
+name: meta-reference-gpu
+distribution_spec:
+  docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
+  providers:
+    inference: meta-reference
+    memory:
+    - meta-reference
+    - remote::chromadb
+    - remote::pgvector
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference

llama_stack/templates/meta-reference-quantized-gpu/build.yaml  (Normal file)
@@ -0,0 +1,13 @@
+name: meta-reference-quantized-gpu
+distribution_spec:
+  docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
+  providers:
+    inference: meta-reference-quantized
+    memory:
+    - meta-reference
+    - remote::chromadb
+    - remote::pgvector
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference

llama_stack/templates/ollama/build.yaml  (Normal file)
@@ -0,0 +1,12 @@
+name: ollama
+distribution_spec:
+  description: Use ollama for running LLM inference
+  providers:
+    inference: remote::ollama
+    memory:
+    - meta-reference
+    - remote::chromadb
+    - remote::pgvector
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference

llama_stack/templates/tgi/build.yaml  (Normal file)
@@ -0,0 +1,12 @@
+name: tgi
+distribution_spec:
+  description: Use TGI for running LLM inference
+  providers:
+    inference: remote::tgi
+    memory:
+    - meta-reference
+    - remote::chromadb
+    - remote::pgvector
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference

llama_stack/templates/together/build.yaml  (Normal file)
@@ -0,0 +1,9 @@
+name: together
+distribution_spec:
+  description: Use Together.ai for running LLM inference
+  providers:
+    inference: remote::together
+    memory: remote::weaviate
+    safety: remote::together
+    agents: meta-reference
+    telemetry: meta-reference

llama_stack/templates/vllm/build.yaml  (Normal file)
@@ -0,0 +1,9 @@
+name: vllm
+distribution_spec:
+  description: Like local, but use vLLM for running LLM inference
+  providers:
+    inference: vllm
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference