From 07f9bf723fdf3ae054af959df7d776f8b1653b97 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Fri, 25 Oct 2024 12:51:22 -0700 Subject: [PATCH] fix broken --list-templates with adding build.yaml files for packaging (#327) * add build files to templates * fix templates * manifest * symlink * symlink * precommit * change everything to docker build.yaml * remove image_type in templates * fix build from templates CLI * fix readmes --- MANIFEST.in | 2 +- distributions/bedrock/build.yaml | 11 +------ distributions/databricks/build.yaml | 11 +------ distributions/fireworks/README.md | 2 +- distributions/fireworks/build.yaml | 11 +------ distributions/hf-endpoint/build.yaml | 11 +------ distributions/hf-serverless/build.yaml | 11 +------ distributions/meta-reference-gpu/build.yaml | 15 +-------- .../meta-reference-quantized-gpu/build.yaml | 15 +-------- distributions/ollama/README.md | 2 +- distributions/ollama/build.yaml | 14 +------- distributions/tgi/README.md | 2 +- distributions/tgi/build.yaml | 14 +------- distributions/together/README.md | 2 +- distributions/together/build.yaml | 11 +------ distributions/vllm/build.yaml | 11 +------ docs/cli_reference.md | 10 +++--- docs/getting_started.md | 6 +--- llama_stack/cli/stack/build.py | 32 +++++++++---------- llama_stack/distribution/build.py | 7 ++-- .../quantization/scripts/build_conda.sh | 6 ++++ llama_stack/templates/bedrock/build.yaml | 9 ++++++ llama_stack/templates/databricks/build.yaml | 9 ++++++ llama_stack/templates/fireworks/build.yaml | 9 ++++++ llama_stack/templates/hf-endpoint/build.yaml | 9 ++++++ .../templates/hf-serverless/build.yaml | 9 ++++++ .../templates/meta-reference-gpu/build.yaml | 13 ++++++++ .../meta-reference-quantized-gpu/build.yaml | 13 ++++++++ llama_stack/templates/ollama/build.yaml | 12 +++++++ llama_stack/templates/tgi/build.yaml | 12 +++++++ llama_stack/templates/together/build.yaml | 9 ++++++ llama_stack/templates/vllm/build.yaml | 9 ++++++ 32 files changed, 161 insertions(+), 158 deletions(-) mode change 100644 => 120000 distributions/bedrock/build.yaml mode change 100644 => 120000 distributions/databricks/build.yaml mode change 100644 => 120000 distributions/fireworks/build.yaml mode change 100644 => 120000 distributions/hf-endpoint/build.yaml mode change 100644 => 120000 distributions/hf-serverless/build.yaml mode change 100644 => 120000 distributions/meta-reference-gpu/build.yaml mode change 100644 => 120000 distributions/meta-reference-quantized-gpu/build.yaml mode change 100644 => 120000 distributions/ollama/build.yaml mode change 100644 => 120000 distributions/tgi/build.yaml mode change 100644 => 120000 distributions/together/build.yaml mode change 100644 => 120000 distributions/vllm/build.yaml create mode 100644 llama_stack/templates/bedrock/build.yaml create mode 100644 llama_stack/templates/databricks/build.yaml create mode 100644 llama_stack/templates/fireworks/build.yaml create mode 100644 llama_stack/templates/hf-endpoint/build.yaml create mode 100644 llama_stack/templates/hf-serverless/build.yaml create mode 100644 llama_stack/templates/meta-reference-gpu/build.yaml create mode 100644 llama_stack/templates/meta-reference-quantized-gpu/build.yaml create mode 100644 llama_stack/templates/ollama/build.yaml create mode 100644 llama_stack/templates/tgi/build.yaml create mode 100644 llama_stack/templates/together/build.yaml create mode 100644 llama_stack/templates/vllm/build.yaml diff --git a/MANIFEST.in b/MANIFEST.in index 7426a3abd..0517b86a8 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,4 @@ 
include requirements.txt include llama_stack/distribution/*.sh include llama_stack/cli/scripts/*.sh -include distributions/*/build.yaml +include llama_stack/templates/*/build.yaml diff --git a/distributions/bedrock/build.yaml b/distributions/bedrock/build.yaml deleted file mode 100644 index ae7b27d49..000000000 --- a/distributions/bedrock/build.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: bedrock -distribution_spec: - description: Use Amazon Bedrock APIs. - providers: - inference: remote::bedrock - memory: meta-reference - safety: meta-reference - agents: meta-reference - telemetry: meta-reference -image_type: conda diff --git a/distributions/bedrock/build.yaml b/distributions/bedrock/build.yaml new file mode 120000 index 000000000..72402ef8d --- /dev/null +++ b/distributions/bedrock/build.yaml @@ -0,0 +1 @@ +../../llama_stack/templates/bedrock/build.yaml \ No newline at end of file diff --git a/distributions/databricks/build.yaml b/distributions/databricks/build.yaml deleted file mode 100644 index 2188dd0a0..000000000 --- a/distributions/databricks/build.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: databricks -distribution_spec: - description: Use Databricks for running LLM inference - providers: - inference: remote::databricks - memory: meta-reference - safety: meta-reference - agents: meta-reference - telemetry: meta-reference -image_type: conda diff --git a/distributions/databricks/build.yaml b/distributions/databricks/build.yaml new file mode 120000 index 000000000..66342fe6f --- /dev/null +++ b/distributions/databricks/build.yaml @@ -0,0 +1 @@ +../../llama_stack/templates/databricks/build.yaml \ No newline at end of file diff --git a/distributions/fireworks/README.md b/distributions/fireworks/README.md index fcf74d809..e3987e1e2 100644 --- a/distributions/fireworks/README.md +++ b/distributions/fireworks/README.md @@ -49,7 +49,7 @@ inference: **Via Conda** ```bash -llama stack build --config ./build.yaml +llama stack build --template fireworks --image-type conda # -- modify run.yaml to a valid Fireworks server endpoint llama stack run ./run.yaml ``` diff --git a/distributions/fireworks/build.yaml b/distributions/fireworks/build.yaml deleted file mode 100644 index 2e5cf0753..000000000 --- a/distributions/fireworks/build.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: fireworks -distribution_spec: - description: Use Fireworks.ai for running LLM inference - providers: - inference: remote::fireworks - memory: meta-reference - safety: meta-reference - agents: meta-reference - telemetry: meta-reference -image_type: docker diff --git a/distributions/fireworks/build.yaml b/distributions/fireworks/build.yaml new file mode 120000 index 000000000..32a5bd869 --- /dev/null +++ b/distributions/fireworks/build.yaml @@ -0,0 +1 @@ +../../llama_stack/templates/fireworks/build.yaml \ No newline at end of file diff --git a/distributions/hf-endpoint/build.yaml b/distributions/hf-endpoint/build.yaml deleted file mode 100644 index 750bebcb5..000000000 --- a/distributions/hf-endpoint/build.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: hf-endpoint -distribution_spec: - description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints." 
- providers: - inference: remote::hf::endpoint - memory: meta-reference - safety: meta-reference - agents: meta-reference - telemetry: meta-reference -image_type: conda diff --git a/distributions/hf-endpoint/build.yaml b/distributions/hf-endpoint/build.yaml new file mode 120000 index 000000000..a73c70c05 --- /dev/null +++ b/distributions/hf-endpoint/build.yaml @@ -0,0 +1 @@ +../../llama_stack/templates/hf-endpoint/build.yaml \ No newline at end of file diff --git a/distributions/hf-serverless/build.yaml b/distributions/hf-serverless/build.yaml deleted file mode 100644 index f6da3ad4d..000000000 --- a/distributions/hf-serverless/build.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: hf-serverless -distribution_spec: - description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference." - providers: - inference: remote::hf::serverless - memory: meta-reference - safety: meta-reference - agents: meta-reference - telemetry: meta-reference -image_type: conda diff --git a/distributions/hf-serverless/build.yaml b/distributions/hf-serverless/build.yaml new file mode 120000 index 000000000..f2db0fd55 --- /dev/null +++ b/distributions/hf-serverless/build.yaml @@ -0,0 +1 @@ +../../llama_stack/templates/hf-serverless/build.yaml \ No newline at end of file diff --git a/distributions/meta-reference-gpu/build.yaml b/distributions/meta-reference-gpu/build.yaml deleted file mode 100644 index 08e034154..000000000 --- a/distributions/meta-reference-gpu/build.yaml +++ /dev/null @@ -1,14 +0,0 @@ -name: meta-reference-gpu -distribution_spec: - docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime - description: Use code from `llama_stack` itself to serve all llama stack APIs - providers: - inference: meta-reference - memory: - - meta-reference - - remote::chromadb - - remote::pgvector - safety: meta-reference - agents: meta-reference - telemetry: meta-reference -image_type: docker diff --git a/distributions/meta-reference-gpu/build.yaml b/distributions/meta-reference-gpu/build.yaml new file mode 120000 index 000000000..4418195eb --- /dev/null +++ b/distributions/meta-reference-gpu/build.yaml @@ -0,0 +1 @@ +../../llama_stack/templates/meta-reference-gpu/build.yaml \ No newline at end of file diff --git a/distributions/meta-reference-quantized-gpu/build.yaml b/distributions/meta-reference-quantized-gpu/build.yaml deleted file mode 100644 index e9ddb4aad..000000000 --- a/distributions/meta-reference-quantized-gpu/build.yaml +++ /dev/null @@ -1,14 +0,0 @@ -name: meta-reference-quantized-gpu -distribution_spec: - docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime - description: Use code from `llama_stack` itself to serve all llama stack APIs - providers: - inference: meta-reference-quantized - memory: - - meta-reference - - remote::chromadb - - remote::pgvector - safety: meta-reference - agents: meta-reference - telemetry: meta-reference -image_type: docker diff --git a/distributions/meta-reference-quantized-gpu/build.yaml b/distributions/meta-reference-quantized-gpu/build.yaml new file mode 120000 index 000000000..f3dbe996f --- /dev/null +++ b/distributions/meta-reference-quantized-gpu/build.yaml @@ -0,0 +1 @@ +../../llama_stack/templates/meta-reference-quantized-gpu/build.yaml \ No newline at end of file diff --git a/distributions/ollama/README.md b/distributions/ollama/README.md index d59c3f9e1..70bc27a85 100644 --- a/distributions/ollama/README.md +++ b/distributions/ollama/README.md @@ -86,6 +86,6 @@ inference: **Via Conda** ``` 
-llama stack build --config ./build.yaml +llama stack build --template ollama --image-type conda llama stack run ./gpu/run.yaml ``` diff --git a/distributions/ollama/build.yaml b/distributions/ollama/build.yaml deleted file mode 100644 index c27f40929..000000000 --- a/distributions/ollama/build.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: ollama -distribution_spec: - description: Use ollama for running LLM inference - providers: - inference: remote::ollama - memory: - - meta-reference - - remote::chromadb - - remote::pgvector - safety: meta-reference - agents: meta-reference - telemetry: meta-reference -image_type: docker diff --git a/distributions/ollama/build.yaml b/distributions/ollama/build.yaml new file mode 120000 index 000000000..8772548e0 --- /dev/null +++ b/distributions/ollama/build.yaml @@ -0,0 +1 @@ +../../llama_stack/templates/ollama/build.yaml \ No newline at end of file diff --git a/distributions/tgi/README.md b/distributions/tgi/README.md index 86d2636d7..886252ecd 100644 --- a/distributions/tgi/README.md +++ b/distributions/tgi/README.md @@ -88,7 +88,7 @@ inference: **Via Conda** ```bash -llama stack build --config ./build.yaml +llama stack build --template tgi --image-type conda # -- start a TGI server endpoint llama stack run ./gpu/run.yaml ``` diff --git a/distributions/tgi/build.yaml b/distributions/tgi/build.yaml deleted file mode 100644 index 2c0ca1d33..000000000 --- a/distributions/tgi/build.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: tgi -distribution_spec: - description: Use TGI for running LLM inference - providers: - inference: remote::tgi - memory: - - meta-reference - - remote::chromadb - - remote::pgvector - safety: meta-reference - agents: meta-reference - telemetry: meta-reference -image_type: docker diff --git a/distributions/tgi/build.yaml b/distributions/tgi/build.yaml new file mode 120000 index 000000000..73e59ad84 --- /dev/null +++ b/distributions/tgi/build.yaml @@ -0,0 +1 @@ +../../llama_stack/templates/tgi/build.yaml \ No newline at end of file diff --git a/distributions/together/README.md b/distributions/together/README.md index 227c7a450..b964673e0 100644 --- a/distributions/together/README.md +++ b/distributions/together/README.md @@ -62,7 +62,7 @@ memory: **Via Conda** ```bash -llama stack build --config ./build.yaml +llama stack build --template together --image-type conda # -- modify run.yaml to a valid Together server endpoint llama stack run ./run.yaml ``` diff --git a/distributions/together/build.yaml b/distributions/together/build.yaml deleted file mode 100644 index 49eab859d..000000000 --- a/distributions/together/build.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: together -distribution_spec: - description: Use Together.ai for running LLM inference - providers: - inference: remote::together - memory: remote::weaviate - safety: remote::together - agents: meta-reference - telemetry: meta-reference -image_type: docker diff --git a/distributions/together/build.yaml b/distributions/together/build.yaml new file mode 120000 index 000000000..3877a9c96 --- /dev/null +++ b/distributions/together/build.yaml @@ -0,0 +1 @@ +../../llama_stack/templates/together/build.yaml \ No newline at end of file diff --git a/distributions/vllm/build.yaml b/distributions/vllm/build.yaml deleted file mode 100644 index f41352eb1..000000000 --- a/distributions/vllm/build.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: vllm -distribution_spec: - description: Like local, but use vLLM for running LLM inference - providers: - inference: vllm - memory: meta-reference - safety: 
meta-reference - agents: meta-reference - telemetry: meta-reference -image_type: conda \ No newline at end of file diff --git a/distributions/vllm/build.yaml b/distributions/vllm/build.yaml new file mode 120000 index 000000000..dfc9401b6 --- /dev/null +++ b/distributions/vllm/build.yaml @@ -0,0 +1 @@ +../../llama_stack/templates/vllm/build.yaml \ No newline at end of file diff --git a/docs/cli_reference.md b/docs/cli_reference.md index f0f67192f..ddc8e6b3e 100644 --- a/docs/cli_reference.md +++ b/docs/cli_reference.md @@ -279,11 +279,11 @@ llama stack build --list-templates You may then pick a template to build your distribution with providers fitted to your liking. ``` -llama stack build --template local-tgi --name my-tgi-stack +llama stack build --template local-tgi --name my-tgi-stack --image-type conda ``` ``` -$ llama stack build --template local-tgi --name my-tgi-stack +$ llama stack build --template local-tgi --name my-tgi-stack --image-type conda ... ... Build spec configuration saved at ~/.conda/envs/llamastack-my-tgi-stack/my-tgi-stack-build.yaml You may now run `llama stack configure my-tgi-stack` or `llama stack configure ~ #### Building from config file - In addition to templates, you may customize the build to your liking through editing config files and build from config files with the following command. -- The config file will be of contents like the ones in `llama_stack/distributions/templates/`. +- The config file will have contents like the ones in `llama_stack/templates/`. ``` -$ cat llama_stack/distribution/templates/local-ollama-build.yaml +$ cat build.yaml name: local-ollama distribution_spec: @@ -311,7 +311,7 @@ image_type: conda ``` ``` -llama stack build --config llama_stack/distribution/templates/local-ollama-build.yaml +llama stack build --config build.yaml ``` #### How to build distribution with Docker image diff --git a/docs/getting_started.md b/docs/getting_started.md index 4f06f5d47..2a90301d0 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -35,11 +35,7 @@ You have two ways to start up Llama stack server: 1. **Starting up server via docker**: - We provide 2 pre-built Docker image of Llama Stack distribution, which can be found in the following links. - - [llamastack-local-gpu](https://hub.docker.com/repository/docker/llamastack/llamastack-local-gpu/general) - - This is a packaged version with our local meta-reference implementations, where you will be running inference locally with downloaded Llama model checkpoints. - - [llamastack-local-cpu](https://hub.docker.com/repository/docker/llamastack/llamastack-local-cpu/general) - - This is a lite version with remote inference where you can hook up to your favourite remote inference framework (e.g. ollama, fireworks, together, tgi) for running inference without GPU. + We provide pre-built Docker images of Llama Stack distributions, which can be found via the links in the [distributions](../distributions/) folder. > [!NOTE] > For GPU inference, you need to set these environment variables for specifying local directory containing your model checkpoints, and enable GPU inference to start running docker container.
diff --git a/llama_stack/cli/stack/build.py b/llama_stack/cli/stack/build.py index 098932f8a..40fca4c6d 100644 --- a/llama_stack/cli/stack/build.py +++ b/llama_stack/cli/stack/build.py @@ -12,9 +12,7 @@ import os from functools import lru_cache from pathlib import Path -TEMPLATES_PATH = ( - Path(os.path.relpath(__file__)).parent.parent.parent.parent / "distributions" -) +TEMPLATES_PATH = Path(os.path.relpath(__file__)).parent.parent.parent / "templates" @lru_cache() @@ -26,7 +24,6 @@ def available_templates_specs() -> List[BuildConfig]: with open(p, "r") as f: build_config = BuildConfig(**yaml.safe_load(f)) template_specs.append(build_config) - return template_specs @@ -99,19 +96,22 @@ class StackBuild(Subcommand): "You must specify a name for the build using --name when using a template" ) return - build_path = TEMPLATES_PATH / f"{args.template}-build.yaml" - if not build_path.exists(): - self.parser.error( - f"Could not find template {args.template}. Please run `llama stack build --list-templates` to check out the available templates" - ) - return - with open(build_path, "r") as f: - build_config = BuildConfig(**yaml.safe_load(f)) - build_config.name = args.name - if args.image_type: - build_config.image_type = args.image_type - self._run_stack_build_command_from_build_config(build_config) + available_templates = available_templates_specs() + for build_config in available_templates: + if build_config.name == args.template: + build_config.name = args.name + if args.image_type: + build_config.image_type = args.image_type + else: + self.parser.error( + f"Please specify a image-type (docker | conda) for {args.template}" + ) + self._run_stack_build_command_from_build_config(build_config) + return + self.parser.error( + f"Could not find template {args.template}. Please run `llama stack build --list-templates` to check out the available templates" + ) return # try to see if we can find a pre-existing build config file through name diff --git a/llama_stack/distribution/build.py b/llama_stack/distribution/build.py index 13c545723..e3a9d9186 100644 --- a/llama_stack/distribution/build.py +++ b/llama_stack/distribution/build.py @@ -8,18 +8,19 @@ from enum import Enum from typing import List, Optional import pkg_resources - -from llama_stack.distribution.utils.exec import run_with_pty from pydantic import BaseModel from termcolor import cprint +from llama_stack.distribution.utils.exec import run_with_pty + from llama_stack.distribution.datatypes import * # noqa: F403 from pathlib import Path -from llama_stack.distribution.utils.config_dirs import BUILDS_BASE_DIR from llama_stack.distribution.distribution import get_provider_registry +from llama_stack.distribution.utils.config_dirs import BUILDS_BASE_DIR + # These are the dependencies needed by the distribution server. # `llama-stack` is automatically installed by the installation script. diff --git a/llama_stack/providers/impls/meta_reference/inference/quantization/scripts/build_conda.sh b/llama_stack/providers/impls/meta_reference/inference/quantization/scripts/build_conda.sh index d3028f8e8..ae0ed0bac 100644 --- a/llama_stack/providers/impls/meta_reference/inference/quantization/scripts/build_conda.sh +++ b/llama_stack/providers/impls/meta_reference/inference/quantization/scripts/build_conda.sh @@ -1,5 +1,11 @@ #!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ if [[ $# -ne 1 ]]; then echo "Error: Please provide the name of CONDA environment you wish to create" exit 1 diff --git a/llama_stack/templates/bedrock/build.yaml b/llama_stack/templates/bedrock/build.yaml new file mode 100644 index 000000000..a3ff27949 --- /dev/null +++ b/llama_stack/templates/bedrock/build.yaml @@ -0,0 +1,9 @@ +name: bedrock +distribution_spec: + description: Use Amazon Bedrock APIs. + providers: + inference: remote::bedrock + memory: meta-reference + safety: meta-reference + agents: meta-reference + telemetry: meta-reference diff --git a/llama_stack/templates/databricks/build.yaml b/llama_stack/templates/databricks/build.yaml new file mode 100644 index 000000000..f6c8b50a1 --- /dev/null +++ b/llama_stack/templates/databricks/build.yaml @@ -0,0 +1,9 @@ +name: databricks +distribution_spec: + description: Use Databricks for running LLM inference + providers: + inference: remote::databricks + memory: meta-reference + safety: meta-reference + agents: meta-reference + telemetry: meta-reference diff --git a/llama_stack/templates/fireworks/build.yaml b/llama_stack/templates/fireworks/build.yaml new file mode 100644 index 000000000..37129bef0 --- /dev/null +++ b/llama_stack/templates/fireworks/build.yaml @@ -0,0 +1,9 @@ +name: fireworks +distribution_spec: + description: Use Fireworks.ai for running LLM inference + providers: + inference: remote::fireworks + memory: meta-reference + safety: meta-reference + agents: meta-reference + telemetry: meta-reference diff --git a/llama_stack/templates/hf-endpoint/build.yaml b/llama_stack/templates/hf-endpoint/build.yaml new file mode 100644 index 000000000..6c84e5ccf --- /dev/null +++ b/llama_stack/templates/hf-endpoint/build.yaml @@ -0,0 +1,9 @@ +name: hf-endpoint +distribution_spec: + description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints." + providers: + inference: remote::hf::endpoint + memory: meta-reference + safety: meta-reference + agents: meta-reference + telemetry: meta-reference diff --git a/llama_stack/templates/hf-serverless/build.yaml b/llama_stack/templates/hf-serverless/build.yaml new file mode 100644 index 000000000..32561c1fa --- /dev/null +++ b/llama_stack/templates/hf-serverless/build.yaml @@ -0,0 +1,9 @@ +name: hf-serverless +distribution_spec: + description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference." 
+ providers: + inference: remote::hf::serverless + memory: meta-reference + safety: meta-reference + agents: meta-reference + telemetry: meta-reference diff --git a/llama_stack/templates/meta-reference-gpu/build.yaml b/llama_stack/templates/meta-reference-gpu/build.yaml new file mode 100644 index 000000000..d0fe93aa3 --- /dev/null +++ b/llama_stack/templates/meta-reference-gpu/build.yaml @@ -0,0 +1,13 @@ +name: meta-reference-gpu +distribution_spec: + docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime + description: Use code from `llama_stack` itself to serve all llama stack APIs + providers: + inference: meta-reference + memory: + - meta-reference + - remote::chromadb + - remote::pgvector + safety: meta-reference + agents: meta-reference + telemetry: meta-reference diff --git a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml new file mode 100644 index 000000000..20500ea5a --- /dev/null +++ b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml @@ -0,0 +1,13 @@ +name: meta-reference-quantized-gpu +distribution_spec: + docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime + description: Use code from `llama_stack` itself to serve all llama stack APIs + providers: + inference: meta-reference-quantized + memory: + - meta-reference + - remote::chromadb + - remote::pgvector + safety: meta-reference + agents: meta-reference + telemetry: meta-reference diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml new file mode 100644 index 000000000..06de2fc3c --- /dev/null +++ b/llama_stack/templates/ollama/build.yaml @@ -0,0 +1,12 @@ +name: ollama +distribution_spec: + description: Use ollama for running LLM inference + providers: + inference: remote::ollama + memory: + - meta-reference + - remote::chromadb + - remote::pgvector + safety: meta-reference + agents: meta-reference + telemetry: meta-reference diff --git a/llama_stack/templates/tgi/build.yaml b/llama_stack/templates/tgi/build.yaml new file mode 100644 index 000000000..c5e618bb6 --- /dev/null +++ b/llama_stack/templates/tgi/build.yaml @@ -0,0 +1,12 @@ +name: tgi +distribution_spec: + description: Use TGI for running LLM inference + providers: + inference: remote::tgi + memory: + - meta-reference + - remote::chromadb + - remote::pgvector + safety: meta-reference + agents: meta-reference + telemetry: meta-reference diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml new file mode 100644 index 000000000..5232aeb93 --- /dev/null +++ b/llama_stack/templates/together/build.yaml @@ -0,0 +1,9 @@ +name: together +distribution_spec: + description: Use Together.ai for running LLM inference + providers: + inference: remote::together + memory: remote::weaviate + safety: remote::together + agents: meta-reference + telemetry: meta-reference diff --git a/llama_stack/templates/vllm/build.yaml b/llama_stack/templates/vllm/build.yaml new file mode 100644 index 000000000..d842896db --- /dev/null +++ b/llama_stack/templates/vllm/build.yaml @@ -0,0 +1,9 @@ +name: vllm +distribution_spec: + description: Like local, but use vLLM for running LLM inference + providers: + inference: vllm + memory: meta-reference + safety: meta-reference + agents: meta-reference + telemetry: meta-reference