move build.yaml to templates, symlink in distributions

commit 56f9b7d5d6
parent 81ed0327f3
Author: Xi Yan
Date: 2024-10-25 11:54:09 -07:00

23 changed files with 136 additions and 142 deletions
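For orientation, here is a minimal sketch, assuming it is run from the repository root, of the rearrangement each distribution goes through in the diff below. Directory names are taken from the symlink targets in the hunks; this is an illustration, not the script used for the commit.

from pathlib import Path

name = "bedrock"  # the commit applies the same change to all eleven distributions
old = Path("distributions") / name / "build.yaml"
new = Path("llama_stack/templates") / name / "build.yaml"

new.parent.mkdir(parents=True, exist_ok=True)
new.unlink(missing_ok=True)  # drop the old templates -> distributions symlink, if present
old.rename(new)              # the real build.yaml now lives under llama_stack/templates/
old.symlink_to(f"../../llama_stack/templates/{name}/build.yaml")  # relative link, as shown in the diff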

@@ -1,10 +0,0 @@
name: bedrock
distribution_spec:
description: Use Amazon Bedrock APIs.
providers:
inference: remote::bedrock
memory: meta-reference
safety: meta-reference
agents: meta-reference
telemetry: meta-reference
image_type: conda

@@ -0,0 +1 @@
../../llama_stack/templates/bedrock/build.yaml

@@ -1,10 +0,0 @@
name: databricks
distribution_spec:
description: Use Databricks for running LLM inference
providers:
inference: remote::databricks
memory: meta-reference
safety: meta-reference
agents: meta-reference
telemetry: meta-reference
image_type: conda

@@ -0,0 +1 @@
../../llama_stack/templates/databricks/build.yaml

@@ -1,10 +0,0 @@
name: fireworks
distribution_spec:
description: Use Fireworks.ai for running LLM inference
providers:
inference: remote::fireworks
memory: meta-reference
safety: meta-reference
agents: meta-reference
telemetry: meta-reference
image_type: docker

@@ -0,0 +1 @@
../../llama_stack/templates/fireworks/build.yaml

@@ -1,10 +0,0 @@
name: hf-endpoint
distribution_spec:
description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints."
providers:
inference: remote::hf::endpoint
memory: meta-reference
safety: meta-reference
agents: meta-reference
telemetry: meta-reference
image_type: conda

@@ -0,0 +1 @@
../../llama_stack/templates/hf-endpoint/build.yaml

@@ -1,10 +0,0 @@
name: hf-serverless
distribution_spec:
description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference."
providers:
inference: remote::hf::serverless
memory: meta-reference
safety: meta-reference
agents: meta-reference
telemetry: meta-reference
image_type: conda

@@ -0,0 +1 @@
../../llama_stack/templates/hf-serverless/build.yaml

@@ -1,14 +0,0 @@
name: meta-reference-gpu
distribution_spec:
docker_image: pytorch/pytorch
description: Use code from `llama_stack` itself to serve all llama stack APIs
providers:
inference: meta-reference
memory:
- meta-reference
- remote::chromadb
- remote::pgvector
safety: meta-reference
agents: meta-reference
telemetry: meta-reference
image_type: docker

@@ -0,0 +1 @@
../../llama_stack/templates/meta-reference-gpu/build.yaml

@@ -1,14 +0,0 @@
name: meta-reference-quantized-gpu
distribution_spec:
docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
description: Use code from `llama_stack` itself to serve all llama stack APIs
providers:
inference: meta-reference-quantized
memory:
- meta-reference
- remote::chromadb
- remote::pgvector
safety: meta-reference
agents: meta-reference
telemetry: meta-reference
image_type: docker

@@ -0,0 +1 @@
../../llama_stack/templates/meta-reference-quantized-gpu/build.yaml

@@ -1,13 +0,0 @@
name: ollama
distribution_spec:
description: Use ollama for running LLM inference
providers:
inference: remote::ollama
memory:
- meta-reference
- remote::chromadb
- remote::pgvector
safety: meta-reference
agents: meta-reference
telemetry: meta-reference
image_type: docker

@@ -0,0 +1 @@
../../llama_stack/templates/ollama/build.yaml

@@ -1,13 +0,0 @@
name: tgi
distribution_spec:
description: Use TGI for running LLM inference
providers:
inference: remote::tgi
memory:
- meta-reference
- remote::chromadb
- remote::pgvector
safety: meta-reference
agents: meta-reference
telemetry: meta-reference
image_type: docker

@@ -0,0 +1 @@
../../llama_stack/templates/tgi/build.yaml

@@ -1,10 +0,0 @@
name: together
distribution_spec:
description: Use Together.ai for running LLM inference
providers:
inference: remote::together
memory: remote::weaviate
safety: remote::together
agents: meta-reference
telemetry: meta-reference
image_type: docker

@@ -0,0 +1 @@
../../llama_stack/templates/together/build.yaml

@@ -1,10 +0,0 @@
name: vllm
distribution_spec:
description: Like local, but use vLLM for running LLM inference
providers:
inference: vllm
memory: meta-reference
safety: meta-reference
agents: meta-reference
telemetry: meta-reference
image_type: conda

@@ -0,0 +1 @@
../../llama_stack/templates/vllm/build.yaml

@@ -12,15 +12,7 @@ import os
 from functools import lru_cache
 from pathlib import Path
 
-TEMPLATES_PATH = (
-    Path(os.path.relpath(__file__)).parent.parent.parent.parent / "distributions"
-)
-
-# build.yaml templates exist in the llama-stack/distributions while wheel installs llama-stack/llama_stack
-# we copied the distributions folder to llama-stack/llama_stack/cli/distributions for wheel builds,
-# so we need to check both locations
-if not TEMPLATES_PATH.exists():
-    TEMPLATES_PATH = Path(os.path.relpath(__file__)).parent.parent.parent / "templates"
+TEMPLATES_PATH = Path(os.path.relpath(__file__)).parent.parent.parent / "templates"
 
 
 @lru_cache()
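With the distributions fallback removed, the path arithmetic behind the new TEMPLATES_PATH is worth spelling out. A small sketch, assuming the module lives at llama_stack/cli/stack/build.py and is run from the repository root (neither is shown in the diff, so both are assumptions):

import os
from pathlib import Path

# Hypothetical module location; the diff does not name the file.
module_file = "llama_stack/cli/stack/build.py"

# Three .parent hops climb stack/ -> cli/ -> llama_stack/, then "templates" is appended,
# so the templates ship inside the installed package rather than the source checkout.
templates_path = Path(os.path.relpath(module_file)).parent.parent.parent / "templates"
print(templates_path)  # llama_stack/templates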

@@ -1 +0,0 @@
../../distributions/bedrock/build.yaml

@@ -0,0 +1,10 @@
name: bedrock
distribution_spec:
description: Use Amazon Bedrock APIs.
providers:
inference: remote::bedrock
memory: meta-reference
safety: meta-reference
agents: meta-reference
telemetry: meta-reference
image_type: conda

@@ -0,0 +1,10 @@
name: databricks
distribution_spec:
description: Use Databricks for running LLM inference
providers:
inference: remote::databricks
memory: meta-reference
safety: meta-reference
agents: meta-reference
telemetry: meta-reference
image_type: conda

@@ -1 +0,0 @@
../../distributions/fireworks/build.yaml

@@ -0,0 +1,10 @@
name: fireworks
distribution_spec:
description: Use Fireworks.ai for running LLM inference
providers:
inference: remote::fireworks
memory: meta-reference
safety: meta-reference
agents: meta-reference
telemetry: meta-reference
image_type: docker

@@ -1 +0,0 @@
../../distributions/hf-endpoint/build.yaml

@@ -0,0 +1,10 @@
name: hf-endpoint
distribution_spec:
description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints."
providers:
inference: remote::hf::endpoint
memory: meta-reference
safety: meta-reference
agents: meta-reference
telemetry: meta-reference
image_type: conda

@@ -1 +0,0 @@
../../distributions/hf-serverless/build.yaml

@@ -0,0 +1,10 @@
name: hf-serverless
distribution_spec:
description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference."
providers:
inference: remote::hf::serverless
memory: meta-reference
safety: meta-reference
agents: meta-reference
telemetry: meta-reference
image_type: conda

@@ -1 +0,0 @@
../../distributions/meta-reference-gpu/build.yaml

@@ -0,0 +1,14 @@
name: meta-reference-gpu
distribution_spec:
docker_image: pytorch/pytorch
description: Use code from `llama_stack` itself to serve all llama stack APIs
providers:
inference: meta-reference
memory:
- meta-reference
- remote::chromadb
- remote::pgvector
safety: meta-reference
agents: meta-reference
telemetry: meta-reference
image_type: docker

@@ -0,0 +1,14 @@
name: meta-reference-quantized-gpu
distribution_spec:
docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
description: Use code from `llama_stack` itself to serve all llama stack APIs
providers:
inference: meta-reference-quantized
memory:
- meta-reference
- remote::chromadb
- remote::pgvector
safety: meta-reference
agents: meta-reference
telemetry: meta-reference
image_type: docker

@@ -1 +0,0 @@
../../distributions/ollama/build.yaml

@@ -0,0 +1,13 @@
name: ollama
distribution_spec:
description: Use ollama for running LLM inference
providers:
inference: remote::ollama
memory:
- meta-reference
- remote::chromadb
- remote::pgvector
safety: meta-reference
agents: meta-reference
telemetry: meta-reference
image_type: docker

@@ -1 +0,0 @@
../../distributions/tgi/build.yaml

@@ -0,0 +1,13 @@
name: tgi
distribution_spec:
description: Use TGI for running LLM inference
providers:
inference: remote::tgi
memory:
- meta-reference
- remote::chromadb
- remote::pgvector
safety: meta-reference
agents: meta-reference
telemetry: meta-reference
image_type: docker

@@ -1 +0,0 @@
../../distributions/together/build.yaml

@@ -0,0 +1,10 @@
name: together
distribution_spec:
description: Use Together.ai for running LLM inference
providers:
inference: remote::together
memory: remote::weaviate
safety: remote::together
agents: meta-reference
telemetry: meta-reference
image_type: docker

@@ -1 +0,0 @@
../../distributions/vllm/build.yaml

@@ -0,0 +1,10 @@
name: vllm
distribution_spec:
description: Like local, but use vLLM for running LLM inference
providers:
inference: vllm
memory: meta-reference
safety: meta-reference
agents: meta-reference
telemetry: meta-reference
image_type: conda
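Since these templates are plain YAML, they can be inspected with PyYAML alone. A minimal sketch using the vllm template above; the field names follow the keys shown in these files, and the loading code is illustrative rather than a llama_stack API:

import yaml  # PyYAML

# Read one of the relocated templates and pull out the fields shown in the diff.
with open("llama_stack/templates/vllm/build.yaml") as f:
    build_config = yaml.safe_load(f)

print(build_config["name"])                                          # vllm
print(build_config["image_type"])                                    # conda
print(build_config["distribution_spec"]["providers"]["inference"])   # vllm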