mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-29 15:23:51 +00:00
move build.yaml to templates, symlink in distributions
This commit is contained in:
parent
81ed0327f3
commit
56f9b7d5d6
23 changed files with 136 additions and 142 deletions
|
@ -1,10 +0,0 @@
|
||||||
name: bedrock
|
|
||||||
distribution_spec:
|
|
||||||
description: Use Amazon Bedrock APIs.
|
|
||||||
providers:
|
|
||||||
inference: remote::bedrock
|
|
||||||
memory: meta-reference
|
|
||||||
safety: meta-reference
|
|
||||||
agents: meta-reference
|
|
||||||
telemetry: meta-reference
|
|
||||||
image_type: conda
|
|
1
distributions/bedrock/build.yaml
Symbolic link
1
distributions/bedrock/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../llama_stack/templates/bedrock/build.yaml
|
|
@ -1,10 +0,0 @@
|
||||||
name: databricks
|
|
||||||
distribution_spec:
|
|
||||||
description: Use Databricks for running LLM inference
|
|
||||||
providers:
|
|
||||||
inference: remote::databricks
|
|
||||||
memory: meta-reference
|
|
||||||
safety: meta-reference
|
|
||||||
agents: meta-reference
|
|
||||||
telemetry: meta-reference
|
|
||||||
image_type: conda
|
|
1
distributions/databricks/build.yaml
Symbolic link
1
distributions/databricks/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../llama_stack/templates/databricks/build.yaml
|
|
@ -1,10 +0,0 @@
|
||||||
name: fireworks
|
|
||||||
distribution_spec:
|
|
||||||
description: Use Fireworks.ai for running LLM inference
|
|
||||||
providers:
|
|
||||||
inference: remote::fireworks
|
|
||||||
memory: meta-reference
|
|
||||||
safety: meta-reference
|
|
||||||
agents: meta-reference
|
|
||||||
telemetry: meta-reference
|
|
||||||
image_type: docker
|
|
1
distributions/fireworks/build.yaml
Symbolic link
1
distributions/fireworks/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../llama_stack/templates/fireworks/build.yaml
|
|
@ -1,10 +0,0 @@
|
||||||
name: hf-endpoint
|
|
||||||
distribution_spec:
|
|
||||||
description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints."
|
|
||||||
providers:
|
|
||||||
inference: remote::hf::endpoint
|
|
||||||
memory: meta-reference
|
|
||||||
safety: meta-reference
|
|
||||||
agents: meta-reference
|
|
||||||
telemetry: meta-reference
|
|
||||||
image_type: conda
|
|
1
distributions/hf-endpoint/build.yaml
Symbolic link
1
distributions/hf-endpoint/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../llama_stack/templates/hf-endpoint/build.yaml
|
|
@ -1,10 +0,0 @@
|
||||||
name: hf-serverless
|
|
||||||
distribution_spec:
|
|
||||||
description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference."
|
|
||||||
providers:
|
|
||||||
inference: remote::hf::serverless
|
|
||||||
memory: meta-reference
|
|
||||||
safety: meta-reference
|
|
||||||
agents: meta-reference
|
|
||||||
telemetry: meta-reference
|
|
||||||
image_type: conda
|
|
1
distributions/hf-serverless/build.yaml
Symbolic link
1
distributions/hf-serverless/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../llama_stack/templates/hf-serverless/build.yaml
|
|
@ -1,14 +0,0 @@
|
||||||
name: meta-reference-gpu
|
|
||||||
distribution_spec:
|
|
||||||
docker_image: pytorch/pytorch
|
|
||||||
description: Use code from `llama_stack` itself to serve all llama stack APIs
|
|
||||||
providers:
|
|
||||||
inference: meta-reference
|
|
||||||
memory:
|
|
||||||
- meta-reference
|
|
||||||
- remote::chromadb
|
|
||||||
- remote::pgvector
|
|
||||||
safety: meta-reference
|
|
||||||
agents: meta-reference
|
|
||||||
telemetry: meta-reference
|
|
||||||
image_type: docker
|
|
1
distributions/meta-reference-gpu/build.yaml
Symbolic link
1
distributions/meta-reference-gpu/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../llama_stack/templates/meta-reference-gpu/build.yaml
|
|
@ -1,14 +0,0 @@
|
||||||
name: meta-reference-quantized-gpu
|
|
||||||
distribution_spec:
|
|
||||||
docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
|
|
||||||
description: Use code from `llama_stack` itself to serve all llama stack APIs
|
|
||||||
providers:
|
|
||||||
inference: meta-reference-quantized
|
|
||||||
memory:
|
|
||||||
- meta-reference
|
|
||||||
- remote::chromadb
|
|
||||||
- remote::pgvector
|
|
||||||
safety: meta-reference
|
|
||||||
agents: meta-reference
|
|
||||||
telemetry: meta-reference
|
|
||||||
image_type: docker
|
|
1
distributions/meta-reference-quantized-gpu/build.yaml
Symbolic link
1
distributions/meta-reference-quantized-gpu/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../llama_stack/templates/meta-reference-quantized-gpu/build.yaml
|
|
@ -1,13 +0,0 @@
|
||||||
name: ollama
|
|
||||||
distribution_spec:
|
|
||||||
description: Use ollama for running LLM inference
|
|
||||||
providers:
|
|
||||||
inference: remote::ollama
|
|
||||||
memory:
|
|
||||||
- meta-reference
|
|
||||||
- remote::chromadb
|
|
||||||
- remote::pgvector
|
|
||||||
safety: meta-reference
|
|
||||||
agents: meta-reference
|
|
||||||
telemetry: meta-reference
|
|
||||||
image_type: docker
|
|
1
distributions/ollama/build.yaml
Symbolic link
1
distributions/ollama/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../llama_stack/templates/ollama/build.yaml
|
|
@ -1,13 +0,0 @@
|
||||||
name: tgi
|
|
||||||
distribution_spec:
|
|
||||||
description: Use TGI for running LLM inference
|
|
||||||
providers:
|
|
||||||
inference: remote::tgi
|
|
||||||
memory:
|
|
||||||
- meta-reference
|
|
||||||
- remote::chromadb
|
|
||||||
- remote::pgvector
|
|
||||||
safety: meta-reference
|
|
||||||
agents: meta-reference
|
|
||||||
telemetry: meta-reference
|
|
||||||
image_type: docker
|
|
1
distributions/tgi/build.yaml
Symbolic link
1
distributions/tgi/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../llama_stack/templates/tgi/build.yaml
|
|
@ -1,10 +0,0 @@
|
||||||
name: together
|
|
||||||
distribution_spec:
|
|
||||||
description: Use Together.ai for running LLM inference
|
|
||||||
providers:
|
|
||||||
inference: remote::together
|
|
||||||
memory: remote::weaviate
|
|
||||||
safety: remote::together
|
|
||||||
agents: meta-reference
|
|
||||||
telemetry: meta-reference
|
|
||||||
image_type: docker
|
|
1
distributions/together/build.yaml
Symbolic link
1
distributions/together/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../llama_stack/templates/together/build.yaml
|
|
@ -1,10 +0,0 @@
|
||||||
name: vllm
|
|
||||||
distribution_spec:
|
|
||||||
description: Like local, but use vLLM for running LLM inference
|
|
||||||
providers:
|
|
||||||
inference: vllm
|
|
||||||
memory: meta-reference
|
|
||||||
safety: meta-reference
|
|
||||||
agents: meta-reference
|
|
||||||
telemetry: meta-reference
|
|
||||||
image_type: conda
|
|
1
distributions/vllm/build.yaml
Symbolic link
1
distributions/vllm/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../llama_stack/templates/vllm/build.yaml
|
|
@ -12,15 +12,7 @@ import os
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
TEMPLATES_PATH = (
|
TEMPLATES_PATH = Path(os.path.relpath(__file__)).parent.parent.parent / "templates"
|
||||||
Path(os.path.relpath(__file__)).parent.parent.parent.parent / "distributions----"
|
|
||||||
)
|
|
||||||
|
|
||||||
# build.yaml templates exist in the llama-stack/distributions while wheel installs llama-stack/llama_stack
|
|
||||||
# we copied the distributions folder to llama-stack/llama_stack/cli/distributions for wheel builds,
|
|
||||||
# so we need to check both locations
|
|
||||||
if not TEMPLATES_PATH.exists():
|
|
||||||
TEMPLATES_PATH = Path(os.path.relpath(__file__)).parent.parent.parent / "templates"
|
|
||||||
|
|
||||||
|
|
||||||
@lru_cache()
|
@lru_cache()
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
../../distributions/bedrock/build.yaml
|
|
10
llama_stack/templates/bedrock/build.yaml
Normal file
10
llama_stack/templates/bedrock/build.yaml
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
name: bedrock
|
||||||
|
distribution_spec:
|
||||||
|
description: Use Amazon Bedrock APIs.
|
||||||
|
providers:
|
||||||
|
inference: remote::bedrock
|
||||||
|
memory: meta-reference
|
||||||
|
safety: meta-reference
|
||||||
|
agents: meta-reference
|
||||||
|
telemetry: meta-reference
|
||||||
|
image_type: conda
|
10
llama_stack/templates/databricks/build.yaml
Normal file
10
llama_stack/templates/databricks/build.yaml
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
name: databricks
|
||||||
|
distribution_spec:
|
||||||
|
description: Use Databricks for running LLM inference
|
||||||
|
providers:
|
||||||
|
inference: remote::databricks
|
||||||
|
memory: meta-reference
|
||||||
|
safety: meta-reference
|
||||||
|
agents: meta-reference
|
||||||
|
telemetry: meta-reference
|
||||||
|
image_type: conda
|
|
@ -1 +0,0 @@
|
||||||
../../distributions/fireworks/build.yaml
|
|
10
llama_stack/templates/fireworks/build.yaml
Normal file
10
llama_stack/templates/fireworks/build.yaml
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
name: fireworks
|
||||||
|
distribution_spec:
|
||||||
|
description: Use Fireworks.ai for running LLM inference
|
||||||
|
providers:
|
||||||
|
inference: remote::fireworks
|
||||||
|
memory: meta-reference
|
||||||
|
safety: meta-reference
|
||||||
|
agents: meta-reference
|
||||||
|
telemetry: meta-reference
|
||||||
|
image_type: docker
|
|
@ -1 +0,0 @@
|
||||||
../../distributions/hf-endpoint/build.yaml
|
|
10
llama_stack/templates/hf-endpoint/build.yaml
Normal file
10
llama_stack/templates/hf-endpoint/build.yaml
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
name: hf-endpoint
|
||||||
|
distribution_spec:
|
||||||
|
description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints."
|
||||||
|
providers:
|
||||||
|
inference: remote::hf::endpoint
|
||||||
|
memory: meta-reference
|
||||||
|
safety: meta-reference
|
||||||
|
agents: meta-reference
|
||||||
|
telemetry: meta-reference
|
||||||
|
image_type: conda
|
|
@ -1 +0,0 @@
|
||||||
../../distributions/hf-serverless/build.yaml
|
|
10
llama_stack/templates/hf-serverless/build.yaml
Normal file
10
llama_stack/templates/hf-serverless/build.yaml
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
name: hf-serverless
|
||||||
|
distribution_spec:
|
||||||
|
description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference."
|
||||||
|
providers:
|
||||||
|
inference: remote::hf::serverless
|
||||||
|
memory: meta-reference
|
||||||
|
safety: meta-reference
|
||||||
|
agents: meta-reference
|
||||||
|
telemetry: meta-reference
|
||||||
|
image_type: conda
|
|
@ -1 +0,0 @@
|
||||||
../../distributions/meta-reference-gpu/build.yaml
|
|
14
llama_stack/templates/meta-reference-gpu/build.yaml
Normal file
14
llama_stack/templates/meta-reference-gpu/build.yaml
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
name: meta-reference-gpu
|
||||||
|
distribution_spec:
|
||||||
|
docker_image: pytorch/pytorch
|
||||||
|
description: Use code from `llama_stack` itself to serve all llama stack APIs
|
||||||
|
providers:
|
||||||
|
inference: meta-reference
|
||||||
|
memory:
|
||||||
|
- meta-reference
|
||||||
|
- remote::chromadb
|
||||||
|
- remote::pgvector
|
||||||
|
safety: meta-reference
|
||||||
|
agents: meta-reference
|
||||||
|
telemetry: meta-reference
|
||||||
|
image_type: docker
|
|
@ -0,0 +1,14 @@
|
||||||
|
name: meta-reference-quantized-gpu
|
||||||
|
distribution_spec:
|
||||||
|
docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
|
||||||
|
description: Use code from `llama_stack` itself to serve all llama stack APIs
|
||||||
|
providers:
|
||||||
|
inference: meta-reference-quantized
|
||||||
|
memory:
|
||||||
|
- meta-reference
|
||||||
|
- remote::chromadb
|
||||||
|
- remote::pgvector
|
||||||
|
safety: meta-reference
|
||||||
|
agents: meta-reference
|
||||||
|
telemetry: meta-reference
|
||||||
|
image_type: docker
|
|
@ -1 +0,0 @@
|
||||||
../../distributions/ollama/build.yaml
|
|
13
llama_stack/templates/ollama/build.yaml
Normal file
13
llama_stack/templates/ollama/build.yaml
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
name: ollama
|
||||||
|
distribution_spec:
|
||||||
|
description: Use ollama for running LLM inference
|
||||||
|
providers:
|
||||||
|
inference: remote::ollama
|
||||||
|
memory:
|
||||||
|
- meta-reference
|
||||||
|
- remote::chromadb
|
||||||
|
- remote::pgvector
|
||||||
|
safety: meta-reference
|
||||||
|
agents: meta-reference
|
||||||
|
telemetry: meta-reference
|
||||||
|
image_type: docker
|
|
@ -1 +0,0 @@
|
||||||
../../distributions/tgi/build.yaml
|
|
13
llama_stack/templates/tgi/build.yaml
Normal file
13
llama_stack/templates/tgi/build.yaml
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
name: tgi
|
||||||
|
distribution_spec:
|
||||||
|
description: Use TGI for running LLM inference
|
||||||
|
providers:
|
||||||
|
inference: remote::tgi
|
||||||
|
memory:
|
||||||
|
- meta-reference
|
||||||
|
- remote::chromadb
|
||||||
|
- remote::pgvector
|
||||||
|
safety: meta-reference
|
||||||
|
agents: meta-reference
|
||||||
|
telemetry: meta-reference
|
||||||
|
image_type: docker
|
|
@ -1 +0,0 @@
|
||||||
../../distributions/together/build.yaml
|
|
10
llama_stack/templates/together/build.yaml
Normal file
10
llama_stack/templates/together/build.yaml
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
name: together
|
||||||
|
distribution_spec:
|
||||||
|
description: Use Together.ai for running LLM inference
|
||||||
|
providers:
|
||||||
|
inference: remote::together
|
||||||
|
memory: remote::weaviate
|
||||||
|
safety: remote::together
|
||||||
|
agents: meta-reference
|
||||||
|
telemetry: meta-reference
|
||||||
|
image_type: docker
|
|
@ -1 +0,0 @@
|
||||||
../../distributions/vllm/build.yaml
|
|
10
llama_stack/templates/vllm/build.yaml
Normal file
10
llama_stack/templates/vllm/build.yaml
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
name: vllm
|
||||||
|
distribution_spec:
|
||||||
|
description: Like local, but use vLLM for running LLM inference
|
||||||
|
providers:
|
||||||
|
inference: vllm
|
||||||
|
memory: meta-reference
|
||||||
|
safety: meta-reference
|
||||||
|
agents: meta-reference
|
||||||
|
telemetry: meta-reference
|
||||||
|
image_type: conda
|
Loading…
Add table
Add a link
Reference in a new issue