mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-29 15:23:51 +00:00
move build.yaml to templates, symlink in distributions
This commit is contained in:
parent
81ed0327f3
commit
56f9b7d5d6
23 changed files with 136 additions and 142 deletions
|
@ -1,10 +0,0 @@
|
|||
name: bedrock
|
||||
distribution_spec:
|
||||
description: Use Amazon Bedrock APIs.
|
||||
providers:
|
||||
inference: remote::bedrock
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: conda
|
1
distributions/bedrock/build.yaml
Symbolic link
1
distributions/bedrock/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../llama_stack/templates/bedrock/build.yaml
|
|
@ -1,10 +0,0 @@
|
|||
name: databricks
|
||||
distribution_spec:
|
||||
description: Use Databricks for running LLM inference
|
||||
providers:
|
||||
inference: remote::databricks
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: conda
|
1
distributions/databricks/build.yaml
Symbolic link
1
distributions/databricks/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../llama_stack/templates/databricks/build.yaml
|
|
@ -1,10 +0,0 @@
|
|||
name: fireworks
|
||||
distribution_spec:
|
||||
description: Use Fireworks.ai for running LLM inference
|
||||
providers:
|
||||
inference: remote::fireworks
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: docker
|
1
distributions/fireworks/build.yaml
Symbolic link
1
distributions/fireworks/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../llama_stack/templates/fireworks/build.yaml
|
|
@ -1,10 +0,0 @@
|
|||
name: hf-endpoint
|
||||
distribution_spec:
|
||||
description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints."
|
||||
providers:
|
||||
inference: remote::hf::endpoint
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: conda
|
1
distributions/hf-endpoint/build.yaml
Symbolic link
1
distributions/hf-endpoint/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../llama_stack/templates/hf-endpoint/build.yaml
|
|
@ -1,10 +0,0 @@
|
|||
name: hf-serverless
|
||||
distribution_spec:
|
||||
description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference."
|
||||
providers:
|
||||
inference: remote::hf::serverless
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: conda
|
1
distributions/hf-serverless/build.yaml
Symbolic link
1
distributions/hf-serverless/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../llama_stack/templates/hf-serverless/build.yaml
|
|
@ -1,14 +0,0 @@
|
|||
name: meta-reference-gpu
|
||||
distribution_spec:
|
||||
docker_image: pytorch/pytorch
|
||||
description: Use code from `llama_stack` itself to serve all llama stack APIs
|
||||
providers:
|
||||
inference: meta-reference
|
||||
memory:
|
||||
- meta-reference
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: docker
|
1
distributions/meta-reference-gpu/build.yaml
Symbolic link
1
distributions/meta-reference-gpu/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../llama_stack/templates/meta-reference-gpu/build.yaml
|
|
@ -1,14 +0,0 @@
|
|||
name: meta-reference-quantized-gpu
|
||||
distribution_spec:
|
||||
docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
|
||||
description: Use code from `llama_stack` itself to serve all llama stack APIs
|
||||
providers:
|
||||
inference: meta-reference-quantized
|
||||
memory:
|
||||
- meta-reference
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: docker
|
1
distributions/meta-reference-quantized-gpu/build.yaml
Symbolic link
1
distributions/meta-reference-quantized-gpu/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../llama_stack/templates/meta-reference-quantized-gpu/build.yaml
|
|
@ -1,13 +0,0 @@
|
|||
name: ollama
|
||||
distribution_spec:
|
||||
description: Use ollama for running LLM inference
|
||||
providers:
|
||||
inference: remote::ollama
|
||||
memory:
|
||||
- meta-reference
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: docker
|
1
distributions/ollama/build.yaml
Symbolic link
1
distributions/ollama/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../llama_stack/templates/ollama/build.yaml
|
|
@ -1,13 +0,0 @@
|
|||
name: tgi
|
||||
distribution_spec:
|
||||
description: Use TGI for running LLM inference
|
||||
providers:
|
||||
inference: remote::tgi
|
||||
memory:
|
||||
- meta-reference
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: docker
|
1
distributions/tgi/build.yaml
Symbolic link
1
distributions/tgi/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../llama_stack/templates/tgi/build.yaml
|
|
@ -1,10 +0,0 @@
|
|||
name: together
|
||||
distribution_spec:
|
||||
description: Use Together.ai for running LLM inference
|
||||
providers:
|
||||
inference: remote::together
|
||||
memory: remote::weaviate
|
||||
safety: remote::together
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: docker
|
1
distributions/together/build.yaml
Symbolic link
1
distributions/together/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../llama_stack/templates/together/build.yaml
|
|
@ -1,10 +0,0 @@
|
|||
name: vllm
|
||||
distribution_spec:
|
||||
description: Like local, but use vLLM for running LLM inference
|
||||
providers:
|
||||
inference: vllm
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: conda
|
1
distributions/vllm/build.yaml
Symbolic link
1
distributions/vllm/build.yaml
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../llama_stack/templates/vllm/build.yaml
|
|
@ -12,15 +12,7 @@ import os
|
|||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
|
||||
TEMPLATES_PATH = (
|
||||
Path(os.path.relpath(__file__)).parent.parent.parent.parent / "distributions----"
|
||||
)
|
||||
|
||||
# build.yaml templates exist in the llama-stack/distributions while wheel installs llama-stack/llama_stack
|
||||
# we copied the distributions folder to llama-stack/llama_stack/cli/distributions for wheel builds,
|
||||
# so we need to check both locations
|
||||
if not TEMPLATES_PATH.exists():
|
||||
TEMPLATES_PATH = Path(os.path.relpath(__file__)).parent.parent.parent / "templates"
|
||||
TEMPLATES_PATH = Path(os.path.relpath(__file__)).parent.parent.parent / "templates"
|
||||
|
||||
|
||||
@lru_cache()
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
../../distributions/bedrock/build.yaml
|
10
llama_stack/templates/bedrock/build.yaml
Normal file
10
llama_stack/templates/bedrock/build.yaml
Normal file
|
@ -0,0 +1,10 @@
|
|||
name: bedrock
|
||||
distribution_spec:
|
||||
description: Use Amazon Bedrock APIs.
|
||||
providers:
|
||||
inference: remote::bedrock
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: conda
|
10
llama_stack/templates/databricks/build.yaml
Normal file
10
llama_stack/templates/databricks/build.yaml
Normal file
|
@ -0,0 +1,10 @@
|
|||
name: databricks
|
||||
distribution_spec:
|
||||
description: Use Databricks for running LLM inference
|
||||
providers:
|
||||
inference: remote::databricks
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: conda
|
|
@ -1 +0,0 @@
|
|||
../../distributions/fireworks/build.yaml
|
10
llama_stack/templates/fireworks/build.yaml
Normal file
10
llama_stack/templates/fireworks/build.yaml
Normal file
|
@ -0,0 +1,10 @@
|
|||
name: fireworks
|
||||
distribution_spec:
|
||||
description: Use Fireworks.ai for running LLM inference
|
||||
providers:
|
||||
inference: remote::fireworks
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: docker
|
|
@ -1 +0,0 @@
|
|||
../../distributions/hf-endpoint/build.yaml
|
10
llama_stack/templates/hf-endpoint/build.yaml
Normal file
10
llama_stack/templates/hf-endpoint/build.yaml
Normal file
|
@ -0,0 +1,10 @@
|
|||
name: hf-endpoint
|
||||
distribution_spec:
|
||||
description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints."
|
||||
providers:
|
||||
inference: remote::hf::endpoint
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: conda
|
|
@ -1 +0,0 @@
|
|||
../../distributions/hf-serverless/build.yaml
|
10
llama_stack/templates/hf-serverless/build.yaml
Normal file
10
llama_stack/templates/hf-serverless/build.yaml
Normal file
|
@ -0,0 +1,10 @@
|
|||
name: hf-serverless
|
||||
distribution_spec:
|
||||
description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference."
|
||||
providers:
|
||||
inference: remote::hf::serverless
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: conda
|
|
@ -1 +0,0 @@
|
|||
../../distributions/meta-reference-gpu/build.yaml
|
14
llama_stack/templates/meta-reference-gpu/build.yaml
Normal file
14
llama_stack/templates/meta-reference-gpu/build.yaml
Normal file
|
@ -0,0 +1,14 @@
|
|||
name: meta-reference-gpu
|
||||
distribution_spec:
|
||||
docker_image: pytorch/pytorch
|
||||
description: Use code from `llama_stack` itself to serve all llama stack APIs
|
||||
providers:
|
||||
inference: meta-reference
|
||||
memory:
|
||||
- meta-reference
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: docker
|
|
@ -0,0 +1,14 @@
|
|||
name: meta-reference-quantized-gpu
|
||||
distribution_spec:
|
||||
docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
|
||||
description: Use code from `llama_stack` itself to serve all llama stack APIs
|
||||
providers:
|
||||
inference: meta-reference-quantized
|
||||
memory:
|
||||
- meta-reference
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: docker
|
|
@ -1 +0,0 @@
|
|||
../../distributions/ollama/build.yaml
|
13
llama_stack/templates/ollama/build.yaml
Normal file
13
llama_stack/templates/ollama/build.yaml
Normal file
|
@ -0,0 +1,13 @@
|
|||
name: ollama
|
||||
distribution_spec:
|
||||
description: Use ollama for running LLM inference
|
||||
providers:
|
||||
inference: remote::ollama
|
||||
memory:
|
||||
- meta-reference
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: docker
|
|
@ -1 +0,0 @@
|
|||
../../distributions/tgi/build.yaml
|
13
llama_stack/templates/tgi/build.yaml
Normal file
13
llama_stack/templates/tgi/build.yaml
Normal file
|
@ -0,0 +1,13 @@
|
|||
name: tgi
|
||||
distribution_spec:
|
||||
description: Use TGI for running LLM inference
|
||||
providers:
|
||||
inference: remote::tgi
|
||||
memory:
|
||||
- meta-reference
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: docker
|
|
@ -1 +0,0 @@
|
|||
../../distributions/together/build.yaml
|
10
llama_stack/templates/together/build.yaml
Normal file
10
llama_stack/templates/together/build.yaml
Normal file
|
@ -0,0 +1,10 @@
|
|||
name: together
|
||||
distribution_spec:
|
||||
description: Use Together.ai for running LLM inference
|
||||
providers:
|
||||
inference: remote::together
|
||||
memory: remote::weaviate
|
||||
safety: remote::together
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: docker
|
|
@ -1 +0,0 @@
|
|||
../../distributions/vllm/build.yaml
|
10
llama_stack/templates/vllm/build.yaml
Normal file
10
llama_stack/templates/vllm/build.yaml
Normal file
|
@ -0,0 +1,10 @@
|
|||
name: vllm
|
||||
distribution_spec:
|
||||
description: Like local, but use vLLM for running LLM inference
|
||||
providers:
|
||||
inference: vllm
|
||||
memory: meta-reference
|
||||
safety: meta-reference
|
||||
agents: meta-reference
|
||||
telemetry: meta-reference
|
||||
image_type: conda
|
Loading…
Add table
Add a link
Reference in a new issue