From 56f9b7d5d6654462ac851a4c0183e91cc892456e Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Fri, 25 Oct 2024 11:54:09 -0700
Subject: [PATCH] move build.yaml to templates, symlink in distributions

---
 distributions/bedrock/build.yaml               | 11 +----------
 distributions/databricks/build.yaml            | 11 +----------
 distributions/fireworks/build.yaml             | 11 +----------
 distributions/hf-endpoint/build.yaml           | 11 +----------
 distributions/hf-serverless/build.yaml         | 11 +----------
 distributions/meta-reference-gpu/build.yaml    | 15 +--------------
 .../meta-reference-quantized-gpu/build.yaml    | 15 +--------------
 distributions/ollama/build.yaml                | 14 +-------------
 distributions/tgi/build.yaml                   | 14 +-------------
 distributions/together/build.yaml              | 11 +----------
 distributions/vllm/build.yaml                  | 11 +----------
 llama_stack/cli/stack/build.py                 | 10 +---------
 llama_stack/templates/bedrock/build.yaml       | 11 ++++++++++-
 llama_stack/templates/databricks/build.yaml    | 10 ++++++++++
 llama_stack/templates/fireworks/build.yaml     | 11 ++++++++++-
 llama_stack/templates/hf-endpoint/build.yaml   | 11 ++++++++++-
 llama_stack/templates/hf-serverless/build.yaml | 11 ++++++++++-
 .../templates/meta-reference-gpu/build.yaml    | 15 ++++++++++++++-
 .../meta-reference-quantized-gpu/build.yaml    | 14 ++++++++++++++
 llama_stack/templates/ollama/build.yaml        | 14 +++++++++++++-
 llama_stack/templates/tgi/build.yaml           | 14 +++++++++++++-
 llama_stack/templates/together/build.yaml      | 11 ++++++++++-
 llama_stack/templates/vllm/build.yaml          | 11 ++++++++++-
 23 files changed, 136 insertions(+), 142 deletions(-)
 mode change 100644 => 120000 distributions/bedrock/build.yaml
 mode change 100644 => 120000 distributions/databricks/build.yaml
 mode change 100644 => 120000 distributions/fireworks/build.yaml
 mode change 100644 => 120000 distributions/hf-endpoint/build.yaml
 mode change 100644 => 120000 distributions/hf-serverless/build.yaml
 mode change 100644 => 120000 distributions/meta-reference-gpu/build.yaml
 mode change 100644 => 120000 distributions/meta-reference-quantized-gpu/build.yaml
 mode change 100644 => 120000 distributions/ollama/build.yaml
 mode change 100644 => 120000 distributions/tgi/build.yaml
 mode change 100644 => 120000 distributions/together/build.yaml
 mode change 100644 => 120000 distributions/vllm/build.yaml
 mode change 120000 => 100644 llama_stack/templates/bedrock/build.yaml
 create mode 100644 llama_stack/templates/databricks/build.yaml
 mode change 120000 => 100644 llama_stack/templates/fireworks/build.yaml
 mode change 120000 => 100644 llama_stack/templates/hf-endpoint/build.yaml
 mode change 120000 => 100644 llama_stack/templates/hf-serverless/build.yaml
 mode change 120000 => 100644 llama_stack/templates/meta-reference-gpu/build.yaml
 create mode 100644 llama_stack/templates/meta-reference-quantized-gpu/build.yaml
 mode change 120000 => 100644 llama_stack/templates/ollama/build.yaml
 mode change 120000 => 100644 llama_stack/templates/tgi/build.yaml
 mode change 120000 => 100644 llama_stack/templates/together/build.yaml
 mode change 120000 => 100644 llama_stack/templates/vllm/build.yaml

diff --git a/distributions/bedrock/build.yaml b/distributions/bedrock/build.yaml
deleted file mode 100644
index ae7b27d49..000000000
--- a/distributions/bedrock/build.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: bedrock
-distribution_spec:
-  description: Use Amazon Bedrock APIs.
-  providers:
-    inference: remote::bedrock
-    memory: meta-reference
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: conda
diff --git a/distributions/bedrock/build.yaml b/distributions/bedrock/build.yaml
new file mode 120000
index 000000000..72402ef8d
--- /dev/null
+++ b/distributions/bedrock/build.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/bedrock/build.yaml
\ No newline at end of file
diff --git a/distributions/databricks/build.yaml b/distributions/databricks/build.yaml
deleted file mode 100644
index 2188dd0a0..000000000
--- a/distributions/databricks/build.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: databricks
-distribution_spec:
-  description: Use Databricks for running LLM inference
-  providers:
-    inference: remote::databricks
-    memory: meta-reference
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: conda
diff --git a/distributions/databricks/build.yaml b/distributions/databricks/build.yaml
new file mode 120000
index 000000000..66342fe6f
--- /dev/null
+++ b/distributions/databricks/build.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/databricks/build.yaml
\ No newline at end of file
diff --git a/distributions/fireworks/build.yaml b/distributions/fireworks/build.yaml
deleted file mode 100644
index 2e5cf0753..000000000
--- a/distributions/fireworks/build.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: fireworks
-distribution_spec:
-  description: Use Fireworks.ai for running LLM inference
-  providers:
-    inference: remote::fireworks
-    memory: meta-reference
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: docker
diff --git a/distributions/fireworks/build.yaml b/distributions/fireworks/build.yaml
new file mode 120000
index 000000000..32a5bd869
--- /dev/null
+++ b/distributions/fireworks/build.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/fireworks/build.yaml
\ No newline at end of file
diff --git a/distributions/hf-endpoint/build.yaml b/distributions/hf-endpoint/build.yaml
deleted file mode 100644
index 750bebcb5..000000000
--- a/distributions/hf-endpoint/build.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: hf-endpoint
-distribution_spec:
-  description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints."
-  providers:
-    inference: remote::hf::endpoint
-    memory: meta-reference
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: conda
diff --git a/distributions/hf-endpoint/build.yaml b/distributions/hf-endpoint/build.yaml
new file mode 120000
index 000000000..a73c70c05
--- /dev/null
+++ b/distributions/hf-endpoint/build.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/hf-endpoint/build.yaml
\ No newline at end of file
diff --git a/distributions/hf-serverless/build.yaml b/distributions/hf-serverless/build.yaml
deleted file mode 100644
index f6da3ad4d..000000000
--- a/distributions/hf-serverless/build.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: hf-serverless
-distribution_spec:
-  description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference."
-  providers:
-    inference: remote::hf::serverless
-    memory: meta-reference
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: conda
diff --git a/distributions/hf-serverless/build.yaml b/distributions/hf-serverless/build.yaml
new file mode 120000
index 000000000..f2db0fd55
--- /dev/null
+++ b/distributions/hf-serverless/build.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/hf-serverless/build.yaml
\ No newline at end of file
diff --git a/distributions/meta-reference-gpu/build.yaml b/distributions/meta-reference-gpu/build.yaml
deleted file mode 100644
index 5b1521a92..000000000
--- a/distributions/meta-reference-gpu/build.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-name: meta-reference-gpu
-distribution_spec:
-  docker_image: pytorch/pytorch
-  description: Use code from `llama_stack` itself to serve all llama stack APIs
-  providers:
-    inference: meta-reference
-    memory:
-    - meta-reference
-    - remote::chromadb
-    - remote::pgvector
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: docker
diff --git a/distributions/meta-reference-gpu/build.yaml b/distributions/meta-reference-gpu/build.yaml
new file mode 120000
index 000000000..4418195eb
--- /dev/null
+++ b/distributions/meta-reference-gpu/build.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/meta-reference-gpu/build.yaml
\ No newline at end of file
diff --git a/distributions/meta-reference-quantized-gpu/build.yaml b/distributions/meta-reference-quantized-gpu/build.yaml
deleted file mode 100644
index e9ddb4aad..000000000
--- a/distributions/meta-reference-quantized-gpu/build.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-name: meta-reference-quantized-gpu
-distribution_spec:
-  docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
-  description: Use code from `llama_stack` itself to serve all llama stack APIs
-  providers:
-    inference: meta-reference-quantized
-    memory:
-    - meta-reference
-    - remote::chromadb
-    - remote::pgvector
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: docker
diff --git a/distributions/meta-reference-quantized-gpu/build.yaml b/distributions/meta-reference-quantized-gpu/build.yaml
new file mode 120000
index 000000000..f3dbe996f
--- /dev/null
+++ b/distributions/meta-reference-quantized-gpu/build.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/meta-reference-quantized-gpu/build.yaml
\ No newline at end of file
diff --git a/distributions/ollama/build.yaml b/distributions/ollama/build.yaml
deleted file mode 100644
index c27f40929..000000000
--- a/distributions/ollama/build.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-name: ollama
-distribution_spec:
-  description: Use ollama for running LLM inference
-  providers:
-    inference: remote::ollama
-    memory:
-    - meta-reference
-    - remote::chromadb
-    - remote::pgvector
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: docker
diff --git a/distributions/ollama/build.yaml b/distributions/ollama/build.yaml
new file mode 120000
index 000000000..8772548e0
--- /dev/null
+++ b/distributions/ollama/build.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/ollama/build.yaml
\ No newline at end of file
diff --git a/distributions/tgi/build.yaml b/distributions/tgi/build.yaml
deleted file mode 100644
index 2c0ca1d33..000000000
--- a/distributions/tgi/build.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-name: tgi
-distribution_spec:
-  description: Use TGI for running LLM inference
-  providers:
-    inference: remote::tgi
-    memory:
-    - meta-reference
-    - remote::chromadb
-    - remote::pgvector
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: docker
diff --git a/distributions/tgi/build.yaml b/distributions/tgi/build.yaml
new file mode 120000
index 000000000..73e59ad84
--- /dev/null
+++ b/distributions/tgi/build.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/tgi/build.yaml
\ No newline at end of file
diff --git a/distributions/together/build.yaml b/distributions/together/build.yaml
deleted file mode 100644
index 49eab859d..000000000
--- a/distributions/together/build.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: together
-distribution_spec:
-  description: Use Together.ai for running LLM inference
-  providers:
-    inference: remote::together
-    memory: remote::weaviate
-    safety: remote::together
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: docker
diff --git a/distributions/together/build.yaml b/distributions/together/build.yaml
new file mode 120000
index 000000000..3877a9c96
--- /dev/null
+++ b/distributions/together/build.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/together/build.yaml
\ No newline at end of file
diff --git a/distributions/vllm/build.yaml b/distributions/vllm/build.yaml
deleted file mode 100644
index f41352eb1..000000000
--- a/distributions/vllm/build.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: vllm
-distribution_spec:
-  description: Like local, but use vLLM for running LLM inference
-  providers:
-    inference: vllm
-    memory: meta-reference
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: conda
\ No newline at end of file
diff --git a/distributions/vllm/build.yaml b/distributions/vllm/build.yaml
new file mode 120000
index 000000000..dfc9401b6
--- /dev/null
+++ b/distributions/vllm/build.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/vllm/build.yaml
\ No newline at end of file
diff --git a/llama_stack/cli/stack/build.py b/llama_stack/cli/stack/build.py
index fde471986..2273e67fb 100644
--- a/llama_stack/cli/stack/build.py
+++ b/llama_stack/cli/stack/build.py
@@ -12,15 +12,7 @@
 import os
 from functools import lru_cache
 from pathlib import Path
 
-TEMPLATES_PATH = (
-    Path(os.path.relpath(__file__)).parent.parent.parent.parent / "distributions"
-)
-
-# build.yaml templates exist in the llama-stack/distributions while wheel installs llama-stack/llama_stack
-# we copied the distributions folder to llama-stack/llama_stack/cli/distributions for wheel builds,
-# so we need to check both locations
-if not TEMPLATES_PATH.exists():
-    TEMPLATES_PATH = Path(os.path.relpath(__file__)).parent.parent.parent / "templates"
+TEMPLATES_PATH = Path(os.path.relpath(__file__)).parent.parent.parent / "templates"
 
 @lru_cache()
diff --git a/llama_stack/templates/bedrock/build.yaml b/llama_stack/templates/bedrock/build.yaml
deleted file mode 120000
index 28a253c78..000000000
--- a/llama_stack/templates/bedrock/build.yaml
+++ /dev/null
@@ -1 +0,0 @@
-../../distributions/bedrock/build.yaml
\ No newline at end of file
diff --git a/llama_stack/templates/bedrock/build.yaml b/llama_stack/templates/bedrock/build.yaml
new file mode 100644
index 000000000..ae7b27d49
--- /dev/null
+++ b/llama_stack/templates/bedrock/build.yaml
@@ -0,0 +1,10 @@
+name: bedrock
+distribution_spec:
+  description: Use Amazon Bedrock APIs.
+  providers:
+    inference: remote::bedrock
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
diff --git a/llama_stack/templates/databricks/build.yaml b/llama_stack/templates/databricks/build.yaml
new file mode 100644
index 000000000..2188dd0a0
--- /dev/null
+++ b/llama_stack/templates/databricks/build.yaml
@@ -0,0 +1,10 @@
+name: databricks
+distribution_spec:
+  description: Use Databricks for running LLM inference
+  providers:
+    inference: remote::databricks
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
diff --git a/llama_stack/templates/fireworks/build.yaml b/llama_stack/templates/fireworks/build.yaml
deleted file mode 120000
index 14e6cb2fe..000000000
--- a/llama_stack/templates/fireworks/build.yaml
+++ /dev/null
@@ -1 +0,0 @@
-../../distributions/fireworks/build.yaml
\ No newline at end of file
diff --git a/llama_stack/templates/fireworks/build.yaml b/llama_stack/templates/fireworks/build.yaml
new file mode 100644
index 000000000..2e5cf0753
--- /dev/null
+++ b/llama_stack/templates/fireworks/build.yaml
@@ -0,0 +1,10 @@
+name: fireworks
+distribution_spec:
+  description: Use Fireworks.ai for running LLM inference
+  providers:
+    inference: remote::fireworks
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/hf-endpoint/build.yaml b/llama_stack/templates/hf-endpoint/build.yaml
deleted file mode 120000
index 469d4da77..000000000
--- a/llama_stack/templates/hf-endpoint/build.yaml
+++ /dev/null
@@ -1 +0,0 @@
-../../distributions/hf-endpoint/build.yaml
\ No newline at end of file
diff --git a/llama_stack/templates/hf-endpoint/build.yaml b/llama_stack/templates/hf-endpoint/build.yaml
new file mode 100644
index 000000000..750bebcb5
--- /dev/null
+++ b/llama_stack/templates/hf-endpoint/build.yaml
@@ -0,0 +1,10 @@
+name: hf-endpoint
+distribution_spec:
+  description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints."
+  providers:
+    inference: remote::hf::endpoint
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
diff --git a/llama_stack/templates/hf-serverless/build.yaml b/llama_stack/templates/hf-serverless/build.yaml
deleted file mode 120000
index 6f14f7bc4..000000000
--- a/llama_stack/templates/hf-serverless/build.yaml
+++ /dev/null
@@ -1 +0,0 @@
-../../distributions/hf-serverless/build.yaml
\ No newline at end of file
diff --git a/llama_stack/templates/hf-serverless/build.yaml b/llama_stack/templates/hf-serverless/build.yaml
new file mode 100644
index 000000000..f6da3ad4d
--- /dev/null
+++ b/llama_stack/templates/hf-serverless/build.yaml
@@ -0,0 +1,10 @@
+name: hf-serverless
+distribution_spec:
+  description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference."
+  providers:
+    inference: remote::hf::serverless
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
diff --git a/llama_stack/templates/meta-reference-gpu/build.yaml b/llama_stack/templates/meta-reference-gpu/build.yaml
deleted file mode 120000
index 7a7c35fb5..000000000
--- a/llama_stack/templates/meta-reference-gpu/build.yaml
+++ /dev/null
@@ -1 +0,0 @@
-../../distributions/meta-reference-gpu/build.yaml
\ No newline at end of file
diff --git a/llama_stack/templates/meta-reference-gpu/build.yaml b/llama_stack/templates/meta-reference-gpu/build.yaml
new file mode 100644
index 000000000..5b1521a92
--- /dev/null
+++ b/llama_stack/templates/meta-reference-gpu/build.yaml
@@ -0,0 +1,14 @@
+name: meta-reference-gpu
+distribution_spec:
+  docker_image: pytorch/pytorch
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
+  providers:
+    inference: meta-reference
+    memory:
+    - meta-reference
+    - remote::chromadb
+    - remote::pgvector
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml
new file mode 100644
index 000000000..e9ddb4aad
--- /dev/null
+++ b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml
@@ -0,0 +1,14 @@
+name: meta-reference-quantized-gpu
+distribution_spec:
+  docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
+  providers:
+    inference: meta-reference-quantized
+    memory:
+    - meta-reference
+    - remote::chromadb
+    - remote::pgvector
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml
deleted file mode 120000
index d8bf8a15d..000000000
--- a/llama_stack/templates/ollama/build.yaml
+++ /dev/null
@@ -1 +0,0 @@
-../../distributions/ollama/build.yaml
\ No newline at end of file
diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml
new file mode 100644
index 000000000..c27f40929
--- /dev/null
+++ b/llama_stack/templates/ollama/build.yaml
@@ -0,0 +1,13 @@
+name: ollama
+distribution_spec:
+  description: Use ollama for running LLM inference
+  providers:
+    inference: remote::ollama
+    memory:
+    - meta-reference
+    - remote::chromadb
+    - remote::pgvector
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/tgi/build.yaml b/llama_stack/templates/tgi/build.yaml
deleted file mode 120000
index b3144911c..000000000
--- a/llama_stack/templates/tgi/build.yaml
+++ /dev/null
@@ -1 +0,0 @@
-../../distributions/tgi/build.yaml
\ No newline at end of file
diff --git a/llama_stack/templates/tgi/build.yaml b/llama_stack/templates/tgi/build.yaml
new file mode 100644
index 000000000..2c0ca1d33
--- /dev/null
+++ b/llama_stack/templates/tgi/build.yaml
@@ -0,0 +1,13 @@
+name: tgi
+distribution_spec:
+  description: Use TGI for running LLM inference
+  providers:
+    inference: remote::tgi
+    memory:
+    - meta-reference
+    - remote::chromadb
+    - remote::pgvector
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml
deleted file mode 120000
index 69bc4510a..000000000
--- a/llama_stack/templates/together/build.yaml
+++ /dev/null
@@ -1 +0,0 @@
-../../distributions/together/build.yaml
\ No newline at end of file
diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml
new file mode 100644
index 000000000..49eab859d
--- /dev/null
+++ b/llama_stack/templates/together/build.yaml
@@ -0,0 +1,10 @@
+name: together
+distribution_spec:
+  description: Use Together.ai for running LLM inference
+  providers:
+    inference: remote::together
+    memory: remote::weaviate
+    safety: remote::together
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/vllm/build.yaml b/llama_stack/templates/vllm/build.yaml
deleted file mode 120000
index 8152145d3..000000000
--- a/llama_stack/templates/vllm/build.yaml
+++ /dev/null
@@ -1 +0,0 @@
-../../distributions/vllm/build.yaml
\ No newline at end of file
diff --git a/llama_stack/templates/vllm/build.yaml b/llama_stack/templates/vllm/build.yaml
new file mode 100644
index 000000000..f41352eb1
--- /dev/null
+++ b/llama_stack/templates/vllm/build.yaml
@@ -0,0 +1,10 @@
+name: vllm
+distribution_spec:
+  description: Like local, but use vLLM for running LLM inference
+  providers:
+    inference: vllm
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
\ No newline at end of file
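
Note on verifying the new layout: after this patch the canonical build.yaml
files live under llama_stack/templates/ (the tree that ships in the wheel, per
the comment removed from build.py above), and each distributions/<name>/build.yaml
is a relative symlink back to its template. Below is a minimal sketch for
checking that every symlink resolves as intended; it is illustrative rather
than part of the patch, assumes it runs from the repository root, and takes
only the directory layout from the diff above.

#!/usr/bin/env python3
# Illustrative check (not part of the patch): every
# distributions/<name>/build.yaml should now be a symlink (mode 120000)
# resolving to llama_stack/templates/<name>/build.yaml.
from pathlib import Path

DISTRIBUTIONS = Path("distributions")           # assumes cwd = repo root
TEMPLATES = Path("llama_stack") / "templates"

def check_symlinks() -> bool:
    ok = True
    for link in sorted(DISTRIBUTIONS.glob("*/build.yaml")):
        expected = TEMPLATES / link.parent.name / "build.yaml"
        if not link.is_symlink():
            print(f"FAIL {link}: expected a symlink")
            ok = False
        elif link.resolve() != expected.resolve():
            print(f"FAIL {link}: resolves to {link.resolve()}, expected {expected}")
            ok = False
        else:
            print(f"ok   {link} -> {expected}")
    return ok

if __name__ == "__main__":
    raise SystemExit(0 if check_symlinks() else 1)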