From 19adb4070abdebc826bf3de4ec3061fcec03d3e5 Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Fri, 25 Oct 2024 12:08:32 -0700
Subject: [PATCH] add build files to templates

---
 llama_stack/templates/bedrock/build.yaml           | 10 ++++++++++
 llama_stack/templates/databricks/build.yaml        | 10 ++++++++++
 llama_stack/templates/fireworks/build.yaml         | 10 ++++++++++
 llama_stack/templates/hf-endpoint/build.yaml       | 10 ++++++++++
 llama_stack/templates/hf-serverless/build.yaml     | 10 ++++++++++
 .../templates/meta-reference-gpu/build.yaml        | 14 ++++++++++++++
 .../meta-reference-quantized-gpu/build.yaml        | 14 ++++++++++++++
 llama_stack/templates/ollama/build.yaml            | 13 +++++++++++++
 llama_stack/templates/tgi/build.yaml               | 13 +++++++++++++
 llama_stack/templates/together/build.yaml          | 10 ++++++++++
 llama_stack/templates/vllm/build.yaml              | 10 ++++++++++
 11 files changed, 124 insertions(+)
 create mode 100644 llama_stack/templates/bedrock/build.yaml
 create mode 100644 llama_stack/templates/databricks/build.yaml
 create mode 100644 llama_stack/templates/fireworks/build.yaml
 create mode 100644 llama_stack/templates/hf-endpoint/build.yaml
 create mode 100644 llama_stack/templates/hf-serverless/build.yaml
 create mode 100644 llama_stack/templates/meta-reference-gpu/build.yaml
 create mode 100644 llama_stack/templates/meta-reference-quantized-gpu/build.yaml
 create mode 100644 llama_stack/templates/ollama/build.yaml
 create mode 100644 llama_stack/templates/tgi/build.yaml
 create mode 100644 llama_stack/templates/together/build.yaml
 create mode 100644 llama_stack/templates/vllm/build.yaml

diff --git a/llama_stack/templates/bedrock/build.yaml b/llama_stack/templates/bedrock/build.yaml
new file mode 100644
index 000000000..ae7b27d49
--- /dev/null
+++ b/llama_stack/templates/bedrock/build.yaml
@@ -0,0 +1,10 @@
+name: bedrock
+distribution_spec:
+  description: Use Amazon Bedrock APIs.
+  providers:
+    inference: remote::bedrock
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
diff --git a/llama_stack/templates/databricks/build.yaml b/llama_stack/templates/databricks/build.yaml
new file mode 100644
index 000000000..2188dd0a0
--- /dev/null
+++ b/llama_stack/templates/databricks/build.yaml
@@ -0,0 +1,10 @@
+name: databricks
+distribution_spec:
+  description: Use Databricks for running LLM inference
+  providers:
+    inference: remote::databricks
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
diff --git a/llama_stack/templates/fireworks/build.yaml b/llama_stack/templates/fireworks/build.yaml
new file mode 100644
index 000000000..2e5cf0753
--- /dev/null
+++ b/llama_stack/templates/fireworks/build.yaml
@@ -0,0 +1,10 @@
+name: fireworks
+distribution_spec:
+  description: Use Fireworks.ai for running LLM inference
+  providers:
+    inference: remote::fireworks
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/hf-endpoint/build.yaml b/llama_stack/templates/hf-endpoint/build.yaml
new file mode 100644
index 000000000..750bebcb5
--- /dev/null
+++ b/llama_stack/templates/hf-endpoint/build.yaml
@@ -0,0 +1,10 @@
+name: hf-endpoint
+distribution_spec:
+  description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints."
+  providers:
+    inference: remote::hf::endpoint
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
diff --git a/llama_stack/templates/hf-serverless/build.yaml b/llama_stack/templates/hf-serverless/build.yaml
new file mode 100644
index 000000000..f6da3ad4d
--- /dev/null
+++ b/llama_stack/templates/hf-serverless/build.yaml
@@ -0,0 +1,10 @@
+name: hf-serverless
+distribution_spec:
+  description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference."
+  providers:
+    inference: remote::hf::serverless
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
diff --git a/llama_stack/templates/meta-reference-gpu/build.yaml b/llama_stack/templates/meta-reference-gpu/build.yaml
new file mode 100644
index 000000000..08e034154
--- /dev/null
+++ b/llama_stack/templates/meta-reference-gpu/build.yaml
@@ -0,0 +1,14 @@
+name: meta-reference-gpu
+distribution_spec:
+  docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
+  providers:
+    inference: meta-reference
+    memory:
+    - meta-reference
+    - remote::chromadb
+    - remote::pgvector
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml
new file mode 100644
index 000000000..e9ddb4aad
--- /dev/null
+++ b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml
@@ -0,0 +1,14 @@
+name: meta-reference-quantized-gpu
+distribution_spec:
+  docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
+  providers:
+    inference: meta-reference-quantized
+    memory:
+    - meta-reference
+    - remote::chromadb
+    - remote::pgvector
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml
new file mode 100644
index 000000000..c27f40929
--- /dev/null
+++ b/llama_stack/templates/ollama/build.yaml
@@ -0,0 +1,13 @@
+name: ollama
+distribution_spec:
+  description: Use ollama for running LLM inference
+  providers:
+    inference: remote::ollama
+    memory:
+    - meta-reference
+    - remote::chromadb
+    - remote::pgvector
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/tgi/build.yaml b/llama_stack/templates/tgi/build.yaml
new file mode 100644
index 000000000..2c0ca1d33
--- /dev/null
+++ b/llama_stack/templates/tgi/build.yaml
@@ -0,0 +1,13 @@
+name: tgi
+distribution_spec:
+  description: Use TGI for running LLM inference
+  providers:
+    inference: remote::tgi
+    memory:
+    - meta-reference
+    - remote::chromadb
+    - remote::pgvector
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml
new file mode 100644
index 000000000..49eab859d
--- /dev/null
+++ b/llama_stack/templates/together/build.yaml
@@ -0,0 +1,10 @@
+name: together
+distribution_spec:
+  description: Use Together.ai for running LLM inference
+  providers:
+    inference: remote::together
+    memory: remote::weaviate
+    safety: remote::together
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/vllm/build.yaml b/llama_stack/templates/vllm/build.yaml
new file mode 100644
index 000000000..814fafd32
--- /dev/null
+++ b/llama_stack/templates/vllm/build.yaml
@@ -0,0 +1,10 @@
+name: vllm
+distribution_spec:
+  description: Like local, but use vLLM for running LLM inference
+  providers:
+    inference: vllm
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
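
Note: as a quick sanity check of the build files added above, here is a minimal Python sketch (an illustration, not part of the patch) that loads one of these templates and prints its distribution spec. It assumes PyYAML is installed and that the patch has been applied at the repository root, so the bedrock template path exists.

    import yaml

    # Load one of the template build files added in this patch; the path assumes
    # the patch has been applied at the repository root.
    with open("llama_stack/templates/bedrock/build.yaml") as f:
        build_config = yaml.safe_load(f)

    print(build_config["name"])        # bedrock
    print(build_config["image_type"])  # conda

    # providers maps each API to a provider id; note that "memory" is a list of
    # provider ids in some templates (ollama, tgi, meta-reference-gpu) rather
    # than a single string as in the bedrock template shown here.
    for api, provider in build_config["distribution_spec"]["providers"].items():
        print(f"{api}: {provider}")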