add inline-vllm details, fix things

This commit is contained in:
Ashwin Bharambe 2024-11-08 12:01:05 -08:00
parent 02c66b49fc
commit 38cdbdec5a
12 changed files with 142 additions and 101 deletions

View file

@@ -0,0 +1,13 @@
---
# Build configuration for the `meta-reference-gpu` llama-stack distribution.
# NOTE(review): indentation was flattened in the pasted diff; nesting below is
# reconstructed from the llama-stack distribution_spec schema — confirm against
# the committed file.
name: meta-reference-gpu
distribution_spec:
  # CUDA-enabled PyTorch runtime image used as the container base.
  docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
  description: Use code from `llama_stack` itself to serve all llama stack APIs
  # Provider implementation chosen for each llama-stack API.
  providers:
    inference: meta-reference
    # Multiple memory backends: the inline reference store plus two
    # remote vector-database adapters.
    memory:
      - meta-reference
      - remote::chromadb
      - remote::pgvector
    safety: meta-reference
    agents: meta-reference
    telemetry: meta-reference