add inline-vllm details, fix things

This commit is contained in:
Ashwin Bharambe 2024-11-08 12:01:05 -08:00
parent 02c66b49fc
commit 38cdbdec5a
12 changed files with 142 additions and 101 deletions

View file

@@ -0,0 +1,13 @@
---
# Build configuration for the `meta-reference-gpu` llama-stack distribution.
# NOTE(review): indentation was flattened in the pasted diff; nesting below is
# reconstructed from the llama-stack distribution_spec schema — confirm against
# the committed file.
name: meta-reference-gpu
distribution_spec:
  # CUDA-enabled PyTorch runtime image used as the container base.
  docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
  description: Use code from `llama_stack` itself to serve all llama stack APIs
  # Provider implementation chosen for each llama-stack API.
  providers:
    inference: meta-reference
    # Multiple memory backends: the inline reference store plus two
    # remote vector-database adapters.
    memory:
      - meta-reference
      - remote::chromadb
      - remote::pgvector
    safety: meta-reference
    agents: meta-reference
    telemetry: meta-reference