llama-stack-mirror/llama_stack/templates/hf-serverless/build.yaml

# Build definition for the `hf-serverless` distribution template.
name: hf-serverless

distribution_spec:
  # Human-readable summary. Written as a literal block scalar with strip
  # chomping (`|-`): the line break between the two sentences is preserved
  # and no trailing newline is appended.
  description: |-
    Like local, but use Hugging Face Inference API (serverless) for running LLM inference.
    See https://hf.co/docs/api-inference.

  # One provider identifier per API. The values are plain scalars; `::` is
  # safe unquoted because no colon is followed by a space.
  # NOTE(review): the `remote::` / `inline::` prefixes presumably distinguish
  # hosted services from in-process implementations; confirm against the
  # consumer's provider registry.
  providers:
    # Inference goes to the Hugging Face serverless API described above.
    inference: remote::hf::serverless
    memory: inline::faiss
    safety: inline::llama-guard
    agents: inline::meta-reference
    telemetry: inline::meta-reference