Make TGI adapter compatible with HF Inference API (#97)

2025-06-30 19:53:44 +00:00 · 2024-09-25 23:08:31 +02:00 · 2024-09-25 23:08:31 +02:00 · 615ed4bfbc
commit 615ed4bfbc
parent 851c30597a
7 changed files with 122 additions and 96 deletions
--- a/llama_stack/distribution/templates/local-hf-endpoint-build.yaml
+++ b/llama_stack/distribution/templates/local-hf-endpoint-build.yaml
@@ -0,0 +1,10 @@
+name: local-hf-endpoint
+distribution_spec:
+  description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints."
+  providers:
+    inference: remote::hf::endpoint
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
--- a/llama_stack/distribution/templates/local-hf-serverless-build.yaml
+++ b/llama_stack/distribution/templates/local-hf-serverless-build.yaml
@@ -0,0 +1,10 @@
+name: local-hf-serverless
+distribution_spec:
+  description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference."
+  providers:
+    inference: remote::hf::serverless
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
--- a/llama_stack/distribution/templates/local-tgi-build.yaml
+++ b/llama_stack/distribution/templates/local-tgi-build.yaml
@@ -1,6 +1,6 @@
 name: local-tgi
 distribution_spec:
-  description: Use TGI (local or with Hugging Face Inference Endpoints for running LLM inference. When using HF Inference Endpoints, you must provide the name of the endpoint).
+  description: Like local, but use a TGI server for running LLM inference.
  providers:
    inference: remote::tgi
    memory: meta-reference