[Inference] Use huggingface_hub inference client for TGI adapter (#53)

* Use huggingface_hub inference client for TGI inference

* Update the default value for TGI URL

* Use InferenceClient.text_generation for TGI inference (sketched after this list)

* Apply post-review fixes and split the TGI adapter into local and Inference Endpoints adapters

* Update CLI reference and add typing

* Rename TGI Adapter class

* Use HfApi to get the namespace when not provided in the HF endpoint name

* Remove unnecessary method argument

* Improve TGI adapter initialization condition

* Move helper into impl file + fix merge conflicts
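
For context on the bullets above, here is a minimal, hypothetical sketch of the two pieces they describe: generation routed through huggingface_hub's InferenceClient.text_generation against a TGI URL, and an HfApi-based fallback for the namespace when an Inference Endpoints name omits it. The class name, helper name, default URL, and parameters are illustrative assumptions, not the adapter's actual code.

from huggingface_hub import HfApi, InferenceClient


class LocalTGIAdapter:
    # Illustrative stand-in for the renamed local TGI adapter class.
    def __init__(self, url: str = "http://localhost:8080") -> None:
        # Assumed default URL; InferenceClient accepts a direct URL to a running TGI server.
        self.client = InferenceClient(model=url)

    def generate(self, prompt: str, max_new_tokens: int = 256) -> str:
        # The commit switches generation to huggingface_hub's text_generation helper.
        return self.client.text_generation(prompt, max_new_tokens=max_new_tokens)


def resolve_namespace(endpoint_name: str, token: str | None = None) -> str:
    # Hypothetical helper: if the endpoint name carries no "namespace/" prefix,
    # fall back to the authenticated account's namespace via HfApi.whoami().
    if "/" in endpoint_name:
        return endpoint_name.split("/", 1)[0]
    return HfApi(token=token).whoami()["name"]

An Inference Endpoints adapter would presumably resolve the endpoint URL first and then reuse the same text_generation call, which is the split the bullets describe.
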
Celina Hanouti, 2024-09-12 18:11:35 +02:00, committed by GitHub
parent 191cd28831
commit 736092f6bc
6 changed files with 171 additions and 72 deletions


@@ -65,11 +65,23 @@ def available_distribution_specs() -> List[DistributionSpec]:
                 Api.telemetry: "console",
             },
         ),
+        DistributionSpec(
+            distribution_type="local-plus-tgi-inference",
+            description="Use TGI for running LLM inference",
+            providers={
+                Api.inference: remote_provider_type("tgi"),
+                Api.safety: "meta-reference",
+                Api.agentic_system: "meta-reference",
+                Api.memory: "meta-reference-faiss",
+            },
+        ),
     ]


 @lru_cache()
-def resolve_distribution_spec(distribution_type: str) -> Optional[DistributionSpec]:
+def resolve_distribution_spec(
+    distribution_type: str,
+) -> Optional[DistributionSpec]:
     for spec in available_distribution_specs():
         if spec.distribution_type == distribution_type:
             return spec
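
As a quick sanity check (hypothetical snippet, not part of the diff), the spec added above should now resolve through the reformatted function:

spec = resolve_distribution_spec("local-plus-tgi-inference")
assert spec is not None
assert spec.providers[Api.inference] == remote_provider_type("tgi")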