diff --git a/docs/cli_reference.md b/docs/cli_reference.md
index 68942d552..aca750224 100644
--- a/docs/cli_reference.md
+++ b/docs/cli_reference.md
@@ -285,10 +285,10 @@ llama stack list-distributions
 |                                | "agentic_system": "meta-reference",   |                                                                                           |
 |                                | "memory": "meta-reference-faiss"      |                                                                                           |
 |                                | }                                     |                                                                                           |
-+--------------------------------+---------------------------------------+-------------------------------------------------------------------------------------------+
-| local-plus-tgi-inference       | {                                     | Use TGI (local or with                                                                    |
-|                                | "inference": "remote::tgi",           | Hugging Face Inference Endpoints) for running LLM inference                               |
-|                                | "safety": "meta-reference",           |                                                                                           |
+|--------------------------------|---------------------------------------|-------------------------------------------------------------------------------------------|
+| local-plus-tgi-inference       | {                                     | Use TGI (local or with [Hugging Face Inference Endpoints](https://huggingface.co/         |
+|                                | "inference": "remote::tgi",           | inference-endpoints/dedicated)) for running LLM inference. When using HF Inference        |
+|                                | "safety": "meta-reference",           | Endpoints, please provide hf_namespace (username or organization name) and endpoint name. |
 |                                | "agentic_system": "meta-reference",   |                                                                                           |
 |                                | "memory": "meta-reference-faiss"      |                                                                                           |
 |                                | }                                     |                                                                                           |
diff --git a/llama_toolchain/inference/adapters/tgi/config.py b/llama_toolchain/inference/adapters/tgi/config.py
index 267ddbced..93accd6e1 100644
--- a/llama_toolchain/inference/adapters/tgi/config.py
+++ b/llama_toolchain/inference/adapters/tgi/config.py
@@ -7,7 +7,7 @@
 from typing import Optional
 
 from llama_models.schema_utils import json_schema_type
-from pydantic import BaseModel, Field, field_validator
+from pydantic import BaseModel, Field
 
 
 @json_schema_type
diff --git a/llama_toolchain/inference/adapters/tgi/tgi.py b/llama_toolchain/inference/adapters/tgi/tgi.py
index 0557dfc04..71eb7605c 100644
--- a/llama_toolchain/inference/adapters/tgi/tgi.py
+++ b/llama_toolchain/inference/adapters/tgi/tgi.py
@@ -41,7 +41,7 @@ class LocalTGIAdapter(Inference):
     def client(self) -> InferenceClient:
         return InferenceClient(model=self.config.url, token=self.config.api_token)
 
-    def _get_endpoint_info(self):
+    def _get_endpoint_info(self) -> Dict[str, Any]:
         return {**self.client.get_endpoint_info(), "inference_url": self.config.url}
 
     async def initialize(self) -> None:
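
A note on the tgi.py hunk: the new `-> Dict[str, Any]` annotation assumes `Dict` and `Any` are imported from `typing` in that module; otherwise the file will fail at import time.

For readers following the config side of this change, below is a minimal sketch of what a TGI adapter config along these lines could look like. Only `url` and `api_token` are confirmed by the tgi.py hunk (`self.config.url`, `self.config.api_token`); the class name `TGIImplConfig` and the HF Inference Endpoints fields (`hf_namespace`, `hf_endpoint_name`) are assumptions inferred from the docs table above, and the `@json_schema_type` decorator is omitted to keep the sketch self-contained. The dropped `field_validator` import suggests no custom field validation remains in the config, so none is shown here.

```python
# Sketch only: fields other than `url` and `api_token` are assumptions
# inferred from the docs table, not confirmed by this diff.
from typing import Optional

from pydantic import BaseModel, Field


class TGIImplConfig(BaseModel):  # class name assumed, not shown in the diff
    url: Optional[str] = Field(
        default=None,
        description="URL of a locally running TGI server, e.g. http://localhost:8080",
    )
    api_token: Optional[str] = Field(
        default=None,
        description="HF API token, required when using HF Inference Endpoints",
    )
    # Assumed fields for the HF Inference Endpoints mode described in the docs:
    hf_namespace: Optional[str] = Field(
        default=None,
        description="HF username or organization that owns the Inference Endpoint",
    )
    hf_endpoint_name: Optional[str] = Field(
        default=None,
        description="Name of the dedicated Inference Endpoint",
    )

    def is_local_tgi(self) -> bool:
        # Hypothetical helper: a direct URL implies the local-TGI mode.
        return self.url is not None


# Usage: a local TGI server is configured with just a URL, while the
# HF Inference Endpoints mode would supply the namespace/endpoint pair.
local = TGIImplConfig(url="http://localhost:8080")
hosted = TGIImplConfig(api_token="hf_...", hf_namespace="my-org", hf_endpoint_name="my-endpoint")
```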