[Inference] Use huggingface_hub inference client for TGI adapter (#53)
* Use huggingface_hub inference client for TGI inference
* Update the default value for TGI URL
* Use InferenceClient.text_generation for TGI inference
* Apply post-review fixes and split the TGI adapter into local and Inference Endpoints variants
* Update CLI reference and add typing
* Rename TGI Adapter class
* Use HfApi to get the namespace when not provided in the HF endpoint name
* Remove unnecessary method argument
* Improve TGI adapter initialization condition
* Move helper into impl file + fix merging conflicts
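The headline change is delegating TGI calls to huggingface_hub's InferenceClient rather than hand-rolled HTTP. A minimal sketch of that pattern, assuming a TGI server on localhost:8080 (the URL and generation parameters here are illustrative, not the adapter's actual defaults):

```python
from huggingface_hub import InferenceClient

# Point the client at a locally running TGI server. The URL is an assumed
# default for illustration only.
client = InferenceClient(model="http://localhost:8080")

# text_generation maps onto TGI's generate endpoint; with stream=True it
# yields generated tokens incrementally instead of one completed string.
for token in client.text_generation(
    "Explain what TGI is in one sentence.",
    max_new_tokens=64,
    stream=True,
):
    print(token, end="", flush=True)
```

For the Inference Endpoints variant, the namespace fallback mentioned above (resolving the owner via HfApi when the endpoint name has no namespace prefix) could look roughly like this; `qualified_endpoint_name` is a hypothetical helper, not the adapter's real one:

```python
from huggingface_hub import HfApi

def qualified_endpoint_name(name: str) -> str:
    """Hypothetical helper: if the endpoint name is already
    "namespace/name", keep it; otherwise fall back to the namespace of
    the authenticated user (requires a valid Hugging Face token)."""
    if "/" in name:
        return name  # already namespace-qualified
    return f"{HfApi().whoami()['name']}/{name}"
```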
parent 191cd28831
commit 736092f6bc
6 changed files with 171 additions and 72 deletions
```diff
@@ -65,11 +65,23 @@ def available_distribution_specs() -> List[DistributionSpec]:
                 Api.telemetry: "console",
             },
         ),
+        DistributionSpec(
+            distribution_type="local-plus-tgi-inference",
+            description="Use TGI for running LLM inference",
+            providers={
+                Api.inference: remote_provider_type("tgi"),
+                Api.safety: "meta-reference",
+                Api.agentic_system: "meta-reference",
+                Api.memory: "meta-reference-faiss",
+            },
+        ),
     ]


 @lru_cache()
-def resolve_distribution_spec(distribution_type: str) -> Optional[DistributionSpec]:
+def resolve_distribution_spec(
+    distribution_type: str,
+) -> Optional[DistributionSpec]:
     for spec in available_distribution_specs():
         if spec.distribution_type == distribution_type:
             return spec
```
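With the registry change above, the new distribution becomes resolvable by its type string. A hypothetical lookup against the patched module (import path omitted; the print call is illustrative):

```python
# Hypothetical lookup of the spec added in this diff. Because
# resolve_distribution_spec is wrapped in @lru_cache, repeated calls with
# the same distribution_type are served from the cache after the first
# scan of available_distribution_specs().
spec = resolve_distribution_spec("local-plus-tgi-inference")
if spec is not None:
    print(spec.description)  # -> "Use TGI for running LLM inference"
```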