model -> model_id for TGI

Ashwin Bharambe 2024-11-22 15:40:08 -08:00
parent c2c53d0272
commit 97dc5b68e5
2 changed files with 5 additions and 7 deletions

@@ -75,8 +75,6 @@ Llama Stack already has a number of "adapters" available for some popular Infere
 - Check out [Zero to Hero](zero_to_hero_guide) guide to learn in details about how to build your first agent.
 - See how you can use [Llama Stack Distributions](distributions/index) to get started with popular inference and other service providers.
-
-Kutta
 
 We also provide a number of Client side SDKs to make it easier to connect to Llama Stack server in your preferred language.
 
 | **Language** | **Client SDK** | **Package** |

@@ -74,7 +74,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
 
     async def completion(
         self,
-        model: str,
+        model_id: str,
         content: InterleavedTextMedia,
         sampling_params: Optional[SamplingParams] = SamplingParams(),
         response_format: Optional[ResponseFormat] = None,
@@ -82,7 +82,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
         logprobs: Optional[LogProbConfig] = None,
     ) -> AsyncGenerator:
         request = CompletionRequest(
-            model=model,
+            model=model_id,
             content=content,
             sampling_params=sampling_params,
             response_format=response_format,
@@ -176,7 +176,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
 
     async def chat_completion(
         self,
-        model: str,
+        model_id: str,
         messages: List[Message],
         sampling_params: Optional[SamplingParams] = SamplingParams(),
         tools: Optional[List[ToolDefinition]] = None,
@@ -187,7 +187,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
         logprobs: Optional[LogProbConfig] = None,
     ) -> AsyncGenerator:
         request = ChatCompletionRequest(
-            model=model,
+            model=model_id,
            messages=messages,
             sampling_params=sampling_params,
             tools=tools or [],
@@ -256,7 +256,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
 
     async def embeddings(
         self,
-        model: str,
+        model_id: str,
         contents: List[InterleavedTextMedia],
     ) -> EmbeddingsResponse:
         raise NotImplementedError()
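
In practical terms, the rename changes the public keyword for these adapter methods. Below is a minimal sketch of what that means for callers; FakeHfAdapter and the model identifier are hypothetical stand-ins, and only the model -> model_id parameter rename itself comes from this commit.

# Minimal sketch, not the real _HfAdapter: only the `model` -> `model_id`
# parameter rename is taken from this commit.
import asyncio
from typing import AsyncGenerator, Optional


class FakeHfAdapter:
    async def completion(
        self,
        model_id: str,  # was `model` before this commit
        content: str,
        sampling_params: Optional[dict] = None,
    ) -> AsyncGenerator:
        # Stand-in for streaming completion chunks back from a TGI server.
        yield f"[{model_id}] completion for {content!r}"


async def main() -> None:
    adapter = FakeHfAdapter()
    # Call sites that passed the argument by keyword must switch to
    # `model_id=`; positional call sites are unaffected by the rename.
    async for chunk in adapter.completion(
        model_id="meta-llama/Llama-3.1-8B-Instruct",  # hypothetical model id
        content="Hello",
    ):
        print(chunk)


asyncio.run(main())

The same rename applies to chat_completion and embeddings above, so keyword call sites for those methods need the same one-word update.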