diff --git a/docs/source/index.md b/docs/source/index.md
index 9cabc375c..95ceb88e3 100644
--- a/docs/source/index.md
+++ b/docs/source/index.md
@@ -75,8 +75,6 @@ Llama Stack already has a number of "adapters" available for some popular Infere
 - Check out [Zero to Hero](zero_to_hero_guide) guide to learn in details about how to build your first agent.
 - See how you can use [Llama Stack Distributions](distributions/index) to get started with popular inference and other service providers.
 
-Kutta
-
 We also provide a number of Client side SDKs to make it easier to connect to Llama Stack server in your preferred language.
 
 |  **Language** |  **Client SDK** | **Package** |
diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py
index d57fbdc17..dad055cbd 100644
--- a/llama_stack/providers/remote/inference/tgi/tgi.py
+++ b/llama_stack/providers/remote/inference/tgi/tgi.py
@@ -74,7 +74,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
 
     async def completion(
         self,
-        model: str,
+        model_id: str,
         content: InterleavedTextMedia,
         sampling_params: Optional[SamplingParams] = SamplingParams(),
         response_format: Optional[ResponseFormat] = None,
@@ -82,7 +82,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
         logprobs: Optional[LogProbConfig] = None,
     ) -> AsyncGenerator:
         request = CompletionRequest(
-            model=model,
+            model=model_id,
             content=content,
             sampling_params=sampling_params,
             response_format=response_format,
@@ -176,7 +176,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
 
     async def chat_completion(
         self,
-        model: str,
+        model_id: str,
         messages: List[Message],
         sampling_params: Optional[SamplingParams] = SamplingParams(),
         tools: Optional[List[ToolDefinition]] = None,
@@ -187,7 +187,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
         logprobs: Optional[LogProbConfig] = None,
     ) -> AsyncGenerator:
         request = ChatCompletionRequest(
-            model=model,
+            model=model_id,
             messages=messages,
             sampling_params=sampling_params,
             tools=tools or [],
@@ -256,7 +256,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
 
     async def embeddings(
         self,
-        model: str,
+        model_id: str,
         contents: List[InterleavedTextMedia],
     ) -> EmbeddingsResponse:
         raise NotImplementedError()