diff --git a/docs/source/index.md b/docs/source/index.md index 9cabc375c..95ceb88e3 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -75,8 +75,6 @@ Llama Stack already has a number of "adapters" available for some popular Infere - Check out [Zero to Hero](zero_to_hero_guide) guide to learn in details about how to build your first agent. - See how you can use [Llama Stack Distributions](distributions/index) to get started with popular inference and other service providers. -Kutta - We also provide a number of Client side SDKs to make it easier to connect to Llama Stack server in your preferred language. | **Language** | **Client SDK** | **Package** | diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py index d57fbdc17..dad055cbd 100644 --- a/llama_stack/providers/remote/inference/tgi/tgi.py +++ b/llama_stack/providers/remote/inference/tgi/tgi.py @@ -74,7 +74,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate): async def completion( self, - model: str, + model_id: str, content: InterleavedTextMedia, sampling_params: Optional[SamplingParams] = SamplingParams(), response_format: Optional[ResponseFormat] = None, @@ -82,7 +82,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate): logprobs: Optional[LogProbConfig] = None, ) -> AsyncGenerator: request = CompletionRequest( - model=model, + model=model_id, content=content, sampling_params=sampling_params, response_format=response_format, @@ -176,7 +176,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate): async def chat_completion( self, - model: str, + model_id: str, messages: List[Message], sampling_params: Optional[SamplingParams] = SamplingParams(), tools: Optional[List[ToolDefinition]] = None, @@ -187,7 +187,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate): logprobs: Optional[LogProbConfig] = None, ) -> AsyncGenerator: request = ChatCompletionRequest( - model=model, + model=model_id, messages=messages, sampling_params=sampling_params, tools=tools or [], @@ -256,7 +256,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate): async def embeddings( self, - model: str, + model_id: str, contents: List[InterleavedTextMedia], ) -> EmbeddingsResponse: raise NotImplementedError()