From 7bfcfe80b530bf1e87eb3ca9ce83537e64575209 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Tue, 19 Nov 2024 15:50:26 -0800
Subject: [PATCH] Add logs (prints :/) to dump out what URL vllm / tgi is
 connecting to

---
 llama_stack/providers/remote/inference/tgi/tgi.py   | 1 +
 llama_stack/providers/remote/inference/vllm/vllm.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py
index 30745cb10..92492e3da 100644
--- a/llama_stack/providers/remote/inference/tgi/tgi.py
+++ b/llama_stack/providers/remote/inference/tgi/tgi.py
@@ -264,6 +264,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
 
 class TGIAdapter(_HfAdapter):
     async def initialize(self, config: TGIImplConfig) -> None:
+        print(f"Initializing TGI client with url={config.url}")
         self.client = AsyncInferenceClient(model=config.url, token=config.api_token)
         endpoint_info = await self.client.get_endpoint_info()
         self.max_tokens = endpoint_info["max_total_tokens"]
diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 788f6cac4..3c877639c 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -53,6 +53,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         self.client = None
 
     async def initialize(self) -> None:
+        print(f"Initializing VLLM client with base_url={self.config.url}")
        self.client = OpenAI(base_url=self.config.url, api_key=self.config.api_token)
 
     async def shutdown(self) -> None:
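
Note (not part of the patch): the subject line concedes these are plain print() calls rather than real logging. A minimal sketch of what a logging-based follow-up could look like, using only Python's standard logging module; the logger name and URL below are illustrative stand-ins, not taken from the patch:

    import logging

    # Configure logging once at startup; the INFO level is an assumption.
    logging.basicConfig(level=logging.INFO)

    # Logger name is illustrative, not the module's actual logger.
    logger = logging.getLogger("llama_stack.inference")

    # Stand-in for config.url / self.config.url in the adapters above.
    url = "http://localhost:8000/v1"
    logger.info("Initializing VLLM client with base_url=%s", url)

Unlike bare prints, this lets the host application silence or redirect the messages via the standard logging configuration.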