forked from phoenix-oss/llama-stack-mirror
use logging instead of prints (#499)
# What does this PR do?

This PR moves all print statements to use logging. Things changed:

- Had to add `await start_trace("sse_generator")` to server.py to actually get tracing working; otherwise no logs were visible.
- If no telemetry provider is configured in the run.yaml, logs are written to stdout.
- By default, logs are emitted as JSON, but we expose an option to output them in a human-readable format (see the sketch after this list).
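As a rough illustration of the stdout/JSON/human-readable behavior described above (not the actual llama-stack configuration API; `setup_logging`, `use_json`, and `JSONFormatter` are hypothetical names for this sketch):

```python
import json
import logging
import sys


class JSONFormatter(logging.Formatter):
    """Render each record as a single JSON object (hypothetical sketch)."""

    def format(self, record: logging.LogRecord) -> str:
        return json.dumps(
            {
                "level": record.levelname,
                "name": record.name,
                "message": record.getMessage(),
            }
        )


def setup_logging(use_json: bool = True) -> None:
    """Write logs to stdout when no telemetry provider is configured.

    `use_json` mirrors the JSON-vs-human-readable option described above;
    the real option name in run.yaml may differ.
    """
    handler = logging.StreamHandler(sys.stdout)
    if use_json:
        handler.setFormatter(JSONFormatter())
    else:
        handler.setFormatter(
            logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
        )
    logging.basicConfig(level=logging.INFO, handlers=[handler], force=True)
```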
Parent: 4e1105e563
Commit: 6395dadc2b
36 changed files with 234 additions and 163 deletions
Representative diff (TGI inference adapter):

```diff
@@ -34,7 +34,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 
 from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig
 
-logger = logging.getLogger(__name__)
+log = logging.getLogger(__name__)
 
 
 class _HfAdapter(Inference, ModelsProtocolPrivate):
@@ -264,7 +264,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
 
 class TGIAdapter(_HfAdapter):
     async def initialize(self, config: TGIImplConfig) -> None:
-        print(f"Initializing TGI client with url={config.url}")
+        log.info(f"Initializing TGI client with url={config.url}")
         self.client = AsyncInferenceClient(model=config.url, token=config.api_token)
         endpoint_info = await self.client.get_endpoint_info()
         self.max_tokens = endpoint_info["max_total_tokens"]
```
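With the module-level logger in place, these messages only show up when the process configures logging (via the telemetry provider or the stdout fallback). A minimal standalone check using only the standard library defaults, with a placeholder URL, looks like:

```python
import logging

# Default handler/format; the stack's own setup replaces this in practice.
logging.basicConfig(level=logging.INFO)

log = logging.getLogger(__name__)
log.info("Initializing TGI client with url=%s", "http://localhost:8080")
# INFO:__main__:Initializing TGI client with url=http://localhost:8080
```

Note that the diff keeps f-strings inside the log calls; %-style arguments (as above) defer string formatting until the record is actually emitted, which matters only if the messages become expensive to build.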