use logging instead of prints (#499)

# What does this PR do?

This PR moves all print statements to use logging. Changes:
- Added `await start_trace("sse_generator")` to server.py so that tracing
actually starts; without it, no logs were being emitted.
- If no telemetry provider is configured in run.yaml, logs are written to
stdout.
- By default, logs are emitted as JSON, but we expose an option to output
them in a human-readable format (a minimal sketch of this behavior follows
the list).
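
A minimal sketch of how the stdout fallback and the JSON/human-readable toggle could be wired up with the standard library. This is illustrative only; the `JSONFormatter` class and `setup_logging` helper are assumed names, not the PR's actual code:

```python
# Illustrative sketch only; not the PR's implementation.
import json
import logging
import sys


class JSONFormatter(logging.Formatter):
    """Render each log record as one JSON object per line."""

    def format(self, record: logging.LogRecord) -> str:
        return json.dumps(
            {
                "timestamp": self.formatTime(record),
                "level": record.levelname,
                "logger": record.name,
                "message": record.getMessage(),
            }
        )


def setup_logging(human_readable: bool = False) -> None:
    # With no telemetry provider configured, fall back to stdout.
    handler = logging.StreamHandler(sys.stdout)
    if human_readable:
        fmt = logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
    else:
        fmt = JSONFormatter()
    handler.setFormatter(fmt)
    logging.basicConfig(level=logging.INFO, handlers=[handler])
```

Calling `setup_logging()` once at server startup would make every module-level logger, like the one added in the diff below, inherit the chosen format.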
Dinesh Yeduguru · 2024-11-21 11:32:53 -08:00
36 changed files with 234 additions and 163 deletions


@@ -3,6 +3,8 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+import logging
+
 from typing import AsyncGenerator

 from llama_models.llama3.api.chat_format import ChatFormat
@@ -34,6 +36,9 @@ from llama_stack.providers.utils.inference.prompt_adapter import (

 from .config import VLLMInferenceAdapterConfig

+log = logging.getLogger(__name__)
+
+
 def build_model_aliases():
     return [
         build_model_alias(
@@ -53,7 +58,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         self.client = None

     async def initialize(self) -> None:
-        print(f"Initializing VLLM client with base_url={self.config.url}")
+        log.info(f"Initializing VLLM client with base_url={self.config.url}")
         self.client = OpenAI(base_url=self.config.url, api_key=self.config.api_token)

     async def shutdown(self) -> None:
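
The diff uses the standard per-module logger idiom. As an aside, f-strings (as used in the diff) format eagerly, whereas `%`-style arguments let `logging` defer formatting until a record is actually emitted. A small sketch of the same pattern; the `connect` function is hypothetical:

```python
import logging

log = logging.getLogger(__name__)  # e.g. "llama_stack.providers.remote.inference.vllm"


def connect(base_url: str) -> None:
    # Equivalent to the f-string in the diff, but the message string is
    # only built if an INFO-level record is actually handled.
    log.info("Initializing VLLM client with base_url=%s", base_url)
```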