use logging instead of prints (#499)

# What does this PR do?

This PR replaces all print statements with logging. Things changed:
- Had to add `await start_trace("sse_generator")` to server.py to actually get tracing working; otherwise no logs were showing up.
- If no telemetry provider is configured in run.yaml, we write logs to stdout.
- By default, logs are emitted as JSON, but we expose an option to configure human-readable output (see the sketch after this list).
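For readers unfamiliar with the two output modes, here is a minimal sketch of the described behaviour using only the standard library. The `setup_logging` helper, the `JSONFormatter` class, and the JSON field names are assumptions for illustration, not the PR's actual implementation.

```python
import json
import logging
import sys


class JSONFormatter(logging.Formatter):
    """Render each log record as one JSON object per line (the default mode)."""

    def format(self, record: logging.LogRecord) -> str:
        payload = {
            "timestamp": self.formatTime(record),
            "level": record.levelname,
            "logger": record.name,
            "message": record.getMessage(),
        }
        return json.dumps(payload)


def setup_logging(human_readable: bool = False) -> None:
    """Send logs to stdout: JSON by default, plain text when human_readable=True."""
    handler = logging.StreamHandler(sys.stdout)
    if human_readable:
        fmt = logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
        handler.setFormatter(fmt)
    else:
        handler.setFormatter(JSONFormatter())
    logging.basicConfig(level=logging.INFO, handlers=[handler], force=True)
```

Calling `setup_logging()` once at startup and then `logging.getLogger(__name__).info("server started")` would emit a single JSON line on stdout.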
Author: Dinesh Yeduguru, committed by GitHub
Date: 2024-11-21 11:32:53 -08:00
Commit: 6395dadc2b (parent 4e1105e563)
36 changed files with 234 additions and 163 deletions


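The hunks below (extracted without their `+`/`-` markers) all follow the same pattern: drop `termcolor.cprint`, create a module-level logger, and route errors through it. A self-contained sketch of that pattern, with a hypothetical `registry` lookup standing in for `resolve_model`:

```python
import logging

log = logging.getLogger(__name__)


def resolve_or_log(llama_model: str, registry: dict) -> object | None:
    """Return the model if known; otherwise log an error instead of cprint-ing it."""
    model = registry.get(llama_model)
    if model is None:
        # Before this PR: cprint(f"Could not resolve model {llama_model}", color="red")
        log.error(f"Could not resolve model {llama_model}")
    return model
```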
@@ -7,14 +7,13 @@
import base64
import io
import json
import logging
from typing import Tuple
import httpx
from llama_models.llama3.api.chat_format import ChatFormat
from PIL import Image as PIL_Image
from termcolor import cprint
from llama_models.llama3.api.datatypes import * # noqa: F403
from llama_stack.apis.inference import * # noqa: F403
from llama_models.datatypes import ModelFamily
@@ -29,6 +28,8 @@ from llama_models.sku_list import resolve_model
from llama_stack.providers.utils.inference import supported_inference_models
log = logging.getLogger(__name__)
def content_has_media(content: InterleavedTextMedia):
def _has_media_content(c):
@@ -175,13 +176,13 @@ def chat_completion_request_to_messages(
"""
model = resolve_model(llama_model)
if model is None:
cprint(f"Could not resolve model {llama_model}", color="red")
log.error(f"Could not resolve model {llama_model}")
return request.messages
allowed_models = supported_inference_models()
descriptors = [m.descriptor() for m in allowed_models]
if model.descriptor() not in descriptors:
cprint(f"Unsupported inference model? {model.descriptor()}", color="red")
log.error(f"Unsupported inference model? {model.descriptor()}")
return request.messages
if model.model_family == ModelFamily.llama3_1 or (