use logging instead of prints (#499)

# What does this PR do?

This PR replaces all print statements with logging calls. Notable changes:
- Had to add `await start_trace("sse_generator")` to server.py to
actually get tracing working; otherwise, no logs were showing up.
- If no telemetry provider is configured in the run.yaml, logs are written to
stdout.
- By default, logs are emitted as JSON, but we expose an option
to output them in a human-readable format instead.
This commit is contained in:
Dinesh Yeduguru 2024-11-21 11:32:53 -08:00 committed by GitHub
parent 4e1105e563
commit 6395dadc2b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
36 changed files with 234 additions and 163 deletions

View file

@ -11,6 +11,7 @@
# the root directory of this source tree.
import json
import logging
import multiprocessing
import os
import tempfile
@ -37,6 +38,8 @@ from llama_stack.apis.inference import ChatCompletionRequest, CompletionRequest
from .generation import TokenResult
log = logging.getLogger(__name__)
class ProcessingMessageName(str, Enum):
ready_request = "ready_request"
@ -183,16 +186,16 @@ def retrieve_requests(reply_socket_url: str):
group=get_model_parallel_group(),
)
if isinstance(updates[0], CancelSentinel):
print("quitting generation loop because request was cancelled")
log.info(
"quitting generation loop because request was cancelled"
)
break
if mp_rank_0():
send_obj(EndSentinel())
except Exception as e:
print(f"[debug] got exception {e}")
import traceback
log.exception("exception in generation loop")
traceback.print_exc()
if mp_rank_0():
send_obj(ExceptionResponse(error=str(e)))
@ -252,7 +255,7 @@ def worker_process_entrypoint(
except StopIteration:
break
print("[debug] worker process done")
log.info("[debug] worker process done")
def launch_dist_group(
@ -313,7 +316,7 @@ def start_model_parallel_process(
request_socket.send(encode_msg(ReadyRequest()))
response = request_socket.recv()
print("Loaded model...")
log.info("Loaded model...")
return request_socket, process
@ -361,7 +364,7 @@ class ModelParallelProcessGroup:
break
if isinstance(obj, ExceptionResponse):
print(f"[debug] got exception {obj.error}")
log.error(f"[debug] got exception {obj.error}")
raise Exception(obj.error)
if isinstance(obj, TaskResponse):