forked from phoenix-oss/llama-stack-mirror
use logging instead of prints (#499)
# What does this PR do?

This PR moves all print statements to use logging. Things changed:

- Had to add `await start_trace("sse_generator")` to server.py to actually get tracing working; otherwise no logs were being emitted.
- If no telemetry provider is configured in the run.yaml, logs are written to stdout.
- By default, logs are emitted as JSON, but an option is exposed to configure human-readable output.
parent 4e1105e563, commit 6395dadc2b
36 changed files with 234 additions and 163 deletions
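The run.yaml option names for log formatting are not shown in this diff, so the following is a minimal sketch of the behavior the description outlines: JSON log lines by default, an opt-in human-readable formatter, and stdout as the fallback destination when no telemetry provider is configured. The `JSONFormatter` class and the `setup_logging(use_json=...)` helper are hypothetical illustrations, not llama-stack API.

```python
import json
import logging
import sys


class JSONFormatter(logging.Formatter):
    """Hypothetical formatter emitting one JSON object per log record."""

    def format(self, record: logging.LogRecord) -> str:
        return json.dumps(
            {
                "level": record.levelname,
                "logger": record.name,
                "message": record.getMessage(),
            }
        )


def setup_logging(use_json: bool = True) -> None:
    # Fallback path from the description: no telemetry provider -> stdout.
    handler = logging.StreamHandler(sys.stdout)
    if use_json:
        handler.setFormatter(JSONFormatter())
    else:
        # Opt-in human-readable output.
        handler.setFormatter(
            logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
        )
    logging.basicConfig(level=logging.INFO, handlers=[handler])
```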
@@ -11,6 +11,7 @@
 # the root directory of this source tree.
 
 import json
+import logging
 import multiprocessing
 import os
 import tempfile
@@ -37,6 +38,8 @@ from llama_stack.apis.inference import ChatCompletionRequest, CompletionRequest
 
 from .generation import TokenResult
 
+log = logging.getLogger(__name__)
+
 
 class ProcessingMessageName(str, Enum):
     ready_request = "ready_request"
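For context on the hunk above: `logging.getLogger(__name__)` returns a per-module logger that propagates records up to the root logger, so a handler and formatter configured once at startup apply to every module converted from `print`. A minimal sketch of that propagation, assuming a root configuration like the one sketched earlier:

```python
import logging

# Configure the root logger once at process startup (stdout fallback).
logging.basicConfig(level=logging.INFO, format="%(name)s: %(message)s")

# Each module creates its own named logger; no handler wiring needed here.
log = logging.getLogger(__name__)
log.info("Loaded model...")  # emitted via the root handler configured above
```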
@@ -183,16 +186,16 @@ def retrieve_requests(reply_socket_url: str):
                 group=get_model_parallel_group(),
             )
             if isinstance(updates[0], CancelSentinel):
-                print("quitting generation loop because request was cancelled")
+                log.info(
+                    "quitting generation loop because request was cancelled"
+                )
                 break
 
         if mp_rank_0():
             send_obj(EndSentinel())
     except Exception as e:
-        print(f"[debug] got exception {e}")
-        import traceback
+        log.exception("exception in generation loop")
 
-        traceback.print_exc()
         if mp_rank_0():
             send_obj(ExceptionResponse(error=str(e)))
 
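One behavioral note on the hunk above: `log.exception` logs at ERROR level and appends the active traceback automatically, which is why the manual `import traceback` / `traceback.print_exc()` pair can be dropped. A minimal standalone illustration:

```python
import logging

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

try:
    raise RuntimeError("boom")
except Exception:
    # Equivalent to log.error(..., exc_info=True): message plus traceback.
    log.exception("exception in generation loop")
```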
@@ -252,7 +255,7 @@ def worker_process_entrypoint(
         except StopIteration:
             break
 
-    print("[debug] worker process done")
+    log.info("[debug] worker process done")
 
 
 def launch_dist_group(
@@ -313,7 +316,7 @@ def start_model_parallel_process(
 
     request_socket.send(encode_msg(ReadyRequest()))
     response = request_socket.recv()
-    print("Loaded model...")
+    log.info("Loaded model...")
 
     return request_socket, process
 
@@ -361,7 +364,7 @@ class ModelParallelProcessGroup:
             break
 
         if isinstance(obj, ExceptionResponse):
-            print(f"[debug] got exception {obj.error}")
+            log.error(f"[debug] got exception {obj.error}")
             raise Exception(obj.error)
 
         if isinstance(obj, TaskResponse):
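A side note on the last hunk: `log.error(f"...{obj.error}")` builds the f-string eagerly even when ERROR records are filtered out, whereas stdlib logging also accepts lazy %-style arguments that defer formatting until a handler actually emits the record. A small sketch of the alternative (`err` is a stand-in for `obj.error`):

```python
import logging

log = logging.getLogger(__name__)

err = "connection reset"  # stand-in for obj.error

# Eager: the f-string is built before logging decides whether to emit it.
# log.error(f"[debug] got exception {err}")

# Lazy: formatting happens only if the ERROR record is actually handled.
log.error("[debug] got exception %s", err)
```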