use logging instead of prints (#499)

# What does this PR do?

This PR replaces all print statements with logging. Changes:
- Added `await start_trace("sse_generator")` to server.py to get tracing working; without it, no logs were showing up.
- If no telemetry provider is configured in run.yaml, logs are written to stdout.
- Logs are emitted as JSON by default, with an option to switch to human-readable output (a minimal sketch of one possible setup follows this list).
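The JSON-by-default behavior with a human-readable opt-out can be pictured with the standard `logging` module alone. This is only an illustration of the idea, not the actual llama-stack telemetry code: the `JSONFormatter` class and the `log_format` parameter are assumed names.

```python
import json
import logging
import sys


class JSONFormatter(logging.Formatter):
    """Render each log record as a single JSON object (assumed helper)."""

    def format(self, record: logging.LogRecord) -> str:
        return json.dumps(
            {
                "timestamp": self.formatTime(record),
                "level": record.levelname,
                "logger": record.name,
                "message": record.getMessage(),
            }
        )


def setup_logging(log_format: str = "json") -> None:
    """Write logs to stdout: JSON by default, plain text when requested."""
    handler = logging.StreamHandler(sys.stdout)
    if log_format == "json":
        handler.setFormatter(JSONFormatter())
    else:
        handler.setFormatter(
            logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
        )
    logging.basicConfig(level=logging.INFO, handlers=[handler], force=True)


setup_logging()                      # JSON lines on stdout
logging.getLogger(__name__).info("server started")
setup_logging(log_format="human")    # switch to human-readable output
logging.getLogger(__name__).info("server started")
```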
Dinesh Yeduguru · 2024-11-21 11:32:53 -08:00 · committed by GitHub
commit 6395dadc2b (parent 4e1105e563)
36 changed files with 234 additions and 163 deletions


```diff
@@ -8,6 +8,7 @@
 # This software may be used and distributed in accordance with the terms of the Llama 3 Community License Agreement.
 
 import json
+import logging
 import os
 import shutil
 import sys
@@ -32,6 +33,8 @@ from llama_stack.providers.inline.inference.meta_reference.quantization.fp8_impl
     quantize_fp8,
 )
 
+log = logging.getLogger(__name__)
+
 
 def main(
     ckpt_dir: str,
@@ -102,7 +105,7 @@ def main(
     else:
         torch.set_default_tensor_type(torch.cuda.HalfTensor)
 
-    print(ckpt_path)
+    log.info(ckpt_path)
     assert (
         quantized_ckpt_dir is not None
     ), "QUantized checkpoint directory should not be None"
```