use logging instead of prints (#499)

# What does this PR do?

This PR moves all print statements to logging. Changes:
- Added `await start_trace("sse_generator")` to server.py to actually get tracing working; without it, no logs were showing up.
- If no telemetry provider is configured in run.yaml, we write to stdout.
- By default, logs are emitted as JSON, but we expose an option to output them in a human-readable format (a sketch of that setup follows below).
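
The diffs in this commit don't include the formatter itself, so as a rough illustration of the behavior described above, here is a minimal sketch of a JSON-by-default, stdout-fallback logging setup. The `log_format` option name and the `JSONFormatter` class are assumptions for illustration, not the PR's actual code:

```python
import json
import logging
import sys


class JSONFormatter(logging.Formatter):
    """Render each log record as a single JSON object (assumed shape)."""

    def format(self, record: logging.LogRecord) -> str:
        return json.dumps(
            {
                "ts": self.formatTime(record),
                "level": record.levelname,
                "logger": record.name,
                "message": record.getMessage(),
            }
        )


def setup_logging(log_format: str = "json") -> None:
    # With no telemetry provider configured, fall back to a stdout handler.
    handler = logging.StreamHandler(sys.stdout)
    if log_format == "json":
        handler.setFormatter(JSONFormatter())
    else:
        # Human-readable alternative, opt-in via config.
        handler.setFormatter(
            logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
        )
    logging.basicConfig(level=logging.INFO, handlers=[handler], force=True)
```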
Author: Dinesh Yeduguru
Date: 2024-11-21 11:32:53 -08:00
Commit: 6395dadc2b (parent: 4e1105e563)
36 changed files with 234 additions and 163 deletions

```diff
@@ -7,14 +7,13 @@
 import base64
 import io
 import json
+import logging
 from typing import Tuple
 
 import httpx
 from llama_models.llama3.api.chat_format import ChatFormat
 from PIL import Image as PIL_Image
-from termcolor import cprint
-
 
 from llama_models.llama3.api.datatypes import *  # noqa: F403
 from llama_stack.apis.inference import *  # noqa: F403
 from llama_models.datatypes import ModelFamily
@@ -29,6 +28,8 @@ from llama_models.sku_list import resolve_model
 from llama_stack.providers.utils.inference import supported_inference_models
 
+log = logging.getLogger(__name__)
+
 
 def content_has_media(content: InterleavedTextMedia):
     def _has_media_content(c):
@@ -175,13 +176,13 @@ def chat_completion_request_to_messages(
     """
     model = resolve_model(llama_model)
     if model is None:
-        cprint(f"Could not resolve model {llama_model}", color="red")
+        log.error(f"Could not resolve model {llama_model}")
         return request.messages
 
     allowed_models = supported_inference_models()
     descriptors = [m.descriptor() for m in allowed_models]
     if model.descriptor() not in descriptors:
-        cprint(f"Unsupported inference model? {model.descriptor()}", color="red")
+        log.error(f"Unsupported inference model? {model.descriptor()}")
         return request.messages
 
     if model.model_family == ModelFamily.llama3_1 or (
```
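
One side note on the hunk above (an observation, not part of the PR): the new calls keep f-strings, which are formatted eagerly even when the record is filtered out. The stdlib convention is to pass lazy %-style arguments instead, for example:

```python
import logging

log = logging.getLogger(__name__)
llama_model = "Llama3.1-8B-Instruct"  # illustrative value

# Eager: the f-string is built even if ERROR records are filtered out.
log.error(f"Could not resolve model {llama_model}")
# Lazy: the message is only formatted if the record is actually emitted.
log.error("Could not resolve model %s", llama_model)
```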

```diff
@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+import logging
 from datetime import datetime
 from typing import List, Optional
@@ -13,6 +14,8 @@ from psycopg2.extras import DictCursor
 from ..api import KVStore
 from ..config import PostgresKVStoreConfig
 
+log = logging.getLogger(__name__)
+
 
 class PostgresKVStoreImpl(KVStore):
     def __init__(self, config: PostgresKVStoreConfig):
@@ -43,9 +46,8 @@ class PostgresKVStoreImpl(KVStore):
                 """
             )
         except Exception as e:
-            import traceback
-
-            traceback.print_exc()
+            log.exception("Could not connect to PostgreSQL database server")
             raise RuntimeError("Could not connect to PostgreSQL database server") from e
 
     def _namespaced_key(self, key: str) -> str:
```
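
The `log.exception(...)` call above replaces `traceback.print_exc()` one-for-one: inside an `except` block it logs at ERROR level and automatically appends the active traceback, while `raise ... from e` keeps the exception chain intact. A self-contained sketch of the pattern (the failing `connect()` is a stand-in for illustration, not the psycopg2 call):

```python
import logging

log = logging.getLogger(__name__)


def connect():
    raise OSError("connection refused")  # stand-in failure for illustration


try:
    conn = connect()
except Exception as e:
    # log.exception() records the message at ERROR level plus the current
    # traceback -- exactly what traceback.print_exc() used to print.
    log.exception("Could not connect to PostgreSQL database server")
    raise RuntimeError("Could not connect to PostgreSQL database server") from e
```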

```diff
@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 import base64
 import io
+import logging
 import re
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
@@ -16,13 +17,14 @@ import httpx
 import numpy as np
 from numpy.typing import NDArray
 from pypdf import PdfReader
-from termcolor import cprint
 
 from llama_models.llama3.api.datatypes import *  # noqa: F403
 from llama_models.llama3.api.tokenizer import Tokenizer
 
 from llama_stack.apis.memory import *  # noqa: F403
 
+log = logging.getLogger(__name__)
+
 ALL_MINILM_L6_V2_DIMENSION = 384
 
 EMBEDDING_MODELS = {}
@@ -35,7 +37,7 @@ def get_embedding_model(model: str) -> "SentenceTransformer":
     if loaded_model is not None:
         return loaded_model
 
-    print(f"Loading sentence transformer for {model}...")
+    log.info(f"Loading sentence transformer for {model}...")
     from sentence_transformers import SentenceTransformer
 
     loaded_model = SentenceTransformer(model)
@@ -92,7 +94,7 @@ def content_from_data(data_url: str) -> str:
         return "\n".join([page.extract_text() for page in pdf_reader.pages])
     else:
-        cprint("Could not extract content from data_url properly.", color="red")
+        log.error("Could not extract content from data_url properly.")
         return ""
```

```diff
@@ -17,6 +17,8 @@ from typing import Any, Callable, Dict, List
 
 from llama_stack.apis.telemetry import *  # noqa: F403
 
+log = logging.getLogger(__name__)
+
 
 def generate_short_uuid(len: int = 12):
     full_uuid = uuid.uuid4()
@@ -40,7 +42,7 @@ class BackgroundLogger:
         try:
             self.log_queue.put_nowait(event)
         except queue.Full:
-            print("Log queue is full, dropping event")
+            log.error("Log queue is full, dropping event")
 
     def _process_logs(self):
         while True:
@@ -125,7 +127,7 @@ async def start_trace(name: str, attributes: Dict[str, Any] = None):
     global CURRENT_TRACE_CONTEXT, BACKGROUND_LOGGER
 
     if BACKGROUND_LOGGER is None:
-        print("No Telemetry implementation set. Skipping trace initialization...")
+        log.info("No Telemetry implementation set. Skipping trace initialization...")
         return
 
     trace_id = generate_short_uuid()
```
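
Finally, per the PR description, `await start_trace("sse_generator")` had to be added to server.py for tracing to produce any output at all. The server wiring isn't shown in this excerpt, so the sketch below is only a guess at the shape of that call site; the module path and generator signature are assumptions:

```python
from llama_stack.providers.utils.telemetry.tracing import start_trace  # assumed path


async def sse_generator(event_gen):
    # Establish a trace context before streaming. If no telemetry provider
    # is configured, start_trace() just logs an info message and returns,
    # per the hunk above.
    await start_trace("sse_generator")
    async for event in event_gen:
        yield f"data: {event}\n\n"
```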