use logging instead of prints (#499)

# What does this PR do?

This PR replaces all print statements with logging. Changes:
- Added `await start_trace("sse_generator")` to server.py so that tracing actually starts; without it, no logs were showing up.
- If no telemetry provider is configured in the run.yaml, logs are written to stdout.
- By default, logs are emitted as JSON, but an option is exposed to configure human-readable output instead (a rough sketch of such a setup is shown after this list).
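
The PR text above does not include the setup code itself, so here is a minimal, stdlib-only sketch of what a stdout JSON-vs-human-readable toggle could look like. All names in it (`setup_logging`, `json_output`, `JSONFormatter`) are illustrative assumptions, not the code actually added by this PR:

```python
import json
import logging
import sys


class JSONFormatter(logging.Formatter):
    """Render each log record as a single JSON object per line."""

    def format(self, record: logging.LogRecord) -> str:
        payload = {
            "timestamp": self.formatTime(record),
            "level": record.levelname,
            "logger": record.name,
            "message": record.getMessage(),
        }
        return json.dumps(payload)


def setup_logging(json_output: bool = True) -> None:
    """Send logs to stdout, either as JSON (default) or human readable."""
    handler = logging.StreamHandler(sys.stdout)
    if json_output:
        handler.setFormatter(JSONFormatter())
    else:
        handler.setFormatter(
            logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
        )
    logging.basicConfig(level=logging.INFO, handlers=[handler], force=True)
```

Calling `setup_logging(json_output=False)` would switch to the plain-text format; the actual option name and how it is wired into run.yaml may differ in the real implementation.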
Dinesh Yeduguru, 2024-11-21 11:32:53 -08:00, committed by GitHub
commit 6395dadc2b (parent 4e1105e563)
36 changed files with 234 additions and 163 deletions


@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+import logging
 from typing import AsyncGenerator
 import httpx
@@ -39,6 +40,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
     request_has_media,
 )
+log = logging.getLogger(__name__)
 model_aliases = [
     build_model_alias(
@@ -105,7 +107,7 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
         return AsyncClient(host=self.url)
     async def initialize(self) -> None:
-        print(f"checking connectivity to Ollama at `{self.url}`...")
+        log.info(f"checking connectivity to Ollama at `{self.url}`...")
         try:
             await self.client.ps()
         except httpx.ConnectError as e:


@@ -34,7 +34,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig
-logger = logging.getLogger(__name__)
+log = logging.getLogger(__name__)
 class _HfAdapter(Inference, ModelsProtocolPrivate):
@@ -264,7 +264,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
 class TGIAdapter(_HfAdapter):
     async def initialize(self, config: TGIImplConfig) -> None:
-        print(f"Initializing TGI client with url={config.url}")
+        log.info(f"Initializing TGI client with url={config.url}")
         self.client = AsyncInferenceClient(model=config.url, token=config.api_token)
         endpoint_info = await self.client.get_endpoint_info()
         self.max_tokens = endpoint_info["max_total_tokens"]


@@ -3,6 +3,8 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+import logging
 from typing import AsyncGenerator
 from llama_models.llama3.api.chat_format import ChatFormat
@@ -34,6 +36,9 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 from .config import VLLMInferenceAdapterConfig
+log = logging.getLogger(__name__)
 def build_model_aliases():
     return [
         build_model_alias(
@@ -53,7 +58,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         self.client = None
     async def initialize(self) -> None:
-        print(f"Initializing VLLM client with base_url={self.config.url}")
+        log.info(f"Initializing VLLM client with base_url={self.config.url}")
         self.client = OpenAI(base_url=self.config.url, api_key=self.config.api_token)
     async def shutdown(self) -> None:


@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 import json
+import logging
 from typing import List
 from urllib.parse import urlparse
@@ -21,6 +22,8 @@ from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
 )
+log = logging.getLogger(__name__)
 class ChromaIndex(EmbeddingIndex):
     def __init__(self, client: chromadb.AsyncHttpClient, collection):
@@ -56,10 +59,7 @@ class ChromaIndex(EmbeddingIndex):
                 doc = json.loads(doc)
                 chunk = Chunk(**doc)
             except Exception:
-                import traceback
-                traceback.print_exc()
-                print(f"Failed to parse document: {doc}")
+                log.exception(f"Failed to parse document: {doc}")
                 continue
             chunks.append(chunk)
@@ -73,7 +73,7 @@ class ChromaIndex(EmbeddingIndex):
 class ChromaMemoryAdapter(Memory, MemoryBanksProtocolPrivate):
     def __init__(self, url: str) -> None:
-        print(f"Initializing ChromaMemoryAdapter with url: {url}")
+        log.info(f"Initializing ChromaMemoryAdapter with url: {url}")
         url = url.rstrip("/")
         parsed = urlparse(url)
@@ -88,12 +88,10 @@ class ChromaMemoryAdapter(Memory, MemoryBanksProtocolPrivate):
     async def initialize(self) -> None:
         try:
-            print(f"Connecting to Chroma server at: {self.host}:{self.port}")
+            log.info(f"Connecting to Chroma server at: {self.host}:{self.port}")
             self.client = await chromadb.AsyncHttpClient(host=self.host, port=self.port)
         except Exception as e:
-            import traceback
-            traceback.print_exc()
+            log.exception("Could not connect to Chroma server")
             raise RuntimeError("Could not connect to Chroma server") from e
     async def shutdown(self) -> None:
@@ -123,10 +121,7 @@ class ChromaMemoryAdapter(Memory, MemoryBanksProtocolPrivate):
                 data = json.loads(collection.metadata["bank"])
                 bank = parse_obj_as(VectorMemoryBank, data)
             except Exception:
-                import traceback
-                traceback.print_exc()
-                print(f"Failed to parse bank: {collection.metadata}")
+                log.exception(f"Failed to parse bank: {collection.metadata}")
                 continue
             index = BankWithIndex(
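
A note on the error-handling hunks above: `log.exception(...)` logs the message at ERROR level and automatically appends the active traceback, so the old `import traceback` / `traceback.print_exc()` / `print(...)` triple collapses into a single call. A tiny self-contained illustration (the failing JSON string is made up for the example):

```python
import json
import logging

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

try:
    json.loads("{not valid json")
except Exception:
    # One call replaces traceback.print_exc() + print(...): the message is
    # logged at ERROR level and the current traceback is appended to it.
    log.exception("Failed to parse document")
```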


@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+import logging
 from typing import List, Tuple
 import psycopg2
@@ -24,6 +25,8 @@ from llama_stack.providers.utils.memory.vector_store import (
 from .config import PGVectorConfig
+log = logging.getLogger(__name__)
 def check_extension_version(cur):
     cur.execute("SELECT extversion FROM pg_extension WHERE extname = 'vector'")
@@ -124,7 +127,7 @@ class PGVectorMemoryAdapter(Memory, MemoryBanksProtocolPrivate):
         self.cache = {}
     async def initialize(self) -> None:
-        print(f"Initializing PGVector memory adapter with config: {self.config}")
+        log.info(f"Initializing PGVector memory adapter with config: {self.config}")
         try:
             self.conn = psycopg2.connect(
                 host=self.config.host,
@@ -138,7 +141,7 @@ class PGVectorMemoryAdapter(Memory, MemoryBanksProtocolPrivate):
             version = check_extension_version(self.cursor)
             if version:
-                print(f"Vector extension version: {version}")
+                log.info(f"Vector extension version: {version}")
             else:
                 raise RuntimeError("Vector extension is not installed.")
@@ -151,9 +154,7 @@ class PGVectorMemoryAdapter(Memory, MemoryBanksProtocolPrivate):
                 """
             )
         except Exception as e:
-            import traceback
-            traceback.print_exc()
+            log.exception("Could not connect to PGVector database server")
             raise RuntimeError("Could not connect to PGVector database server") from e
     async def shutdown(self) -> None:


@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-import traceback
+import logging
 import uuid
 from typing import Any, Dict, List
@@ -23,6 +23,7 @@ from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
 )
+log = logging.getLogger(__name__)
 CHUNK_ID_KEY = "_chunk_id"
@@ -90,7 +91,7 @@ class QdrantIndex(EmbeddingIndex):
             try:
                 chunk = Chunk(**point.payload["chunk_content"])
             except Exception:
-                traceback.print_exc()
+                log.exception("Failed to parse chunk")
                 continue
             chunks.append(chunk)


@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 import json
+import logging
 from typing import Any, Dict, List, Optional
@@ -22,6 +23,8 @@ from llama_stack.providers.utils.memory.vector_store import (
 from .config import WeaviateConfig, WeaviateRequestProviderData
+log = logging.getLogger(__name__)
 class WeaviateIndex(EmbeddingIndex):
     def __init__(self, client: weaviate.Client, collection_name: str):
@@ -69,10 +72,7 @@ class WeaviateIndex(EmbeddingIndex):
                 chunk_dict = json.loads(chunk_json)
                 chunk = Chunk(**chunk_dict)
             except Exception:
-                import traceback
-                traceback.print_exc()
-                print(f"Failed to parse document: {chunk_json}")
+                log.exception(f"Failed to parse document: {chunk_json}")
                 continue
             chunks.append(chunk)