mirror of https://github.com/meta-llama/llama-stack.git
synced 2025-12-17 16:52:41 +00:00

commit 19bc7e8942
Merge branch 'meta-llama:main' into main

38 changed files with 244 additions and 173 deletions
@@ -5,6 +5,7 @@
 # the root directory of this source tree.

 import asyncio
+import logging

 from typing import AsyncGenerator, List

@@ -25,6 +26,7 @@ from .config import MetaReferenceInferenceConfig
 from .generation import Llama
 from .model_parallel import LlamaModelParallelGenerator

+log = logging.getLogger(__name__)
 # there's a single model parallel process running serving the model. for now,
 # we don't support multiple concurrent requests to this process.
 SEMAPHORE = asyncio.Semaphore(1)
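The hunk above places the new module logger next to the pre-existing concurrency guard: `asyncio.Semaphore(1)` hands out a single permit, so every request to the lone model-parallel process is serialized, exactly as the code comment says. A minimal runnable sketch of that pattern, with a hypothetical run_inference coroutine standing in for the provider's real generation call:

import asyncio
import logging

log = logging.getLogger(__name__)

# A single permit: only one coroutine may talk to the
# model-parallel process at a time, mirroring the patch.
SEMAPHORE = asyncio.Semaphore(1)


async def run_inference(prompt: str) -> str:
    # Hypothetical stand-in for the provider's generation call.
    async with SEMAPHORE:
        log.info("semaphore acquired for %r", prompt)
        await asyncio.sleep(0.1)  # pretend this is model generation
        return f"completion for {prompt!r}"


async def main() -> None:
    # Three requests launched concurrently still execute one at a time.
    results = await asyncio.gather(*(run_inference(p) for p in ("a", "b", "c")))
    log.info("results: %s", results)


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    asyncio.run(main())

With a count of 1 the semaphore behaves like an async mutex; raising the count would be the natural knob if the provider later supports limited concurrency.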
@@ -49,7 +51,7 @@ class MetaReferenceInferenceImpl(Inference, ModelRegistryHelper, ModelsProtocolP
         # verify that the checkpoint actually is for this model lol

     async def initialize(self) -> None:
-        print(f"Loading model `{self.model.descriptor()}`")
+        log.info(f"Loading model `{self.model.descriptor()}`")
         if self.config.create_distributed_process_group:
             self.generator = LlamaModelParallelGenerator(self.config)
             self.generator.start()
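The only behavioral change in this hunk is replacing a bare print with the module logger declared earlier, so the startup message flows through Python's logging framework, where the host application controls levels, handlers, and formatting. A small sketch of the same pattern, with a hypothetical FakeModel standing in for the provider's model object:

import logging

log = logging.getLogger(__name__)


class FakeModel:
    # Hypothetical stand-in for the provider's model object.
    def descriptor(self) -> str:
        return "example-model"


def load(model: FakeModel) -> None:
    # Unlike print(), log.info() respects the application's logging
    # configuration: level filtering, handlers, and formatting.
    log.info(f"Loading model `{model.descriptor()}`")


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(name)s: %(message)s")
    load(FakeModel())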