mirror of https://github.com/meta-llama/llama-stack.git
synced 2025-12-17 16:52:41 +00:00

commit 19bc7e8942
Merge branch 'meta-llama:main' into main

38 changed files with 244 additions and 173 deletions
@@ -5,6 +5,7 @@
 # the root directory of this source tree.

 import asyncio
+import logging

 from typing import AsyncGenerator, List

@@ -25,6 +26,7 @@ from .config import MetaReferenceInferenceConfig
 from .generation import Llama
 from .model_parallel import LlamaModelParallelGenerator

+log = logging.getLogger(__name__)
 # there's a single model parallel process running serving the model. for now,
 # we don't support multiple concurrent requests to this process.
 SEMAPHORE = asyncio.Semaphore(1)
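The hunk above places the new module logger next to the pre-existing concurrency guard: `asyncio.Semaphore(1)` hands out a single permit, so every request to the lone model-parallel process is serialized, exactly as the code comment says. A minimal runnable sketch of that pattern, with a hypothetical run_inference coroutine standing in for the provider's real generation call:

import asyncio
import logging

log = logging.getLogger(__name__)

# A single permit: only one coroutine may talk to the
# model-parallel process at a time, mirroring the patch.
SEMAPHORE = asyncio.Semaphore(1)


async def run_inference(prompt: str) -> str:
    # Hypothetical stand-in for the provider's generation call.
    async with SEMAPHORE:
        log.info("semaphore acquired for %r", prompt)
        await asyncio.sleep(0.1)  # pretend this is model generation
        return f"completion for {prompt!r}"


async def main() -> None:
    # Three requests launched concurrently still execute one at a time.
    results = await asyncio.gather(*(run_inference(p) for p in ("a", "b", "c")))
    log.info("results: %s", results)


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    asyncio.run(main())

With a count of 1 the semaphore behaves like an async mutex; raising the count would be the natural knob if the provider later supports limited concurrency.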
@@ -49,7 +51,7 @@ class MetaReferenceInferenceImpl(Inference, ModelRegistryHelper, ModelsProtocolP
         # verify that the checkpoint actually is for this model lol

     async def initialize(self) -> None:
-        print(f"Loading model `{self.model.descriptor()}`")
+        log.info(f"Loading model `{self.model.descriptor()}`")
         if self.config.create_distributed_process_group:
             self.generator = LlamaModelParallelGenerator(self.config)
             self.generator.start()
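The only behavioral change in this hunk is replacing a bare print with the module logger declared earlier, so the startup message flows through Python's logging framework, where the host application controls levels, handlers, and formatting. A small sketch of the same pattern, with a hypothetical FakeModel standing in for the provider's model object:

import logging

log = logging.getLogger(__name__)


class FakeModel:
    # Hypothetical stand-in for the provider's model object.
    def descriptor(self) -> str:
        return "example-model"


def load(model: FakeModel) -> None:
    # Unlike print(), log.info() respects the application's logging
    # configuration: level filtering, handlers, and formatting.
    log.info(f"Loading model `{model.descriptor()}`")


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(name)s: %(message)s")
    load(FakeModel())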