mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-31 07:30:00 +00:00
updates
This commit is contained in:
parent
0cfb2e2473
commit
73d927850e
4 changed files with 43 additions and 316 deletions
|
|
@ -5,7 +5,6 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from typing import AsyncGenerator, List, Optional, Union
|
||||
|
||||
|
|
@ -44,6 +43,7 @@ from llama_stack.apis.inference import (
|
|||
UserMessage,
|
||||
)
|
||||
from llama_stack.apis.models import Model, ModelType
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.models.llama.llama3.chat_format import ChatFormat as Llama3ChatFormat
|
||||
from llama_stack.models.llama.llama3.tokenizer import Tokenizer as Llama3Tokenizer
|
||||
from llama_stack.models.llama.llama4.chat_format import ChatFormat as Llama4ChatFormat
|
||||
|
|
@ -72,7 +72,7 @@ from .config import MetaReferenceInferenceConfig
|
|||
from .generators import LlamaGenerator
|
||||
from .model_parallel import LlamaModelParallelGenerator
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
log = get_logger(__name__, category="inference")
|
||||
# there's a single model parallel process running serving the model. for now,
|
||||
# we don't support multiple concurrent requests to this process.
|
||||
SEMAPHORE = asyncio.Semaphore(1)
|
||||
|
|
@ -159,7 +159,7 @@ class MetaReferenceInferenceImpl(
|
|||
self.model_id = model_id
|
||||
self.llama_model = llama_model
|
||||
|
||||
print("Warming up...")
|
||||
log.info("Warming up...")
|
||||
await self.completion(
|
||||
model_id=model_id,
|
||||
content="Hello, world!",
|
||||
|
|
@ -170,7 +170,7 @@ class MetaReferenceInferenceImpl(
|
|||
messages=[UserMessage(content="Hi how are you?")],
|
||||
sampling_params=SamplingParams(max_tokens=20),
|
||||
)
|
||||
print("Warmed up!")
|
||||
log.info("Warmed up!")
|
||||
|
||||
def check_model(self, request) -> None:
|
||||
if self.model_id is None or self.llama_model is None:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue