Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-06-28 02:53:30 +00:00)
Split off meta-reference-quantized provider
Parent: 7ff5800dea
Commit: 1ff0476002
10 changed files with 54 additions and 58 deletions
```diff
@@ -17,7 +17,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
     chat_completion_request_to_messages,
 )
 
-from .config import MetaReferenceImplConfig
+from .config import MetaReferenceInferenceConfig
 from .model_parallel import LlamaModelParallelGenerator
 
 # there's a single model parallel process running serving the model. for now,
@@ -26,7 +26,7 @@ SEMAPHORE = asyncio.Semaphore(1)
 
 
 class MetaReferenceInferenceImpl(Inference, ModelsProtocolPrivate):
-    def __init__(self, config: MetaReferenceImplConfig) -> None:
+    def __init__(self, config: MetaReferenceInferenceConfig) -> None:
         self.config = config
         model = resolve_model(config.model)
         if model is None:
```
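The substantive change in this file is the rename of the provider config class from `MetaReferenceImplConfig` to `MetaReferenceInferenceConfig`, making room for a separate config in the meta-reference-quantized provider this commit splits out. For context, a minimal sketch of what the renamed class might look like — only the `model` field is implied by the diff (via `resolve_model(config.model)`); the pydantic base class and everything else here are assumptions, not taken from the commit:

```python
# Hypothetical sketch of the renamed config class. Only the `model` field
# is implied by this diff (`resolve_model(config.model)`); the pydantic
# base class is an assumption, not confirmed by the commit.
from pydantic import BaseModel


class MetaReferenceInferenceConfig(BaseModel):
    # Identifier of the model this provider serves; resolved at startup
    # via `resolve_model(...)`.
    model: str
```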
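The second hunk's header also shows `SEMAPHORE = asyncio.Semaphore(1)`, matching the comment that a single model-parallel process serves the model. A minimal, self-contained sketch of that serialization pattern — the function names and the fake generation call are illustrative, not from the repo:

```python
import asyncio

# A single-slot semaphore: only one request runs at a time, mirroring the
# diff's comment that one model-parallel process serves the model.
SEMAPHORE = asyncio.Semaphore(1)


async def serve_request(prompt: str) -> str:
    # Concurrent callers queue on the semaphore instead of reaching the
    # single worker process in parallel.
    async with SEMAPHORE:
        await asyncio.sleep(0.1)  # stand-in for the real generation call
        return f"response to: {prompt}"


async def main() -> None:
    # Three concurrent requests are served strictly one after another.
    print(await asyncio.gather(*(serve_request(p) for p in ("a", "b", "c"))))


if __name__ == "__main__":
    asyncio.run(main())
```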