Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-04 20:14:13 +00:00)
commit 2cf9915806
parent 041cafbee3

    Distribution server now functioning

21 changed files with 635 additions and 266 deletions
@@ -13,7 +13,7 @@ from llama_models.llama3_1.api.chat_format import ChatFormat
 from llama_models.llama3_1.api.datatypes import Message
 from llama_models.llama3_1.api.tokenizer import Tokenizer
 
-from .api.config import InlineImplConfig
+from .api.config import MetaReferenceImplConfig
 from .generation import Llama
 from .parallel_utils import ModelParallelProcessGroup
 
@@ -42,7 +42,7 @@ class ModelRunner:
     )
 
 
-def init_model_cb(config: InlineImplConfig):
+def init_model_cb(config: MetaReferenceImplConfig):
     llama = Llama.build(config)
     return ModelRunner(llama)
 
@@ -58,7 +58,7 @@ class LlamaModelParallelGenerator:
     clear at the callsite why we need to use a context manager.
     """
 
-    def __init__(self, config: InlineImplConfig):
+    def __init__(self, config: MetaReferenceImplConfig):
         self.config = config
 
         # this is a hack because Agent's loop uses this to tokenize and check if input is too long
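For readers skimming the hunks: the change is a pure rename of the meta-reference inference config type, InlineImplConfig → MetaReferenceImplConfig; no behavior changes. Below is a minimal, self-contained sketch of the callback pattern in the second hunk. Only init_model_cb, ModelRunner, and the config name come from the diff; Llama.build's fields and the run_in_worker helper are illustrative stand-ins, not the repository's actual API.

from dataclasses import dataclass
from functools import partial
from typing import Callable


@dataclass
class MetaReferenceImplConfig:
    # Stand-in for .api.config.MetaReferenceImplConfig (fields assumed).
    checkpoint_dir: str


class Llama:
    # Stand-in for .generation.Llama; the real build() loads checkpoints.
    @staticmethod
    def build(config: MetaReferenceImplConfig) -> "Llama":
        print(f"would load weights from {config.checkpoint_dir}")
        return Llama()


class ModelRunner:
    def __init__(self, llama: Llama) -> None:
        self.llama = llama


def init_model_cb(config: MetaReferenceImplConfig) -> ModelRunner:
    # Matches the diff: build the model, wrap it in a runner.
    llama = Llama.build(config)
    return ModelRunner(llama)


def run_in_worker(init_cb: Callable[[], ModelRunner]) -> ModelRunner:
    # A real ModelParallelProcessGroup would invoke the callback inside each
    # spawned worker process; calling it in-process keeps the sketch runnable.
    return init_cb()


if __name__ == "__main__":
    config = MetaReferenceImplConfig(checkpoint_dir="/tmp/llama-ckpt")
    runner = run_in_worker(partial(init_model_cb, config))

Binding the config with functools.partial keeps the callback zero-argument from the worker's point of view, which is why only the config type annotation had to change in this hunk.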