forked from phoenix-oss/llama-stack-mirror
JSON serialization for parallel processing queue (#232)
* send/recv pydantic json over socket
* fixup
* address feedback
* bidirectional wrapper
* second round of feedback
This commit is contained in:
parent
0f66ae0f61
commit
7a8aa775e5
3 changed files with 158 additions and 52 deletions
|
@@ -35,12 +35,14 @@ from llama_models.llama3.reference_impl.multimodal.model import (
|
|||
CrossAttentionTransformer,
|
||||
)
|
||||
from llama_models.sku_list import resolve_model
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.apis.inference import QuantizationType
|
||||
|
||||
from llama_stack.distribution.utils.model_utils import model_local_dir
|
||||
|
||||
from pydantic import BaseModel
|
||||
from termcolor import cprint
|
||||
|
||||
from .config import MetaReferenceImplConfig
|
||||
|
||||
|
||||
|
@@ -58,8 +60,7 @@ def model_checkpoint_dir(model) -> str:
|
|||
return str(checkpoint_dir)
|
||||
|
||||
|
||||
@dataclass
|
||||
class TokenResult:
|
||||
class TokenResult(BaseModel):
|
||||
token: int
|
||||
text: str
|
||||
logprobs: Optional[List[float]] = None
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue