JSON serialization for parallel processing queue (#232)

* send/recv pydantic json over socket

* fixup

* address feedback

* bidirectional wrapper

* second round of feedback
This commit is contained in:
Dalton Flanagan 2024-10-09 17:24:12 -04:00 committed by GitHub
parent 0f66ae0f61
commit 7a8aa775e5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 158 additions and 52 deletions

View file

@@ -35,12 +35,14 @@ from llama_models.llama3.reference_impl.multimodal.model import (
CrossAttentionTransformer,
)
from llama_models.sku_list import resolve_model
from termcolor import cprint
from llama_stack.apis.inference import QuantizationType
from llama_stack.distribution.utils.model_utils import model_local_dir
from pydantic import BaseModel
from termcolor import cprint
from .config import MetaReferenceImplConfig
@@ -58,8 +60,7 @@ def model_checkpoint_dir(model) -> str:
return str(checkpoint_dir)
@dataclass
class TokenResult:
class TokenResult(BaseModel):
token: int
text: str
logprobs: Optional[List[float]] = None