mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 12:07:34 +00:00
Distribution server now functioning
This commit is contained in:
parent
041cafbee3
commit
2cf9915806
21 changed files with 635 additions and 266 deletions
|
@ -7,9 +7,6 @@
|
|||
from enum import Enum
|
||||
from typing import Literal, Optional, Union
|
||||
|
||||
from hydra.core.config_store import ConfigStore
|
||||
|
||||
from hydra_zen import builds
|
||||
from llama_models.llama3_1.api.datatypes import CheckpointQuantizationFormat
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
@ -19,13 +16,6 @@ from typing_extensions import Annotated
|
|||
from .datatypes import QuantizationConfig
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ImplType(Enum):
|
||||
inline = "inline"
|
||||
remote = "remote"
|
||||
ollama = "ollama"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class CheckpointType(Enum):
|
||||
pytorch = "pytorch"
|
||||
|
@ -66,8 +56,8 @@ class ModelCheckpointConfig(BaseModel):
|
|||
|
||||
|
||||
@json_schema_type
|
||||
class InlineImplConfig(BaseModel):
|
||||
impl_type: Literal[ImplType.inline.value] = ImplType.inline.value
|
||||
class MetaReferenceImplConfig(BaseModel):
|
||||
model: str
|
||||
checkpoint_config: ModelCheckpointConfig
|
||||
quantization: Optional[QuantizationConfig] = None
|
||||
torch_seed: Optional[int] = None
|
||||
|
@ -75,28 +65,7 @@ class InlineImplConfig(BaseModel):
|
|||
max_batch_size: int = 1
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RemoteImplConfig(BaseModel):
|
||||
impl_type: Literal[ImplType.remote.value] = ImplType.remote.value
|
||||
url: str = Field(..., description="The URL of the remote module")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OllamaImplConfig(BaseModel):
|
||||
impl_type: Literal[ImplType.ollama.value] = ImplType.ollama.value
|
||||
model: str = Field(..., description="The name of the model in ollama catalog")
|
||||
url: str = Field(..., description="The URL for the ollama server")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class InferenceConfig(BaseModel):
|
||||
impl_config: Annotated[
|
||||
Union[InlineImplConfig, RemoteImplConfig, OllamaImplConfig],
|
||||
Field(discriminator="impl_type"),
|
||||
]
|
||||
|
||||
|
||||
InferenceHydraConfig = builds(InferenceConfig)
|
||||
|
||||
cs = ConfigStore.instance()
|
||||
cs.store(name="inference_config", node=InferenceHydraConfig)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue