forked from phoenix-oss/llama-stack-mirror

ModelAlias -> ProviderModelEntry

parent 561295af76 · commit 07ccf908f7

27 changed files with 132 additions and 132 deletions

This commit mechanically renames the `ModelAlias` datatype to `ProviderModelEntry` (and the `build_model_alias` / `build_hf_repo_model_alias` helpers to `build_model_entry` / `build_hf_repo_model_entry`) across every inference provider, distribution template, and doc page that referenced the old names.
@@ -130,7 +130,7 @@ llama stack run ./run-with-safety.yaml \
 ### (Optional) Update Model Serving Configuration
 
 ```{note}
-Please check the [model_aliases](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L45) for the supported Ollama models.
+Please check the [model_entries](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L45) for the supported Ollama models.
 ```
 
 To serve a new model with `ollama`
@@ -46,7 +46,7 @@ from llama_stack.providers.utils.inference.embedding_mixin import (
 )
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_hf_repo_model_alias,
+    build_hf_repo_model_entry,
 )
 from llama_stack.providers.utils.inference.prompt_adapter import (
     augment_content_with_response_format_prompt,
@@ -116,7 +116,7 @@ class MetaReferenceInferenceImpl(
 
         self.model_registry_helper = ModelRegistryHelper(
             [
-                build_hf_repo_model_alias(
+                build_hf_repo_model_entry(
                     llama_model.descriptor(),
                     llama_model.core_model_id.value,
                 )
@@ -43,12 +43,12 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
     interleaved_content_as_str,
 )
 
-from .models import MODEL_ALIASES
+from .models import MODEL_ENTRIES
 
 
 class BedrockInferenceAdapter(ModelRegistryHelper, Inference):
     def __init__(self, config: BedrockConfig) -> None:
-        ModelRegistryHelper.__init__(self, MODEL_ALIASES)
+        ModelRegistryHelper.__init__(self, MODEL_ENTRIES)
         self._config = config
 
         self._client = create_bedrock_client(config)
@@ -6,19 +6,19 @@
 
 from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
-    build_hf_repo_model_alias,
+    build_hf_repo_model_entry,
 )
 
-MODEL_ALIASES = [
-    build_hf_repo_model_alias(
+MODEL_ENTRIES = [
+    build_hf_repo_model_entry(
         "meta.llama3-1-8b-instruct-v1:0",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta.llama3-1-70b-instruct-v1:0",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta.llama3-1-405b-instruct-v1:0",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
@@ -41,14 +41,14 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import CerebrasImplConfig
-from .models import model_aliases
+from .models import model_entries
 
 
 class CerebrasInferenceAdapter(ModelRegistryHelper, Inference):
     def __init__(self, config: CerebrasImplConfig) -> None:
         ModelRegistryHelper.__init__(
             self,
-            model_aliases=model_aliases,
+            model_entries=model_entries,
         )
         self.config = config
 
@@ -6,15 +6,15 @@
 
 from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
-    build_hf_repo_model_alias,
+    build_hf_repo_model_entry,
 )
 
-model_aliases = [
-    build_hf_repo_model_alias(
+model_entries = [
+    build_hf_repo_model_entry(
         "llama3.1-8b",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama-3.3-70b",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
@@ -25,7 +25,7 @@ from llama_stack.apis.inference import (
 from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_hf_repo_model_alias,
+    build_hf_repo_model_entry,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     get_sampling_options,
@@ -38,12 +38,12 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 
 from .config import DatabricksImplConfig
 
-model_aliases = [
-    build_hf_repo_model_alias(
+model_entries = [
+    build_hf_repo_model_entry(
         "databricks-meta-llama-3-1-70b-instruct",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "databricks-meta-llama-3-1-405b-instruct",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
@@ -52,7 +52,7 @@ model_aliases = [
 
 class DatabricksInferenceAdapter(ModelRegistryHelper, Inference):
     def __init__(self, config: DatabricksImplConfig) -> None:
-        ModelRegistryHelper.__init__(self, model_aliases=model_aliases)
+        ModelRegistryHelper.__init__(self, model_entries=model_entries)
         self.config = config
 
     async def initialize(self) -> None:
@@ -47,12 +47,12 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import FireworksImplConfig
-from .models import MODEL_ALIASES
+from .models import MODEL_ENTRIES
 
 
 class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData):
     def __init__(self, config: FireworksImplConfig) -> None:
-        ModelRegistryHelper.__init__(self, MODEL_ALIASES)
+        ModelRegistryHelper.__init__(self, MODEL_ENTRIES)
         self.config = config
 
     async def initialize(self) -> None:
@@ -6,47 +6,47 @@
 
 from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
-    build_hf_repo_model_alias,
+    build_hf_repo_model_entry,
 )
 
-MODEL_ALIASES = [
-    build_hf_repo_model_alias(
+MODEL_ENTRIES = [
+    build_hf_repo_model_entry(
         "accounts/fireworks/models/llama-v3p1-8b-instruct",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "accounts/fireworks/models/llama-v3p1-70b-instruct",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "accounts/fireworks/models/llama-v3p1-405b-instruct",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "accounts/fireworks/models/llama-v3p2-1b-instruct",
         CoreModelId.llama3_2_1b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "accounts/fireworks/models/llama-v3p2-3b-instruct",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "accounts/fireworks/models/llama-v3p2-11b-vision-instruct",
         CoreModelId.llama3_2_11b_vision_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
         CoreModelId.llama3_2_90b_vision_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "accounts/fireworks/models/llama-v3p3-70b-instruct",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "accounts/fireworks/models/llama-guard-3-8b",
         CoreModelId.llama_guard_3_8b.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "accounts/fireworks/models/llama-guard-3-11b-vision",
         CoreModelId.llama_guard_3_11b_vision.value,
     ),
@@ -31,8 +31,8 @@ from llama_stack.models.llama.sku_list import CoreModelId
 from llama_stack.providers.remote.inference.groq.config import GroqConfig
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_hf_repo_model_alias,
-    build_model_alias,
+    build_hf_repo_model_entry,
+    build_model_entry,
 )
 
 from .groq_utils import (
@@ -41,20 +41,20 @@ from .groq_utils import (
     convert_chat_completion_response_stream,
 )
 
-_MODEL_ALIASES = [
-    build_hf_repo_model_alias(
+_MODEL_ENTRIES = [
+    build_hf_repo_model_entry(
         "llama3-8b-8192",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_model_alias(
+    build_model_entry(
         "llama-3.1-8b-instant",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama3-70b-8192",
         CoreModelId.llama3_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama-3.3-70b-versatile",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
@@ -62,7 +62,7 @@ _MODEL_ALIASES = [
     # Preview models aren't recommended for production use, but we include this one
     # to pass the test fixture
     # TODO(aidand): Replace this with a stable model once Groq supports it
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama-3.2-3b-preview",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
@@ -73,7 +73,7 @@ class GroqInferenceAdapter(Inference, ModelRegistryHelper, NeedsRequestProviderD
     _config: GroqConfig
 
     def __init__(self, config: GroqConfig):
-        ModelRegistryHelper.__init__(self, model_aliases=_MODEL_ALIASES)
+        ModelRegistryHelper.__init__(self, model_entries=_MODEL_ENTRIES)
         self._config = config
 
     def completion(
@@ -6,43 +6,43 @@
 
 from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
-    build_hf_repo_model_alias,
+    build_hf_repo_model_entry,
 )
 
-_MODEL_ALIASES = [
-    build_hf_repo_model_alias(
+_MODEL_ENTRIES = [
+    build_hf_repo_model_entry(
         "meta/llama3-8b-instruct",
         CoreModelId.llama3_8b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta/llama3-70b-instruct",
         CoreModelId.llama3_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta/llama-3.1-8b-instruct",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta/llama-3.1-70b-instruct",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta/llama-3.1-405b-instruct",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta/llama-3.2-1b-instruct",
         CoreModelId.llama3_2_1b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta/llama-3.2-3b-instruct",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta/llama-3.2-11b-vision-instruct",
         CoreModelId.llama3_2_11b_vision_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta/llama-3.2-90b-vision-instruct",
         CoreModelId.llama3_2_90b_vision_instruct.value,
     ),
@@ -33,7 +33,7 @@ from llama_stack.providers.utils.inference.model_registry import (
 from llama_stack.providers.utils.inference.prompt_adapter import content_has_media
 
 from . import NVIDIAConfig
-from .models import _MODEL_ALIASES
+from .models import _MODEL_ENTRIES
 from .openai_utils import (
     convert_chat_completion_request,
     convert_completion_request,
@@ -50,7 +50,7 @@ logger = logging.getLogger(__name__)
 class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
     def __init__(self, config: NVIDIAConfig) -> None:
         # TODO(mf): filter by available models
-        ModelRegistryHelper.__init__(self, model_aliases=_MODEL_ALIASES)
+        ModelRegistryHelper.__init__(self, model_entries=_MODEL_ENTRIES)
 
         logger.info(f"Initializing NVIDIAInferenceAdapter({config.url})...")
 
@@ -35,8 +35,8 @@ from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.datatypes import ModelsProtocolPrivate
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_hf_repo_model_alias,
-    build_model_alias,
+    build_hf_repo_model_entry,
+    build_model_entry,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     OpenAICompatCompletionChoice,
@@ -58,74 +58,74 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 
 log = logging.getLogger(__name__)
 
-model_aliases = [
-    build_hf_repo_model_alias(
+model_entries = [
+    build_hf_repo_model_entry(
         "llama3.1:8b-instruct-fp16",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_model_alias(
+    build_model_entry(
         "llama3.1:8b",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama3.1:70b-instruct-fp16",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_model_alias(
+    build_model_entry(
         "llama3.1:70b",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama3.1:405b-instruct-fp16",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
-    build_model_alias(
+    build_model_entry(
         "llama3.1:405b",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama3.2:1b-instruct-fp16",
         CoreModelId.llama3_2_1b_instruct.value,
     ),
-    build_model_alias(
+    build_model_entry(
         "llama3.2:1b",
         CoreModelId.llama3_2_1b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama3.2:3b-instruct-fp16",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
-    build_model_alias(
+    build_model_entry(
         "llama3.2:3b",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama3.2-vision:11b-instruct-fp16",
         CoreModelId.llama3_2_11b_vision_instruct.value,
     ),
-    build_model_alias(
+    build_model_entry(
         "llama3.2-vision:latest",
         CoreModelId.llama3_2_11b_vision_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama3.2-vision:90b-instruct-fp16",
         CoreModelId.llama3_2_90b_vision_instruct.value,
     ),
-    build_model_alias(
+    build_model_entry(
         "llama3.2-vision:90b",
         CoreModelId.llama3_2_90b_vision_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama3.3:70b",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
     # The Llama Guard models don't have their full fp16 versions
     # so we are going to alias their default version to the canonical SKU
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama-guard3:8b",
         CoreModelId.llama_guard_3_8b.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama-guard3:1b",
         CoreModelId.llama_guard_3_1b.value,
     ),
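A note on the two helpers alternating above, based on their definitions in the `model_registry.py` hunks later in this diff: `build_hf_repo_model_entry` also registers the model's HuggingFace repo name as an alias, while `build_model_entry` registers only the provider-side id. A minimal sketch (the printed alias value is illustrative, assuming the usual `meta-llama/...` repo naming; it is not output from this commit):

```python
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
    build_hf_repo_model_entry,
    build_model_entry,
)

# fp16 tag: also reachable via its HuggingFace repo name
hf_entry = build_hf_repo_model_entry(
    "llama3.1:8b-instruct-fp16",
    CoreModelId.llama3_1_8b_instruct.value,
)
# default quantized tag: reachable only via the Ollama tag itself
plain_entry = build_model_entry(
    "llama3.1:8b",
    CoreModelId.llama3_1_8b_instruct.value,
)

print(hf_entry.aliases)     # e.g. ["meta-llama/Llama-3.1-8B-Instruct", ...]
print(plain_entry.aliases)  # []
```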
@@ -134,7 +134,7 @@ model_aliases = [
 
 class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
     def __init__(self, url: str) -> None:
-        self.register_helper = ModelRegistryHelper(model_aliases)
+        self.register_helper = ModelRegistryHelper(model_entries)
         self.url = url
 
     @property
@@ -6,43 +6,43 @@
 
 from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
-    build_hf_repo_model_alias,
+    build_hf_repo_model_entry,
 )
 
-MODEL_ALIASES = [
-    build_hf_repo_model_alias(
+MODEL_ENTRIES = [
+    build_hf_repo_model_entry(
         "Meta-Llama-3.1-8B-Instruct",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "Meta-Llama-3.1-70B-Instruct",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "Meta-Llama-3.1-405B-Instruct",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "Meta-Llama-3.2-1B-Instruct",
         CoreModelId.llama3_2_1b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "Meta-Llama-3.2-3B-Instruct",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "Meta-Llama-3.3-70B-Instruct",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "Llama-3.2-11B-Vision-Instruct",
         CoreModelId.llama3_2_11b_vision_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "Llama-3.2-90B-Vision-Instruct",
         CoreModelId.llama3_2_90b_vision_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "Meta-Llama-Guard-3-8B",
         CoreModelId.llama_guard_3_8b.value,
     ),
@@ -31,12 +31,12 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import SambaNovaImplConfig
-from .models import MODEL_ALIASES
+from .models import MODEL_ENTRIES
 
 
 class SambaNovaInferenceAdapter(ModelRegistryHelper, Inference):
     def __init__(self, config: SambaNovaImplConfig) -> None:
-        ModelRegistryHelper.__init__(self, model_aliases=MODEL_ALIASES)
+        ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES)
         self.config = config
 
     async def initialize(self) -> None:
@@ -32,7 +32,7 @@ from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.datatypes import ModelsProtocolPrivate
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_hf_repo_model_alias,
+    build_hf_repo_model_entry,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     OpenAICompatCompletionChoice,
@@ -53,9 +53,9 @@ from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImpl
 log = logging.getLogger(__name__)
 
 
-def build_hf_repo_model_aliases():
+def build_hf_repo_model_entries():
     return [
-        build_hf_repo_model_alias(
+        build_hf_repo_model_entry(
             model.huggingface_repo,
             model.descriptor(),
         )
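The `for` clause of this comprehension falls outside the hunk context. A plausible completion, inferred from the `_HfAdapter.__init__` hunk just below (whose dict comprehension filters `all_registered_models()` the same way) rather than taken verbatim from this commit:

```python
from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.utils.inference.model_registry import build_hf_repo_model_entry


def build_hf_repo_model_entries():
    # One entry per canonical Llama model that ships a HuggingFace repo;
    # the filter mirrors the dict comprehension in _HfAdapter below.
    return [
        build_hf_repo_model_entry(
            model.huggingface_repo,
            model.descriptor(),
        )
        for model in all_registered_models()
        if model.huggingface_repo
    ]
```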
@@ -70,7 +70,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
     model_id: str
 
     def __init__(self) -> None:
-        self.register_helper = ModelRegistryHelper(build_hf_repo_model_aliases())
+        self.register_helper = ModelRegistryHelper(build_hf_repo_model_entries())
         self.huggingface_repo_to_llama_model_id = {
             model.huggingface_repo: model.descriptor() for model in all_registered_models() if model.huggingface_repo
         }
@@ -6,43 +6,43 @@
 
 from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
-    build_hf_repo_model_alias,
+    build_hf_repo_model_entry,
 )
 
-MODEL_ALIASES = [
-    build_hf_repo_model_alias(
+MODEL_ENTRIES = [
+    build_hf_repo_model_entry(
         "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta-llama/Llama-3.2-3B-Instruct-Turbo",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
         CoreModelId.llama3_2_11b_vision_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
         CoreModelId.llama3_2_90b_vision_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta-llama/Llama-3.3-70B-Instruct-Turbo",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta-llama/Meta-Llama-Guard-3-8B",
         CoreModelId.llama_guard_3_8b.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta-llama/Llama-Guard-3-11B-Vision-Turbo",
         CoreModelId.llama_guard_3_11b_vision.value,
     ),
@@ -46,12 +46,12 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import TogetherImplConfig
-from .models import MODEL_ALIASES
+from .models import MODEL_ENTRIES
 
 
 class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData):
     def __init__(self, config: TogetherImplConfig) -> None:
-        ModelRegistryHelper.__init__(self, MODEL_ALIASES)
+        ModelRegistryHelper.__init__(self, MODEL_ENTRIES)
         self.config = config
 
     async def initialize(self) -> None:
@@ -38,7 +38,7 @@ from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.datatypes import ModelsProtocolPrivate
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_hf_repo_model_alias,
+    build_hf_repo_model_entry,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     OpenAICompatCompletionResponse,
@@ -62,9 +62,9 @@ from .config import VLLMInferenceAdapterConfig
 log = logging.getLogger(__name__)
 
 
-def build_hf_repo_model_aliases():
+def build_hf_repo_model_entries():
     return [
-        build_hf_repo_model_alias(
+        build_hf_repo_model_entry(
             model.huggingface_repo,
             model.descriptor(),
         )
@@ -204,7 +204,7 @@ async def _process_vllm_chat_completion_stream_response(
 
 class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
     def __init__(self, config: VLLMInferenceAdapterConfig) -> None:
-        self.register_helper = ModelRegistryHelper(build_hf_repo_model_aliases())
+        self.register_helper = ModelRegistryHelper(build_hf_repo_model_entries())
         self.config = config
         self.client = None
 
@@ -18,7 +18,7 @@ from llama_stack.providers.utils.inference import (
 
 # TODO: this class is more confusing than useful right now. We need to make it
 # more closer to the Model class.
-class ModelAlias(BaseModel):
+class ProviderModelEntry(BaseModel):
     provider_model_id: str
     aliases: List[str] = Field(default_factory=list)
     llama_model: Optional[str] = None
@@ -32,8 +32,8 @@ def get_huggingface_repo(model_descriptor: str) -> Optional[str]:
     return None
 
 
-def build_hf_repo_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias:
-    return ModelAlias(
+def build_hf_repo_model_entry(provider_model_id: str, model_descriptor: str) -> ProviderModelEntry:
+    return ProviderModelEntry(
         provider_model_id=provider_model_id,
         aliases=[
             get_huggingface_repo(model_descriptor),
@@ -42,8 +42,8 @@ def build_hf_repo_model_alias(provider_model_id: str, model_descriptor: str) ->
     )
 
 
-def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias:
-    return ModelAlias(
+def build_model_entry(provider_model_id: str, model_descriptor: str) -> ProviderModelEntry:
+    return ProviderModelEntry(
         provider_model_id=provider_model_id,
         aliases=[],
         llama_model=model_descriptor,
@@ -51,10 +51,10 @@ def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAli
 
 
 class ModelRegistryHelper(ModelsProtocolPrivate):
-    def __init__(self, model_aliases: List[ModelAlias]):
+    def __init__(self, model_entries: List[ProviderModelEntry]):
         self.alias_to_provider_id_map = {}
         self.provider_id_to_llama_model_map = {}
-        for alias_obj in model_aliases:
+        for alias_obj in model_entries:
             for alias in alias_obj.aliases:
                 self.alias_to_provider_id_map[alias] = alias_obj.provider_model_id
             # also add a mapping from provider model id to itself for easy lookup
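The three hunks above are the heart of the rename. Putting them together, a minimal sketch of the renamed datatype and how `ModelRegistryHelper` indexes it (the field names and the alias map are as shown in the hunks; the example entry values and the lookup at the end are illustrative, not code from this commit):

```python
from typing import List, Optional
from pydantic import BaseModel, Field


class ProviderModelEntry(BaseModel):
    provider_model_id: str                             # id the provider serves
    aliases: List[str] = Field(default_factory=list)   # alternate lookup names
    llama_model: Optional[str] = None                  # canonical Llama descriptor


# ModelRegistryHelper maps every alias, plus the provider model id itself,
# to the provider model id, so clients can use any known name.
entry = ProviderModelEntry(
    provider_model_id="llama3.1:8b",
    aliases=["meta-llama/Llama-3.1-8B-Instruct"],
    llama_model="Llama3.1-8B-Instruct",
)
alias_to_provider_id = {alias: entry.provider_model_id for alias in entry.aliases}
alias_to_provider_id[entry.provider_model_id] = entry.provider_model_id
assert alias_to_provider_id["meta-llama/Llama-3.1-8B-Instruct"] == "llama3.1:8b"
```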
@@ -10,7 +10,7 @@ from llama_stack.apis.models import ModelInput
 from llama_stack.distribution.datatypes import Provider, ToolGroupInput
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
-from llama_stack.providers.remote.inference.bedrock.models import MODEL_ALIASES
+from llama_stack.providers.remote.inference.bedrock.models import MODEL_ENTRIES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
@@ -47,7 +47,7 @@ def get_distribution_template() -> DistributionTemplate:
             provider_model_id=m.provider_model_id,
             provider_id="bedrock",
         )
-        for m in MODEL_ALIASES
+        for m in MODEL_ENTRIES
     ]
     default_tool_groups = [
         ToolGroupInput(
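The same one-line substitution repeats in each distribution template below (cerebras, fireworks, nvidia, sambanova, together): the provider's entry list is expanded into `ModelInput` registrations. A sketch of the pattern, assuming only what the hunk shows (the `default_models = [` opener and any additional `ModelInput` arguments sit outside the hunk context):

```python
from llama_stack.apis.models import ModelInput
from llama_stack.providers.remote.inference.bedrock.models import MODEL_ENTRIES

# Register every bedrock entry with the distribution; provider_model_id
# comes straight from each ProviderModelEntry.
default_models = [
    ModelInput(
        provider_model_id=m.provider_model_id,
        provider_id="bedrock",
    )
    for m in MODEL_ENTRIES
]
```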
@@ -14,7 +14,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
-from llama_stack.providers.remote.inference.cerebras.models import model_aliases
+from llama_stack.providers.remote.inference.cerebras.models import model_entries
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
@@ -55,7 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
             provider_model_id=m.provider_model_id,
             provider_id="cerebras",
         )
-        for m in model_aliases
+        for m in model_entries
     ]
     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",
@@ -19,7 +19,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
-from llama_stack.providers.remote.inference.fireworks.models import MODEL_ALIASES
+from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
@@ -67,7 +67,7 @@ def get_distribution_template() -> DistributionTemplate:
             provider_model_id=m.provider_model_id,
             provider_id="fireworks",
         )
-        for m in MODEL_ALIASES
+        for m in MODEL_ENTRIES
     ]
     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",
@@ -9,7 +9,7 @@ from pathlib import Path
 from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
-from llama_stack.providers.remote.inference.nvidia.models import _MODEL_ALIASES
+from llama_stack.providers.remote.inference.nvidia.models import _MODEL_ENTRIES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
@@ -45,7 +45,7 @@ def get_distribution_template() -> DistributionTemplate:
             provider_model_id=m.provider_model_id,
             provider_id="nvidia",
         )
-        for m in _MODEL_ALIASES
+        for m in _MODEL_ENTRIES
     ]
     default_tool_groups = [
         ToolGroupInput(
@@ -119,7 +119,7 @@ llama stack run ./run-with-safety.yaml \
 ### (Optional) Update Model Serving Configuration
 
 ```{note}
-Please check the [model_aliases](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L45) for the supported Ollama models.
+Please check the [model_entries](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L45) for the supported Ollama models.
 ```
 
 To serve a new model with `ollama`
@@ -14,7 +14,7 @@ from llama_stack.distribution.datatypes import (
 )
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig
-from llama_stack.providers.remote.inference.sambanova.models import MODEL_ALIASES
+from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
|
@ -47,7 +47,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
provider_model_id=m.provider_model_id,
|
provider_model_id=m.provider_model_id,
|
||||||
provider_id=name,
|
provider_id=name,
|
||||||
)
|
)
|
||||||
for m in MODEL_ALIASES
|
for m in MODEL_ENTRIES
|
||||||
]
|
]
|
||||||
|
|
||||||
default_tool_groups = [
|
default_tool_groups = [
|
||||||
|
|
|
@@ -19,7 +19,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.together import TogetherImplConfig
-from llama_stack.providers.remote.inference.together.models import MODEL_ALIASES
+from llama_stack.providers.remote.inference.together.models import MODEL_ENTRIES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
|
@ -65,7 +65,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
provider_model_id=m.provider_model_id,
|
provider_model_id=m.provider_model_id,
|
||||||
provider_id="together",
|
provider_id="together",
|
||||||
)
|
)
|
||||||
for m in MODEL_ALIASES
|
for m in MODEL_ENTRIES
|
||||||
]
|
]
|
||||||
default_tool_groups = [
|
default_tool_groups = [
|
||||||
ToolGroupInput(
|
ToolGroupInput(
|
||||||
|
|