mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-30 23:51:00 +00:00
fixes for all providers
This commit is contained in:
parent
d5874735ea
commit
948f6ece6e
8 changed files with 133 additions and 135 deletions
|
@ -11,9 +11,11 @@ from typing import AsyncGenerator, List
|
|||
from llama_models.sku_list import resolve_model
|
||||
|
||||
from llama_models.llama3.api.datatypes import * # noqa: F403
|
||||
from llama_stack.apis.inference import * # noqa: F403
|
||||
from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
|
||||
|
||||
from llama_stack.providers.utils.inference.model_registry import build_model_alias
|
||||
from llama_stack.apis.inference import * # noqa: F403
|
||||
from llama_stack.providers.datatypes import ModelsProtocolPrivate
|
||||
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import (
|
||||
convert_image_media_to_url,
|
||||
request_has_media,
|
||||
|
@ -28,10 +30,19 @@ from .model_parallel import LlamaModelParallelGenerator
|
|||
SEMAPHORE = asyncio.Semaphore(1)
|
||||
|
||||
|
||||
class MetaReferenceInferenceImpl(Inference, ModelsProtocolPrivate):
|
||||
class MetaReferenceInferenceImpl(Inference, ModelRegistryHelper, ModelsProtocolPrivate):
|
||||
def __init__(self, config: MetaReferenceInferenceConfig) -> None:
|
||||
self.config = config
|
||||
model = resolve_model(config.model)
|
||||
ModelRegistryHelper.__init__(
|
||||
self,
|
||||
[
|
||||
build_model_alias(
|
||||
model.descriptor(),
|
||||
model.core_model_id,
|
||||
)
|
||||
],
|
||||
)
|
||||
if model is None:
|
||||
raise RuntimeError(f"Unknown model: {config.model}, Run `llama model list`")
|
||||
self.model = model
|
||||
|
|
|
@ -13,7 +13,7 @@ from llama_models.llama3.api.chat_format import ChatFormat
|
|||
from llama_models.llama3.api.tokenizer import Tokenizer
|
||||
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
ModelAlias,
|
||||
build_model_alias,
|
||||
ModelRegistryHelper,
|
||||
)
|
||||
|
||||
|
@ -24,20 +24,17 @@ from llama_stack.providers.utils.bedrock.client import create_bedrock_client
|
|||
|
||||
|
||||
model_aliases = [
|
||||
ModelAlias(
|
||||
provider_model_id="meta.llama3-1-8b-instruct-v1:0",
|
||||
aliases=["Llama3.1-8B-Instruct"],
|
||||
llama_model=CoreModelId.llama3_1_8b_instruct,
|
||||
build_model_alias(
|
||||
"meta.llama3-1-8b-instruct-v1:0",
|
||||
CoreModelId.llama3_1_8b_instruct,
|
||||
),
|
||||
ModelAlias(
|
||||
provider_model_id="meta.llama3-1-70b-instruct-v1:0",
|
||||
aliases=["Llama3.1-70B-Instruct"],
|
||||
llama_model=CoreModelId.llama3_1_70b_instruct,
|
||||
build_model_alias(
|
||||
"meta.llama3-1-70b-instruct-v1:0",
|
||||
CoreModelId.llama3_1_70b_instruct,
|
||||
),
|
||||
ModelAlias(
|
||||
provider_model_id="meta.llama3-1-405b-instruct-v1:0",
|
||||
aliases=["Llama3.1-405B-Instruct"],
|
||||
llama_model=CoreModelId.llama3_1_405b_instruct,
|
||||
build_model_alias(
|
||||
"meta.llama3-1-405b-instruct-v1:0",
|
||||
CoreModelId.llama3_1_405b_instruct,
|
||||
),
|
||||
]
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@ from openai import OpenAI
|
|||
from llama_stack.apis.inference import * # noqa: F403
|
||||
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
ModelAlias,
|
||||
build_model_alias,
|
||||
ModelRegistryHelper,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_compat import (
|
||||
|
@ -34,15 +34,13 @@ from .config import DatabricksImplConfig
|
|||
|
||||
|
||||
model_aliases = [
|
||||
ModelAlias(
|
||||
provider_model_id="databricks-meta-llama-3-1-70b-instruct",
|
||||
aliases=["Llama3.1-70B-Instruct"],
|
||||
llama_model=CoreModelId.llama3_1_70b_instruct.value,
|
||||
build_model_alias(
|
||||
"databricks-meta-llama-3-1-70b-instruct",
|
||||
CoreModelId.llama3_1_70b_instruct,
|
||||
),
|
||||
ModelAlias(
|
||||
provider_model_id="databricks-meta-llama-3-1-405b-instruct",
|
||||
aliases=["Llama3.1-405B-Instruct"],
|
||||
llama_model=CoreModelId.llama3_1_405b_instruct.value,
|
||||
build_model_alias(
|
||||
"databricks-meta-llama-3-1-405b-instruct",
|
||||
CoreModelId.llama3_1_405b_instruct,
|
||||
),
|
||||
]
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ from llama_models.llama3.api.tokenizer import Tokenizer
|
|||
from llama_stack.apis.inference import * # noqa: F403
|
||||
from llama_stack.distribution.request_headers import NeedsRequestProviderData
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
ModelAlias,
|
||||
build_model_alias,
|
||||
ModelRegistryHelper,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_compat import (
|
||||
|
@ -36,50 +36,41 @@ from .config import FireworksImplConfig
|
|||
|
||||
|
||||
# Fireworks-hosted model identifiers, each paired with the Llama core model it
# serves. The provider id and the CoreModelId on every entry must name the
# same model: the 1B and 3B Llama 3.2 entries previously pointed at the 3B and
# 11B-vision core ids respectively, which registered them under wrong aliases.
model_aliases = [
    build_model_alias(
        "fireworks/llama-v3p1-8b-instruct",
        CoreModelId.llama3_1_8b_instruct,
    ),
    build_model_alias(
        "fireworks/llama-v3p1-70b-instruct",
        CoreModelId.llama3_1_70b_instruct,
    ),
    build_model_alias(
        "fireworks/llama-v3p1-405b-instruct",
        CoreModelId.llama3_1_405b_instruct,
    ),
    build_model_alias(
        "fireworks/llama-v3p2-1b-instruct",
        CoreModelId.llama3_2_1b_instruct,
    ),
    build_model_alias(
        "fireworks/llama-v3p2-3b-instruct",
        CoreModelId.llama3_2_3b_instruct,
    ),
    build_model_alias(
        "fireworks/llama-v3p2-11b-vision-instruct",
        CoreModelId.llama3_2_11b_vision_instruct,
    ),
    build_model_alias(
        "fireworks/llama-v3p2-90b-vision-instruct",
        CoreModelId.llama3_2_90b_vision_instruct,
    ),
    build_model_alias(
        "fireworks/llama-guard-3-8b",
        CoreModelId.llama_guard_3_8b,
    ),
    build_model_alias(
        "fireworks/llama-guard-3-11b-vision",
        CoreModelId.llama_guard_3_11b_vision,
    ),
]
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ from llama_models.llama3.api.tokenizer import Tokenizer
|
|||
from ollama import AsyncClient
|
||||
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
ModelAlias,
|
||||
build_model_alias,
|
||||
ModelRegistryHelper,
|
||||
)
|
||||
|
||||
|
@ -40,40 +40,33 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
|
|||
|
||||
|
||||
model_aliases = [
|
||||
ModelAlias(
|
||||
provider_model_id="llama3.1:8b-instruct-fp16",
|
||||
aliases=["Llama3.1-8B-Instruct"],
|
||||
llama_model=CoreModelId.llama3_1_8b_instruct.value,
|
||||
build_model_alias(
|
||||
"llama3.1:8b-instruct-fp16",
|
||||
CoreModelId.llama3_1_8b_instruct,
|
||||
),
|
||||
ModelAlias(
|
||||
provider_model_id="llama3.1:70b-instruct-fp16",
|
||||
aliases=["Llama3.1-70B-Instruct"],
|
||||
llama_model=CoreModelId.llama3_1_70b_instruct.value,
|
||||
build_model_alias(
|
||||
"llama3.1:70b-instruct-fp16",
|
||||
CoreModelId.llama3_1_70b_instruct,
|
||||
),
|
||||
ModelAlias(
|
||||
provider_model_id="llama3.2:1b-instruct-fp16",
|
||||
aliases=["Llama3.2-1B-Instruct"],
|
||||
llama_model=CoreModelId.llama3_2_1b_instruct.value,
|
||||
build_model_alias(
|
||||
"llama3.2:1b-instruct-fp16",
|
||||
CoreModelId.llama3_2_1b_instruct,
|
||||
),
|
||||
ModelAlias(
|
||||
provider_model_id="llama3.2:3b-instruct-fp16",
|
||||
aliases=["Llama3.2-3B-Instruct"],
|
||||
llama_model=CoreModelId.llama3_2_3b_instruct.value,
|
||||
build_model_alias(
|
||||
"llama3.2:3b-instruct-fp16",
|
||||
CoreModelId.llama3_2_3b_instruct,
|
||||
),
|
||||
ModelAlias(
|
||||
provider_model_id="llama-guard3:8b",
|
||||
aliases=["Llama-Guard-3-8B"],
|
||||
llama_model=CoreModelId.llama_guard_3_8b.value,
|
||||
build_model_alias(
|
||||
"llama-guard3:8b",
|
||||
CoreModelId.llama_guard_3_8b,
|
||||
),
|
||||
ModelAlias(
|
||||
provider_model_id="llama-guard3:1b",
|
||||
aliases=["Llama-Guard-3-1B"],
|
||||
llama_model=CoreModelId.llama_guard_3_1b.value,
|
||||
build_model_alias(
|
||||
"llama-guard3:1b",
|
||||
CoreModelId.llama_guard_3_1b,
|
||||
),
|
||||
ModelAlias(
|
||||
provider_model_id="x/llama3.2-vision:11b-instruct-fp16",
|
||||
aliases=["Llama3.2-11B-Vision-Instruct"],
|
||||
llama_model=CoreModelId.llama3_2_11b_vision_instruct.value,
|
||||
build_model_alias(
|
||||
"x/llama3.2-vision:11b-instruct-fp16",
|
||||
CoreModelId.llama3_2_11b_vision_instruct,
|
||||
),
|
||||
]
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@ from together import Together
|
|||
from llama_stack.apis.inference import * # noqa: F403
|
||||
from llama_stack.distribution.request_headers import NeedsRequestProviderData
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
ModelAlias,
|
||||
build_model_alias,
|
||||
ModelRegistryHelper,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_compat import (
|
||||
|
@ -39,45 +39,37 @@ from .config import TogetherImplConfig
|
|||
|
||||
|
||||
model_aliases = [
|
||||
ModelAlias(
|
||||
provider_model_id="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
|
||||
aliases=["Llama3.1-8B-Instruct"],
|
||||
llama_model=CoreModelId.llama3_1_8b_instruct.value,
|
||||
build_model_alias(
|
||||
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
|
||||
CoreModelId.llama3_1_8b_instruct,
|
||||
),
|
||||
ModelAlias(
|
||||
provider_model_id="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
|
||||
aliases=["Llama3.1-70B-Instruct"],
|
||||
llama_model=CoreModelId.llama3_1_70b_instruct.value,
|
||||
build_model_alias(
|
||||
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
|
||||
CoreModelId.llama3_1_70b_instruct,
|
||||
),
|
||||
ModelAlias(
|
||||
provider_model_id="meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
|
||||
aliases=["Llama3.1-405B-Instruct"],
|
||||
llama_model=CoreModelId.llama3_1_405b_instruct.value,
|
||||
build_model_alias(
|
||||
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
|
||||
CoreModelId.llama3_1_405b_instruct,
|
||||
),
|
||||
ModelAlias(
|
||||
provider_model_id="meta-llama/Llama-3.2-3B-Instruct-Turbo",
|
||||
aliases=["Llama3.2-3B-Instruct"],
|
||||
llama_model=CoreModelId.llama3_2_3b_instruct.value,
|
||||
build_model_alias(
|
||||
"meta-llama/Llama-3.2-3B-Instruct-Turbo",
|
||||
CoreModelId.llama3_2_3b_instruct,
|
||||
),
|
||||
ModelAlias(
|
||||
provider_model_id="meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
|
||||
aliases=["Llama3.2-11B-Vision-Instruct"],
|
||||
llama_model=CoreModelId.llama3_2_11b_vision_instruct.value,
|
||||
build_model_alias(
|
||||
"meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
|
||||
CoreModelId.llama3_2_11b_vision_instruct,
|
||||
),
|
||||
ModelAlias(
|
||||
provider_model_id="meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
|
||||
aliases=["Llama3.2-90B-Vision-Instruct"],
|
||||
llama_model=CoreModelId.llama3_2_90b_vision_instruct.value,
|
||||
build_model_alias(
|
||||
"meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
|
||||
CoreModelId.llama3_2_90b_vision_instruct,
|
||||
),
|
||||
ModelAlias(
|
||||
provider_model_id="meta-llama/Meta-Llama-Guard-3-8B",
|
||||
aliases=["Llama-Guard-3-8B"],
|
||||
llama_model=CoreModelId.llama_guard_3_8b.value,
|
||||
build_model_alias(
|
||||
"meta-llama/Meta-Llama-Guard-3-8B",
|
||||
CoreModelId.llama_guard_3_8b,
|
||||
),
|
||||
ModelAlias(
|
||||
provider_model_id="meta-llama/Llama-Guard-3-11B-Vision-Turbo",
|
||||
aliases=["Llama-Guard-3-11B-Vision"],
|
||||
llama_model=CoreModelId.llama_guard_3_11b_vision.value,
|
||||
build_model_alias(
|
||||
"meta-llama/Llama-Guard-3-11B-Vision-Turbo",
|
||||
CoreModelId.llama_guard_3_11b_vision,
|
||||
),
|
||||
]
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@ from llama_stack.apis.inference import * # noqa: F403
|
|||
from llama_stack.providers.datatypes import ModelsProtocolPrivate
|
||||
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
ModelAlias,
|
||||
build_model_alias,
|
||||
ModelRegistryHelper,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_compat import (
|
||||
|
@ -36,10 +36,9 @@ from .config import VLLMInferenceAdapterConfig
|
|||
|
||||
def build_model_aliases():
|
||||
return [
|
||||
ModelAlias(
|
||||
provider_model_id=model.huggingface_repo,
|
||||
aliases=[model.descriptor()],
|
||||
llama_model=model.descriptor(),
|
||||
build_model_alias(
|
||||
model.huggingface_repo,
|
||||
model.core_model_id,
|
||||
)
|
||||
for model in all_registered_models()
|
||||
if model.huggingface_repo
|
||||
|
@ -55,11 +54,6 @@ class VLLMInferenceAdapter(Inference, ModelRegistryHelper, ModelsProtocolPrivate
|
|||
self.config = config
|
||||
self.formatter = ChatFormat(Tokenizer.get_instance())
|
||||
self.client = None
|
||||
self.huggingface_repo_to_llama_model_id = {
|
||||
model.huggingface_repo: model.descriptor()
|
||||
for model in all_registered_models()
|
||||
if model.huggingface_repo
|
||||
}
|
||||
|
||||
async def initialize(self) -> None:
|
||||
self.client = OpenAI(base_url=self.config.url, api_key=self.config.api_token)
|
||||
|
|
|
@ -5,13 +5,35 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
from collections import namedtuple
|
||||
from typing import List
|
||||
from typing import List, Optional
|
||||
|
||||
from llama_models.datatypes import CoreModelId
|
||||
from llama_models.sku_list import all_registered_models
|
||||
|
||||
from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
|
||||
|
||||
# Lightweight record with three fields: the provider's own model identifier,
# the alias names registered for it, and the Llama model identifier.
ModelAlias = namedtuple("ModelAlias", ["provider_model_id", "aliases", "llama_model"])
|
||||
|
||||
|
||||
def get_huggingface_repo(core_model_id: CoreModelId) -> Optional[str]:
    """Look up the Hugging Face repository registered for ``core_model_id``.

    Walks ``all_registered_models()`` in order and reports the
    ``huggingface_repo`` attribute of the first model whose ``core_model_id``
    equals the argument. Returns ``None`` when no registered model matches.
    """
    matching_repos = (
        candidate.huggingface_repo
        for candidate in all_registered_models()
        if candidate.core_model_id == core_model_id
    )
    return next(matching_repos, None)
|
||||
|
||||
|
||||
def build_model_alias(provider_model_id: str, core_model_id: CoreModelId) -> ModelAlias:
    """Build the ModelAlias record for a provider-specific model identifier.

    The alias list always carries the core model's own identifier
    (``core_model_id.value``) and, when one is registered, the model's
    Hugging Face repository name.

    Args:
        provider_model_id: Identifier the provider uses for the model.
        core_model_id: Llama core model that the provider identifier serves.

    Returns:
        A ModelAlias whose ``llama_model`` is ``core_model_id.value``.
    """
    aliases = [core_model_id.value]
    # get_huggingface_repo() yields None when no repo is known for the model;
    # keep that out of the alias list so it only ever contains usable names.
    huggingface_repo = get_huggingface_repo(core_model_id)
    if huggingface_repo:
        aliases.append(huggingface_repo)
    return ModelAlias(
        provider_model_id=provider_model_id,
        aliases=aliases,
        llama_model=core_model_id.value,
    )
|
||||
|
||||
|
||||
class ModelLookup:
|
||||
def __init__(
|
||||
self,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue