mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 04:04:14 +00:00
feat: add static embedding metadata to dynamic model listings for providers using OpenAIMixin (#3547)
# What does this PR do?

- remove auto-download of ollama embedding models
- add embedding model metadata to dynamic listing, with a unit test
- add support and tests for allowed_models
- remove inference provider models.py files where dynamic listing is enabled
- store embedding metadata in the embedding_model_metadata field on inference providers
- make model_entries optional on ModelRegistryHelper and LiteLLMOpenAIMixin
- make OpenAIMixin a ModelRegistryHelper
- skip the base64 embedding test for remote::ollama, which always returns floats
- only use the OpenAI client for ollama model listing
- remove unused build_model_entry function
- remove unused get_huggingface_repo function

## Test Plan

ci w/ new tests
This commit is contained in:
parent
a50b63906c
commit
b67aef2fc4
43 changed files with 368 additions and 1015 deletions
|
@ -7,12 +7,11 @@
|
|||
from pathlib import Path
|
||||
|
||||
from llama_stack.core.datatypes import BuildProvider, ModelInput, Provider, ShieldInput, ToolGroupInput
|
||||
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings, get_model_registry
|
||||
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
|
||||
from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
|
||||
from llama_stack.providers.remote.datasetio.nvidia import NvidiaDatasetIOConfig
|
||||
from llama_stack.providers.remote.eval.nvidia import NVIDIAEvalConfig
|
||||
from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
|
||||
from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
|
||||
from llama_stack.providers.remote.safety.nvidia import NVIDIASafetyConfig
|
||||
|
||||
|
||||
|
@ -68,9 +67,6 @@ def get_distribution_template(name: str = "nvidia") -> DistributionTemplate:
|
|||
provider_id="nvidia",
|
||||
)
|
||||
|
||||
available_models = {
|
||||
"nvidia": MODEL_ENTRIES,
|
||||
}
|
||||
default_tool_groups = [
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::rag",
|
||||
|
@ -78,7 +74,6 @@ def get_distribution_template(name: str = "nvidia") -> DistributionTemplate:
|
|||
),
|
||||
]
|
||||
|
||||
default_models, _ = get_model_registry(available_models)
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
distro_type="self_hosted",
|
||||
|
@ -86,7 +81,6 @@ def get_distribution_template(name: str = "nvidia") -> DistributionTemplate:
|
|||
container_image=None,
|
||||
template_path=Path(__file__).parent / "doc_template.md",
|
||||
providers=providers,
|
||||
available_models_by_provider=available_models,
|
||||
run_configs={
|
||||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
|
@ -95,7 +89,6 @@ def get_distribution_template(name: str = "nvidia") -> DistributionTemplate:
|
|||
"eval": [eval_provider],
|
||||
"files": [files_provider],
|
||||
},
|
||||
default_models=default_models,
|
||||
default_tool_groups=default_tool_groups,
|
||||
),
|
||||
"run-with-safety.yaml": RunConfigSettings(
|
||||
|
|
|
@ -92,90 +92,7 @@ metadata_store:
|
|||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: meta/llama3-8b-instruct
|
||||
provider_id: nvidia
|
||||
provider_model_id: meta/llama3-8b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta/llama3-70b-instruct
|
||||
provider_id: nvidia
|
||||
provider_model_id: meta/llama3-70b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta/llama-3.1-8b-instruct
|
||||
provider_id: nvidia
|
||||
provider_model_id: meta/llama-3.1-8b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta/llama-3.1-70b-instruct
|
||||
provider_id: nvidia
|
||||
provider_model_id: meta/llama-3.1-70b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta/llama-3.1-405b-instruct
|
||||
provider_id: nvidia
|
||||
provider_model_id: meta/llama-3.1-405b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta/llama-3.2-1b-instruct
|
||||
provider_id: nvidia
|
||||
provider_model_id: meta/llama-3.2-1b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta/llama-3.2-3b-instruct
|
||||
provider_id: nvidia
|
||||
provider_model_id: meta/llama-3.2-3b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta/llama-3.2-11b-vision-instruct
|
||||
provider_id: nvidia
|
||||
provider_model_id: meta/llama-3.2-11b-vision-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta/llama-3.2-90b-vision-instruct
|
||||
provider_id: nvidia
|
||||
provider_model_id: meta/llama-3.2-90b-vision-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta/llama-3.3-70b-instruct
|
||||
provider_id: nvidia
|
||||
provider_model_id: meta/llama-3.3-70b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: nvidia/vila
|
||||
provider_id: nvidia
|
||||
provider_model_id: nvidia/vila
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 2048
|
||||
context_length: 8192
|
||||
model_id: nvidia/llama-3.2-nv-embedqa-1b-v2
|
||||
provider_id: nvidia
|
||||
provider_model_id: nvidia/llama-3.2-nv-embedqa-1b-v2
|
||||
model_type: embedding
|
||||
- metadata:
|
||||
embedding_dimension: 1024
|
||||
context_length: 512
|
||||
model_id: nvidia/nv-embedqa-e5-v5
|
||||
provider_id: nvidia
|
||||
provider_model_id: nvidia/nv-embedqa-e5-v5
|
||||
model_type: embedding
|
||||
- metadata:
|
||||
embedding_dimension: 4096
|
||||
context_length: 512
|
||||
model_id: nvidia/nv-embedqa-mistral-7b-v2
|
||||
provider_id: nvidia
|
||||
provider_model_id: nvidia/nv-embedqa-mistral-7b-v2
|
||||
model_type: embedding
|
||||
- metadata:
|
||||
embedding_dimension: 1024
|
||||
context_length: 512
|
||||
model_id: snowflake/arctic-embed-l
|
||||
provider_id: nvidia
|
||||
provider_model_id: snowflake/arctic-embed-l
|
||||
model_type: embedding
|
||||
models: []
|
||||
shields: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue