feat: add static embedding metadata to dynamic model listings for providers using OpenAIMixin (#3547)

# What does this PR do?

- remove auto-download of ollama embedding models
- add embedding model metadata to dynamic listing w/ unit test
- add support and tests for allowed_models
- remove inference provider models.py files where dynamic listing is enabled
- store embedding metadata in the embedding_model_metadata field on inference providers (see the sketch after this list)
- make model_entries optional on ModelRegistryHelper and
LiteLLMOpenAIMixin
- make OpenAIMixin a ModelRegistryHelper
- skip the base64 embedding test for remote::ollama, which always returns floats
- only use OpenAI client for ollama model listing
- remove unused build_model_entry function
- remove unused get_huggingface_repo function
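
As a rough illustration of the embedding_model_metadata idea, here is a minimal sketch. It is not the actual OpenAIMixin/ModelRegistryHelper code; the Model dataclass, ExampleAdapter, and list_models helper are simplified stand-ins. The pattern: a provider declares static metadata keyed by provider model id, and the dynamic listing path marks any matching model as an embedding model and attaches that metadata.

```python
from dataclasses import dataclass, field


@dataclass
class Model:
    identifier: str
    provider_resource_id: str
    model_type: str = "llm"
    metadata: dict = field(default_factory=dict)


class ExampleAdapter:
    # Static metadata keyed by provider model id; ids not listed here are
    # treated as plain LLMs.
    embedding_model_metadata: dict[str, dict[str, int]] = {
        "example/embed-small": {"embedding_dimension": 768, "context_length": 8192},
    }

    def list_models(self, remote_ids: list[str]) -> list[Model]:
        models = []
        for model_id in remote_ids:
            if model_id in self.embedding_model_metadata:
                # Embedding model: attach the statically declared metadata.
                models.append(
                    Model(
                        identifier=model_id,
                        provider_resource_id=model_id,
                        model_type="embedding",
                        metadata=self.embedding_model_metadata[model_id],
                    )
                )
            else:
                models.append(Model(identifier=model_id, provider_resource_id=model_id))
        return models


print(ExampleAdapter().list_models(["example/embed-small", "example/chat-large"]))
```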


## Test Plan

CI with the new unit tests
Author: Matthew Farrellee, 2025-09-25 17:17:00 -04:00
Commit: b67aef2fc4 (parent: a50b63906c)
43 changed files with 368 additions and 1015 deletions


@@ -56,15 +56,22 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
)
from .config import TogetherImplConfig
from .models import EMBEDDING_MODEL_ENTRIES, MODEL_ENTRIES

logger = get_logger(name=__name__, category="inference::together")


class TogetherInferenceAdapter(OpenAIMixin, ModelRegistryHelper, Inference, NeedsRequestProviderData):
    embedding_model_metadata = {
        "togethercomputer/m2-bert-80M-32k-retrieval": {"embedding_dimension": 768, "context_length": 32768},
        "BAAI/bge-large-en-v1.5": {"embedding_dimension": 1024, "context_length": 512},
        "BAAI/bge-base-en-v1.5": {"embedding_dimension": 768, "context_length": 512},
        "Alibaba-NLP/gte-modernbert-base": {"embedding_dimension": 768, "context_length": 8192},
        "intfloat/multilingual-e5-large-instruct": {"embedding_dimension": 1024, "context_length": 512},
    }

    def __init__(self, config: TogetherImplConfig) -> None:
        ModelRegistryHelper.__init__(self, MODEL_ENTRIES, config.allowed_models)
        self.config = config
        self.allowed_models = config.allowed_models
        self._model_cache: dict[str, Model] = {}

    def get_api_key(self):
@@ -264,15 +271,16 @@ class TogetherInferenceAdapter(OpenAIMixin, ModelRegistryHelper, Inference, NeedsRequestProviderData):
        # Together's /v1/models is not compatible with OpenAI's /v1/models. Together support ticket #13355 -> will not fix, use Together's own client
        for m in await self._get_client().models.list():
            if m.type == "embedding":
-               if m.id not in EMBEDDING_MODEL_ENTRIES:
+               if m.id not in self.embedding_model_metadata:
                    logger.warning(f"Unknown embedding dimension for model {m.id}, skipping.")
                    continue
+               metadata = self.embedding_model_metadata[m.id]
                self._model_cache[m.id] = Model(
                    provider_id=self.__provider_id__,
-                   provider_resource_id=EMBEDDING_MODEL_ENTRIES[m.id].provider_model_id,
+                   provider_resource_id=m.id,
                    identifier=m.id,
                    model_type=ModelType.embedding,
-                   metadata=EMBEDDING_MODEL_ENTRIES[m.id].metadata,
+                   metadata=metadata,
                )
            else:
                self._model_cache[m.id] = Model(
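
The allowed_models support mentioned in the description can be pictured as a filter applied over the same dynamic listing. The snippet below is only a sketch of that idea; filter_allowed is a hypothetical helper, not the actual OpenAIMixin/ModelRegistryHelper implementation. A None value keeps every model the provider reports, while a configured list keeps only the ids it names.

```python
def filter_allowed(model_ids: list[str], allowed_models: list[str] | None) -> list[str]:
    # None means "no restriction": every model reported by the provider is kept.
    if allowed_models is None:
        return list(model_ids)
    # Otherwise keep only the ids explicitly allowed in the provider config.
    return [m for m in model_ids if m in allowed_models]


print(filter_allowed(["m2-bert-80M-32k-retrieval", "bge-large-en-v1.5"], None))
print(filter_allowed(["m2-bert-80M-32k-retrieval", "bge-large-en-v1.5"], ["bge-large-en-v1.5"]))
```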