feat: add static embedding metadata to dynamic model listings for providers using OpenAIMixin

- remove auto-download of ollama embedding models
- add embedding model metadata to dynamic listing w/ unit test
- add support and tests for allowed_models
- remove inference provider models.py files where dynamic listing is enabled
- store embedding metadata in the embedding_model_metadata field on inference providers (see the sketch after this list)
- make model_entries optional on ModelRegistryHelper and LiteLLMOpenAIMixin
- make OpenAIMixin a ModelRegistryHelper
- skip the base64 embedding test for remote::ollama, which always returns floats
- only use OpenAI client for ollama model listing
- remove unused build_model_entry function
- remove unused get_huggingface_repo function
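
A minimal sketch of the embedding-metadata pattern described above, assuming an adapter built on OpenAIMixin; the model name and the metadata keys (embedding_dimension, context_length) are illustrative assumptions rather than values taken from this commit:

# Sketch only: an OpenAIMixin-based adapter declaring static metadata for its
# embedding models, which the mixin can attach to its dynamic model listing.
# Real adapters also wire up config, API key, and base URL handling.
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin


class ExampleInferenceAdapter(OpenAIMixin):
    # provider model id -> embedding metadata (keys shown here are assumptions)
    embedding_model_metadata: dict[str, dict[str, int]] = {
        "example-embed-v1": {"embedding_dimension": 768, "context_length": 8192},
    }

The new unit test presumably asserts that models surfaced by the dynamic listing carry this metadata and are registered as embedding models.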
Author: Matthew Farrellee, 2025-09-25 04:56:54 -04:00
Parent: a50b63906c
Commit: 466ef6f490
43 changed files with 370 additions and 1016 deletions


@@ -8,8 +8,6 @@ from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
 from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
-from .models import MODEL_ENTRIES
 logger = get_logger(name=__name__, category="inference::llama_openai_compat")
@@ -30,7 +28,6 @@ class LlamaCompatInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
     def __init__(self, config: LlamaCompatConfig):
         LiteLLMOpenAIMixin.__init__(
             self,
-            model_entries=MODEL_ENTRIES,
             litellm_provider_name="meta_llama",
             api_key_from_config=config.api_key,
             provider_data_api_key_field="llama_api_key",
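
The hunk above drops model_entries from the adapter, which only works because ModelRegistryHelper and LiteLLMOpenAIMixin now treat it as optional. A rough sketch of the relaxed constructor shape; the allowed_models parameter and the defaults are assumptions based on the commit message, not the actual code:

# Sketch only: model_entries defaulting to None so dynamic-listing adapters
# can skip static entries; allowed_models optionally restricts registration.
class ModelRegistryHelper:
    def __init__(
        self,
        model_entries: list | None = None,
        allowed_models: list[str] | None = None,
    ):
        self.model_entries = model_entries or []
        self.allowed_models = allowed_models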


@@ -1,25 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from llama_stack.models.llama.sku_types import CoreModelId
-from llama_stack.providers.utils.inference.model_registry import (
-    build_hf_repo_model_entry,
-)
-
-MODEL_ENTRIES = [
-    build_hf_repo_model_entry(
-        "Llama-3.3-70B-Instruct",
-        CoreModelId.llama3_3_70b_instruct.value,
-    ),
-    build_hf_repo_model_entry(
-        "Llama-4-Scout-17B-16E-Instruct-FP8",
-        CoreModelId.llama4_scout_17b_16e_instruct.value,
-    ),
-    build_hf_repo_model_entry(
-        "Llama-4-Maverick-17B-128E-Instruct-FP8",
-        CoreModelId.llama4_maverick_17b_128e_instruct.value,
-    ),
-]
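
With the static entries deleted, these Llama models are expected to surface through the provider's dynamic listing instead, and the new allowed_models support can narrow what gets registered. A self-contained illustration of that filtering idea; the function name and structure are hypothetical, not the mixin's actual code:

# Hypothetical helper showing how an allow-list could filter dynamically
# listed model ids; None means "no restriction".
def filter_listed_models(listed_ids: list[str], allowed_models: list[str] | None) -> list[str]:
    if allowed_models is None:
        return listed_ids
    return [model_id for model_id in listed_ids if model_id in allowed_models]


print(filter_listed_models(
    ["Llama-3.3-70B-Instruct", "Llama-4-Scout-17B-16E-Instruct-FP8"],
    allowed_models=["Llama-3.3-70B-Instruct"],
))
# -> ['Llama-3.3-70B-Instruct']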