feat: add embedding and dynamic model support to Together inference adapter

- updated to use OpenAIMixin
- workarounds for Together API quirks
- recordings for the together suite when subdirs=inference, pattern=openai

Test with: `TOGETHER_API_KEY=_NONE_ ./scripts/integration-tests.sh --stack-config server:ci-tests --setup together --subdirs inference --pattern openai`
2025-10-05 12:21:52 +00:00 · 2025-09-16 12:13:14 -04:00 · 2025-09-16 12:13:14 -04:00 · 3e02dc5c2f
commit 3e02dc5c2f
parent f4ab154ade
20 changed files with 9227 additions and 180 deletions
--- a/llama_stack/providers/remote/inference/together/models.py
+++ b/llama_stack/providers/remote/inference/together/models.py
@ -4,7 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.models import ModelType
 from llama_stack.models.llama.sku_types import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
    ProviderModelEntry,
@ -21,57 +20,84 @@ SAFETY_MODELS_ENTRIES = [
        CoreModelId.llama_guard_3_11b_vision.value,
    ),
 ]
-MODEL_ENTRIES = [
-    build_hf_repo_model_entry(
-        "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-        CoreModelId.llama3_1_8b_instruct.value,
-    ),
-    build_hf_repo_model_entry(
-        "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
-        CoreModelId.llama3_1_70b_instruct.value,
-    ),
-    build_hf_repo_model_entry(
-        "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
-        CoreModelId.llama3_1_405b_instruct.value,
-    ),
-    build_hf_repo_model_entry(
-        "meta-llama/Llama-3.2-3B-Instruct-Turbo",
-        CoreModelId.llama3_2_3b_instruct.value,
-    ),
-    build_hf_repo_model_entry(
-        "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
-        CoreModelId.llama3_2_11b_vision_instruct.value,
-    ),
-    build_hf_repo_model_entry(
-        "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
-        CoreModelId.llama3_2_90b_vision_instruct.value,
-    ),
-    build_hf_repo_model_entry(
-        "meta-llama/Llama-3.3-70B-Instruct-Turbo",
-        CoreModelId.llama3_3_70b_instruct.value,
-    ),
-    ProviderModelEntry(
-        provider_model_id="togethercomputer/m2-bert-80M-8k-retrieval",
-        model_type=ModelType.embedding,
-        metadata={
-            "embedding_dimension": 768,
-            "context_length": 8192,
-        },
-    ),
-    ProviderModelEntry(
+
+# source: https://docs.together.ai/docs/serverless-models#embedding-models
+EMBEDDING_MODEL_ENTRIES = {
+    "togethercomputer/m2-bert-80M-32k-retrieval": ProviderModelEntry(
        provider_model_id="togethercomputer/m2-bert-80M-32k-retrieval",
-        model_type=ModelType.embedding,
        metadata={
            "embedding_dimension": 768,
            "context_length": 32768,
        },
    ),
-    build_hf_repo_model_entry(
-        "meta-llama/Llama-4-Scout-17B-16E-Instruct",
-        CoreModelId.llama4_scout_17b_16e_instruct.value,
+    "BAAI/bge-large-en-v1.5": ProviderModelEntry(
+        provider_model_id="BAAI/bge-large-en-v1.5",
+        metadata={
+            "embedding_dimension": 1024,
+            "context_length": 512,
+        },
    ),
-    build_hf_repo_model_entry(
-        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
-        CoreModelId.llama4_maverick_17b_128e_instruct.value,
+    "BAAI/bge-base-en-v1.5": ProviderModelEntry(
+        provider_model_id="BAAI/bge-base-en-v1.5",
+        metadata={
+            "embedding_dimension": 768,
+            "context_length": 512,
+        },
    ),
-] + SAFETY_MODELS_ENTRIES
+    "Alibaba-NLP/gte-modernbert-base": ProviderModelEntry(
+        provider_model_id="Alibaba-NLP/gte-modernbert-base",
+        metadata={
+            "embedding_dimension": 768,
+            "context_length": 8192,
+        },
+    ),
+    "intfloat/multilingual-e5-large-instruct": ProviderModelEntry(
+        provider_model_id="intfloat/multilingual-e5-large-instruct",
+        metadata={
+            "embedding_dimension": 1024,
+            "context_length": 512,
+        },
+    ),
+}
+MODEL_ENTRIES = (
+    [
+        build_hf_repo_model_entry(
+            "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+            CoreModelId.llama3_1_8b_instruct.value,
+        ),
+        build_hf_repo_model_entry(
+            "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
+            CoreModelId.llama3_1_70b_instruct.value,
+        ),
+        build_hf_repo_model_entry(
+            "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
+            CoreModelId.llama3_1_405b_instruct.value,
+        ),
+        build_hf_repo_model_entry(
+            "meta-llama/Llama-3.2-3B-Instruct-Turbo",
+            CoreModelId.llama3_2_3b_instruct.value,
+        ),
+        build_hf_repo_model_entry(
+            "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
+            CoreModelId.llama3_2_11b_vision_instruct.value,
+        ),
+        build_hf_repo_model_entry(
+            "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
+            CoreModelId.llama3_2_90b_vision_instruct.value,
+        ),
+        build_hf_repo_model_entry(
+            "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+            CoreModelId.llama3_3_70b_instruct.value,
+        ),
+        build_hf_repo_model_entry(
+            "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+            CoreModelId.llama4_scout_17b_16e_instruct.value,
+        ),
+        build_hf_repo_model_entry(
+            "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+            CoreModelId.llama4_maverick_17b_128e_instruct.value,
+        ),
+    ]
+    + SAFETY_MODELS_ENTRIES
+    + list(EMBEDDING_MODEL_ENTRIES.values())
+)