From 0f878ad87af9d1064f5da57ef7ed51f2541232a3 Mon Sep 17 00:00:00 2001
From: Yogish Baliga
Date: Thu, 8 May 2025 14:27:56 -0700
Subject: [PATCH] feat(provider): adding llama4 support in together inference
 provider (#2123)

# What does this PR do?
Adding Llama4 model support in TogetherAI provider
---
 .../self_hosted_distro/together.md            |  2 ++
 .../remote/inference/together/models.py       |  8 ++++++++
 .../templates/together/run-with-safety.yaml   | 20 +++++++++++++++++++
 llama_stack/templates/together/run.yaml       | 20 +++++++++++++++++++
 llama_stack/templates/verification/run.yaml   | 20 +++++++++++++++++++
 5 files changed, 70 insertions(+)

diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md
index adfc2c472..18398bd16 100644
--- a/docs/source/distributions/self_hosted_distro/together.md
+++ b/docs/source/distributions/self_hosted_distro/together.md
@@ -46,6 +46,8 @@ The following models are available by default:
 - `meta-llama/Llama-3.3-70B-Instruct-Turbo (aliases: meta-llama/Llama-3.3-70B-Instruct)`
 - `meta-llama/Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)`
 - `meta-llama/Llama-Guard-3-11B-Vision-Turbo (aliases: meta-llama/Llama-Guard-3-11B-Vision)`
+- `meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct)`
+- `meta-llama/Llama-4-Scout-17B-16E-Instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)`
 - `togethercomputer/m2-bert-80M-8k-retrieval `
 - `togethercomputer/m2-bert-80M-32k-retrieval `
 - `meta-llama/Llama-4-Scout-17B-16E-Instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct, together/meta-llama/Llama-4-Scout-17B-16E-Instruct)`
diff --git a/llama_stack/providers/remote/inference/together/models.py b/llama_stack/providers/remote/inference/together/models.py
index f4b259767..9400c40c0 100644
--- a/llama_stack/providers/remote/inference/together/models.py
+++ b/llama_stack/providers/remote/inference/together/models.py
@@ -48,6 +48,14 @@ MODEL_ENTRIES = [
         "meta-llama/Llama-Guard-3-11B-Vision-Turbo",
         CoreModelId.llama_guard_3_11b_vision.value,
     ),
+    build_hf_repo_model_entry(
+        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+        CoreModelId.llama4_maverick_17b_128e_instruct.value,
+    ),
+    build_hf_repo_model_entry(
+        "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+        CoreModelId.llama4_scout_17b_16e_instruct.value,
+    ),
     ProviderModelEntry(
         provider_model_id="togethercomputer/m2-bert-80M-8k-retrieval",
         model_type=ModelType.embedding,
diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml
index fbe24064d..67114c24d 100644
--- a/llama_stack/templates/together/run-with-safety.yaml
+++ b/llama_stack/templates/together/run-with-safety.yaml
@@ -202,6 +202,26 @@ models:
   provider_id: together
   provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+  provider_id: together
+  provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+  provider_id: together
+  provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  provider_id: together
+  provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  provider_id: together
+  provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  model_type: llm
 - metadata:
     embedding_dimension: 768
     context_length: 8192
diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml
index 1e93c58c1..f816c2fb1 100644
--- a/llama_stack/templates/together/run.yaml
+++ b/llama_stack/templates/together/run.yaml
@@ -197,6 +197,26 @@ models:
   provider_id: together
   provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+  provider_id: together
+  provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+  provider_id: together
+  provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  provider_id: together
+  provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  provider_id: together
+  provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  model_type: llm
 - metadata:
     embedding_dimension: 768
     context_length: 8192
diff --git a/llama_stack/templates/verification/run.yaml b/llama_stack/templates/verification/run.yaml
index 73fbcfef5..7e476cc20 100644
--- a/llama_stack/templates/verification/run.yaml
+++ b/llama_stack/templates/verification/run.yaml
@@ -372,6 +372,26 @@ models:
   provider_id: together-openai-compat
   provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+  provider_id: together-openai-compat
+  provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+  provider_id: together-openai-compat
+  provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  provider_id: together-openai-compat
+  provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  provider_id: together-openai-compat
+  provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  model_type: llm
 - metadata:
     embedding_dimension: 768
     context_length: 8192
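
Not part of the patch: a minimal usage sketch for the Llama 4 entries this change registers. It assumes a Llama Stack server built from the `together` template is running at `http://localhost:8321` with `TOGETHER_API_KEY` configured, and that the `llama-stack-client` Python package is installed; exact client method names and response fields may differ between releases.

```python
# Sketch only: exercise one of the newly registered Llama 4 models through a
# running Together-backed Llama Stack server (assumed at localhost:8321).
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# List registered models and show which Llama 4 aliases resolve to which
# Together provider model IDs.
for model in client.models.list():
    if "Llama-4" in model.identifier:
        print(model.identifier, "->", model.provider_resource_id)

# Run a chat completion against the Scout alias added in models.py.
response = client.inference.chat_completion(
    model_id="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(response.completion_message.content)
```

Per the run.yaml entries above, each Llama 4 model is registered under both the Together provider model ID and its Hugging Face repo alias, so either identifier should resolve to the same underlying provider model.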