From 0f878ad87af9d1064f5da57ef7ed51f2541232a3 Mon Sep 17 00:00:00 2001
From: Yogish Baliga
Date: Thu, 8 May 2025 14:27:56 -0700
Subject: [PATCH] feat(provider): adding llama4 support in together inference
 provider (#2123)

# What does this PR do?
Adding Llama4 model support in TogetherAI provider
---
 .../self_hosted_distro/together.md            |  2 ++
 .../remote/inference/together/models.py       |  8 ++++++++
 .../templates/together/run-with-safety.yaml   | 20 +++++++++++++++++++
 llama_stack/templates/together/run.yaml       | 20 +++++++++++++++++++
 llama_stack/templates/verification/run.yaml   | 20 +++++++++++++++++++
 5 files changed, 70 insertions(+)

diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md
index adfc2c472..18398bd16 100644
--- a/docs/source/distributions/self_hosted_distro/together.md
+++ b/docs/source/distributions/self_hosted_distro/together.md
@@ -46,6 +46,8 @@ The following models are available by default:
 - `meta-llama/Llama-3.3-70B-Instruct-Turbo (aliases: meta-llama/Llama-3.3-70B-Instruct)`
 - `meta-llama/Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)`
 - `meta-llama/Llama-Guard-3-11B-Vision-Turbo (aliases: meta-llama/Llama-Guard-3-11B-Vision)`
+- `meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct)`
+- `meta-llama/Llama-4-Scout-17B-16E-Instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)`
 - `togethercomputer/m2-bert-80M-8k-retrieval `
 - `togethercomputer/m2-bert-80M-32k-retrieval `
 - `meta-llama/Llama-4-Scout-17B-16E-Instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct, together/meta-llama/Llama-4-Scout-17B-16E-Instruct)`
diff --git a/llama_stack/providers/remote/inference/together/models.py b/llama_stack/providers/remote/inference/together/models.py
index f4b259767..9400c40c0 100644
--- a/llama_stack/providers/remote/inference/together/models.py
+++ b/llama_stack/providers/remote/inference/together/models.py
@@ -48,6 +48,14 @@ MODEL_ENTRIES = [
         "meta-llama/Llama-Guard-3-11B-Vision-Turbo",
         CoreModelId.llama_guard_3_11b_vision.value,
     ),
+    build_hf_repo_model_entry(
+        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+        CoreModelId.llama4_maverick_17b_128e_instruct.value,
+    ),
+    build_hf_repo_model_entry(
+        "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+        CoreModelId.llama4_scout_17b_16e_instruct.value,
+    ),
     ProviderModelEntry(
         provider_model_id="togethercomputer/m2-bert-80M-8k-retrieval",
         model_type=ModelType.embedding,
diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml
index fbe24064d..67114c24d 100644
--- a/llama_stack/templates/together/run-with-safety.yaml
+++ b/llama_stack/templates/together/run-with-safety.yaml
@@ -202,6 +202,26 @@ models:
   provider_id: together
   provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+  provider_id: together
+  provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+  provider_id: together
+  provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  provider_id: together
+  provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  provider_id: together
+  provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  model_type: llm
 - metadata:
     embedding_dimension: 768
     context_length: 8192
diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml
index 1e93c58c1..f816c2fb1 100644
--- a/llama_stack/templates/together/run.yaml
+++ b/llama_stack/templates/together/run.yaml
@@ -197,6 +197,26 @@ models:
   provider_id: together
   provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+  provider_id: together
+  provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+  provider_id: together
+  provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  provider_id: together
+  provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  provider_id: together
+  provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  model_type: llm
 - metadata:
     embedding_dimension: 768
     context_length: 8192
diff --git a/llama_stack/templates/verification/run.yaml b/llama_stack/templates/verification/run.yaml
index 73fbcfef5..7e476cc20 100644
--- a/llama_stack/templates/verification/run.yaml
+++ b/llama_stack/templates/verification/run.yaml
@@ -372,6 +372,26 @@ models:
   provider_id: together-openai-compat
   provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+  provider_id: together-openai-compat
+  provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+  provider_id: together-openai-compat
+  provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  provider_id: together-openai-compat
+  provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  provider_id: together-openai-compat
+  provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  model_type: llm
 - metadata:
     embedding_dimension: 768
     context_length: 8192
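
Not part of the patch: a minimal usage sketch for the Llama 4 entries this change registers. It assumes a Llama Stack server built from the `together` template is running at `http://localhost:8321` with `TOGETHER_API_KEY` configured, and that the `llama-stack-client` Python package is installed; exact client method names and response fields may differ between releases.

```python
# Sketch only: exercise one of the newly registered Llama 4 models through a
# running Together-backed Llama Stack server (assumed at localhost:8321).
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# List registered models and show which Llama 4 aliases resolve to which
# Together provider model IDs.
for model in client.models.list():
    if "Llama-4" in model.identifier:
        print(model.identifier, "->", model.provider_resource_id)

# Run a chat completion against the Scout alias added in models.py.
response = client.inference.chat_completion(
    model_id="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(response.completion_message.content)
```

Per the run.yaml entries above, each Llama 4 model is registered under both the Together provider model ID and its Hugging Face repo alias, so either identifier should resolve to the same underlying provider model.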