feat(provider): adding llama4 support in together inference provider (#2123)

# What does this PR do?

Adds Llama 4 model support (Maverick and Scout) to the Together AI inference provider.
2025-05-08 14:27:56 -07:00 · 2025-05-08 14:27:56 -07:00 · 0f878ad87a
commit 0f878ad87a
parent fe5f5e530c
5 changed files with 70 additions and 0 deletions
--- a/docs/source/distributions/self_hosted_distro/together.md
+++ b/docs/source/distributions/self_hosted_distro/together.md
@@ -46,6 +46,8 @@ The following models are available by default:
 - `meta-llama/Llama-3.3-70B-Instruct-Turbo (aliases: meta-llama/Llama-3.3-70B-Instruct)`
 - `meta-llama/Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)`
 - `meta-llama/Llama-Guard-3-11B-Vision-Turbo (aliases: meta-llama/Llama-Guard-3-11B-Vision)`
 - `meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct)`
 - `meta-llama/Llama-4-Scout-17B-16E-Instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)`
 - `togethercomputer/m2-bert-80M-8k-retrieval `
 - `togethercomputer/m2-bert-80M-32k-retrieval `
 - `meta-llama/Llama-4-Scout-17B-16E-Instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct, together/meta-llama/Llama-4-Scout-17B-16E-Instruct)`
--- a/llama_stack/providers/remote/inference/together/models.py
+++ b/llama_stack/providers/remote/inference/together/models.py
@@ -48,6 +48,14 @@ MODEL_ENTRIES = [
        "meta-llama/Llama-Guard-3-11B-Vision-Turbo",
        CoreModelId.llama_guard_3_11b_vision.value,
    ),
    build_hf_repo_model_entry(
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        CoreModelId.llama4_maverick_17b_128e_instruct.value,
    ),
    build_hf_repo_model_entry(
        "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        CoreModelId.llama4_scout_17b_16e_instruct.value,
    ),
    ProviderModelEntry(
        provider_model_id="togethercomputer/m2-bert-80M-8k-retrieval",
        model_type=ModelType.embedding,
--- a/llama_stack/templates/together/run-with-safety.yaml
+++ b/llama_stack/templates/together/run-with-safety.yaml
@@ -202,6 +202,26 @@ models:
  provider_id: together
  provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
  model_type: llm
 - metadata: {}
  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
  provider_id: together
  provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
  model_type: llm
 - metadata: {}
  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
  provider_id: together
  provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
  model_type: llm
 - metadata: {}
  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
  provider_id: together
  provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
  model_type: llm
 - metadata: {}
  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
  provider_id: together
  provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
  model_type: llm
 - metadata:
    embedding_dimension: 768
    context_length: 8192
--- a/llama_stack/templates/together/run.yaml
+++ b/llama_stack/templates/together/run.yaml
@@ -197,6 +197,26 @@ models:
  provider_id: together
  provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
  model_type: llm
 - metadata: {}
  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
  provider_id: together
  provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
  model_type: llm
 - metadata: {}
  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
  provider_id: together
  provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
  model_type: llm
 - metadata: {}
  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
  provider_id: together
  provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
  model_type: llm
 - metadata: {}
  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
  provider_id: together
  provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
  model_type: llm
 - metadata:
    embedding_dimension: 768
    context_length: 8192
--- a/llama_stack/templates/verification/run.yaml
+++ b/llama_stack/templates/verification/run.yaml
@@ -372,6 +372,26 @@ models:
  provider_id: together-openai-compat
  provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
  model_type: llm
 - metadata: {}
  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
  provider_id: together-openai-compat
  provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
  model_type: llm
 - metadata: {}
  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
  provider_id: together-openai-compat
  provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
  model_type: llm
 - metadata: {}
  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
  provider_id: together-openai-compat
  provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
  model_type: llm
 - metadata: {}
  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
  provider_id: together-openai-compat
  provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
  model_type: llm
 - metadata:
    embedding_dimension: 768
    context_length: 8192