feat: introduce llama4 support (#1877)

As the title says. Details are in the README and elsewhere.
2025-12-03 18:00:36 +00:00 · 2025-04-05 11:53:35 -07:00 · 2025-04-05 11:53:35 -07:00 · b8f1561956
commit b8f1561956
parent 23a99a4b22
61 changed files with 205222 additions and 6439 deletions
--- a/llama_stack/providers/utils/inference/prompt_adapter.py
+++ b/llama_stack/providers/utils/inference/prompt_adapter.py
@ -224,7 +224,9 @@ async def completion_request_to_prompt(request: CompletionRequest) -> str:
    return formatter.tokenizer.decode(model_input.tokens)


-async def completion_request_to_prompt_model_input_info(request: CompletionRequest) -> Tuple[str, int]:
+async def completion_request_to_prompt_model_input_info(
+    request: CompletionRequest,
+) -> Tuple[str, int]:
    content = augment_content_with_response_format_prompt(request.response_format, request.content)
    request.content = content
    request = await convert_request_to_raw(request)
@ -302,8 +304,12 @@ def chat_completion_request_to_messages(
    ):
        # llama3.1 and llama3.2 multimodal models follow the same tool prompt format
        messages = augment_messages_for_tools_llama_3_1(request)
-    elif model.model_family in (ModelFamily.llama3_2, ModelFamily.llama3_3):
-        # llama3.2 and llama3.3 models follow the same tool prompt format
+    elif model.model_family in (
+        ModelFamily.llama3_2,
+        ModelFamily.llama3_3,
+        ModelFamily.llama4,
+    ):
+        # llama3.2, llama3.3 and llama4 models follow the same tool prompt format
        messages = augment_messages_for_tools_llama_3_2(request)
    else:
        messages = request.messages
@ -471,7 +477,11 @@ def get_default_tool_prompt_format(model: str) -> ToolPromptFormat:
    ):
        # llama3.1 and llama3.2 multimodal models follow the same tool prompt format
        return ToolPromptFormat.json
-    elif llama_model.model_family in (ModelFamily.llama3_2, ModelFamily.llama3_3):
+    elif llama_model.model_family in (
+        ModelFamily.llama3_2,
+        ModelFamily.llama3_3,
+        ModelFamily.llama4,
+    ):
        # llama3.2, llama3.3 and llama4 models follow the same tool prompt format
        return ToolPromptFormat.python_list
    else: