feat: introduce llama4 support (#1877)

As title says. Details in README, elsewhere.
This commit is contained in:
Ashwin Bharambe 2025-04-05 11:53:35 -07:00 committed by GitHub
parent 23a99a4b22
commit b8f1561956
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
61 changed files with 205222 additions and 6439 deletions

View file

@ -224,7 +224,9 @@ async def completion_request_to_prompt(request: CompletionRequest) -> str:
return formatter.tokenizer.decode(model_input.tokens)
async def completion_request_to_prompt_model_input_info(request: CompletionRequest) -> Tuple[str, int]:
async def completion_request_to_prompt_model_input_info(
request: CompletionRequest,
) -> Tuple[str, int]:
content = augment_content_with_response_format_prompt(request.response_format, request.content)
request.content = content
request = await convert_request_to_raw(request)
@ -302,8 +304,12 @@ def chat_completion_request_to_messages(
):
# llama3.1 and llama3.2 multimodal models follow the same tool prompt format
messages = augment_messages_for_tools_llama_3_1(request)
elif model.model_family in (ModelFamily.llama3_2, ModelFamily.llama3_3):
# llama3.2 and llama3.3 models follow the same tool prompt format
elif model.model_family in (
ModelFamily.llama3_2,
ModelFamily.llama3_3,
ModelFamily.llama4,
):
# llama3.2, llama3.3 and llama4 models follow the same tool prompt format
messages = augment_messages_for_tools_llama_3_2(request)
else:
messages = request.messages
@ -471,7 +477,11 @@ def get_default_tool_prompt_format(model: str) -> ToolPromptFormat:
):
# llama3.1 and llama3.2 multimodal models follow the same tool prompt format
return ToolPromptFormat.json
elif llama_model.model_family in (ModelFamily.llama3_2, ModelFamily.llama3_3):
elif llama_model.model_family in (
ModelFamily.llama3_2,
ModelFamily.llama3_3,
ModelFamily.llama4,
):
# llama3.2 and llama3.3 models follow the same tool prompt format
return ToolPromptFormat.python_list
else: