refactor(llama4): remove duplicate implementation, update imports to llama-models, add comprehensive test for tool calling fix (issue #2584)

- Removes all old llama4 code from llama-stack
- Updates all relevant imports to use llama-models
- Adds a robust pytest demonstrating the arguments_json fix
- Updates config/scripts as needed for the new structure
- Resolves merge conflicts with the updated main branch
- Fixes mypy and ruff issues

skamenan7 2025-07-10 09:39:33 -04:00
parent 126d6698a7
commit 61dc2a9c58
31 changed files with 1476 additions and 205135 deletions
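
The commit message cites a pytest exercising the arguments_json fix from issue #2584, in which a decoded tool call should carry its arguments as a JSON string alongside the parsed dict. That test is not among the hunks shown below, so the following is only a minimal sketch of the kind of check it adds; ToolCall and decode_tool_call here are stand-ins for the real llama-stack datatype and decoder.

    import json
    from dataclasses import dataclass

    @dataclass
    class ToolCall:
        # Stand-in for the real tool-call datatype; arguments_json follows
        # the commit message, the other field names are assumed.
        tool_name: str
        arguments: dict
        arguments_json: str

    def decode_tool_call(raw: str) -> ToolCall:
        # Stand-in decoder; the fix ensures arguments_json is populated
        # with the raw JSON alongside the parsed arguments dict.
        return ToolCall(tool_name="get_weather", arguments=json.loads(raw), arguments_json=raw)

    def test_arguments_json_round_trips():
        raw = '{"city": "Tokyo", "unit": "celsius"}'
        call = decode_tool_call(raw)
        assert isinstance(call.arguments_json, str)
        assert json.loads(call.arguments_json) == call.arguments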

View file

@@ -9,6 +9,8 @@ from collections.abc import Generator
 from typing import Optional

 import torch
+from llama_models.llama4.generation import Llama4
+from llama_models.llama4.tokenizer import Tokenizer as Llama4Tokenizer
 from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData

 from llama_stack.apis.inference import (
@@ -21,8 +23,6 @@ from llama_stack.apis.inference import (
 from llama_stack.models.llama.datatypes import QuantizationMode
 from llama_stack.models.llama.llama3.generation import Llama3
 from llama_stack.models.llama.llama3.tokenizer import Tokenizer as Llama3Tokenizer
-from llama_stack.models.llama.llama4.generation import Llama4
-from llama_stack.models.llama.llama4.tokenizer import Tokenizer as Llama4Tokenizer
 from llama_stack.models.llama.sku_types import Model, ModelFamily
 from llama_stack.providers.utils.inference.prompt_adapter import (
     ChatCompletionRequestWithRawContent,
@@ -34,7 +34,7 @@ from .common import model_checkpoint_dir
 from .config import MetaReferenceInferenceConfig
 from .inference import resolve_model

-Tokenizer = Llama4Tokenizer | Llama3Tokenizer
+type Tokenizer = Llama4Tokenizer | Llama3Tokenizer


 class LogitsProcessor:
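
The last change above, likely part of the mypy cleanup the commit message mentions, swaps a plain assignment alias for a PEP 695 `type` statement (Python 3.12+), which type checkers treat as an explicit, lazily evaluated type alias rather than an ordinary module-level variable. A self-contained sketch of the difference, with stand-in classes:

    # Stand-ins for the real tokenizer classes, just to make the alias concrete.
    class Llama3Tokenizer: ...
    class Llama4Tokenizer: ...

    # Before: a plain assignment, which mypy may read as a module variable.
    # Tokenizer = Llama4Tokenizer | Llama3Tokenizer

    # After: PEP 695 (Python 3.12+), unambiguously a type alias, lazily evaluated.
    type Tokenizer = Llama4Tokenizer | Llama3Tokenizer

    def describe(tok: Tokenizer) -> str:
        # The alias is usable anywhere an annotation is expected.
        return type(tok).__name__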

View file

@@ -9,6 +9,8 @@ import os
 import sys
 from collections.abc import AsyncGenerator

+from llama_models.llama4.chat_format import ChatFormat as Llama4ChatFormat
+from llama_models.llama4.tokenizer import Tokenizer as Llama4Tokenizer
 from pydantic import BaseModel
 from termcolor import cprint
@@ -47,8 +49,6 @@ from llama_stack.apis.models import Model, ModelType
 from llama_stack.log import get_logger
 from llama_stack.models.llama.llama3.chat_format import ChatFormat as Llama3ChatFormat
 from llama_stack.models.llama.llama3.tokenizer import Tokenizer as Llama3Tokenizer
-from llama_stack.models.llama.llama4.chat_format import ChatFormat as Llama4ChatFormat
-from llama_stack.models.llama.llama4.tokenizer import Tokenizer as Llama4Tokenizer
 from llama_stack.models.llama.sku_list import resolve_model
 from llama_stack.models.llama.sku_types import ModelFamily
 from llama_stack.providers.datatypes import ModelsProtocolPrivate

View file

@@ -9,8 +9,9 @@ from copy import deepcopy
 from functools import partial
 from typing import Any

+from llama_models.llama4.chat_format import ChatFormat as Llama4ChatFormat
+
 from llama_stack.models.llama.llama3.chat_format import ChatFormat as Llama3ChatFormat
-from llama_stack.models.llama.llama4.chat_format import ChatFormat as Llama4ChatFormat
 from llama_stack.providers.utils.inference.prompt_adapter import (
     ChatCompletionRequestWithRawContent,
     CompletionRequestWithRawContent,

View file

@@ -54,11 +54,11 @@ from llama_stack.models.llama.llama3.tokenizer import Tokenizer

 # Conditional imports to avoid heavy dependencies during module loading
 try:
-    from llama_stack.models.llama.llama4.chat_format import ChatFormat as Llama4ChatFormat
-    from llama_stack.models.llama.llama4.prompt_templates.system_prompts import (
+    from llama_models.llama4.chat_format import ChatFormat as Llama4ChatFormat
+    from llama_models.llama4.prompt_templates.system_prompts import (
         PythonListCustomToolGenerator as PythonListCustomToolGeneratorLlama4,
     )
-    from llama_stack.models.llama.llama4.tokenizer import Tokenizer as Llama4Tokenizer
+    from llama_models.llama4.tokenizer import Tokenizer as Llama4Tokenizer

     LLAMA4_AVAILABLE = True
 except ImportError:
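
This final hunk repoints the guarded imports at llama-models while keeping the availability-flag pattern: the llama4 pieces load inside try, and LLAMA4_AVAILABLE records whether they imported, so callers can degrade gracefully when the package is absent. The diff cuts off at except ImportError:, so the fallback in this sketch is an assumption rather than the file's actual handler.

    # Sketch of the availability-flag pattern; the except branch is an
    # assumed fallback, since the hunk ends before showing the real one.
    try:
        from llama_models.llama4.chat_format import ChatFormat as Llama4ChatFormat
        from llama_models.llama4.tokenizer import Tokenizer as Llama4Tokenizer

        LLAMA4_AVAILABLE = True
    except ImportError:
        Llama4ChatFormat = None  # type: ignore[assignment, misc]
        Llama4Tokenizer = None  # type: ignore[assignment, misc]
        LLAMA4_AVAILABLE = False

    def require_llama4():
        # Callers consult the flag before touching any llama4 functionality.
        if not LLAMA4_AVAILABLE:
            raise RuntimeError("llama4 support requires the llama-models package")
        return Llama4ChatFormat, Llama4Tokenizer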