diff --git a/llama_stack/models/llama/llama4/tokenizer.py b/llama_stack/models/llama/llama4/tokenizer.py
index e12b2cae0..a0926ef5c 100644
--- a/llama_stack/models/llama/llama4/tokenizer.py
+++ b/llama_stack/models/llama/llama4/tokenizer.py
@@ -237,7 +237,7 @@ class Tokenizer:
             str: The decoded string.
         """
         # Typecast is safe here. Tiktoken doesn't do anything list-related with the sequence.
-        return self.model.decode(cast(list[int], t))
+        return cast(str, self.model.decode(cast(list[int], t)))

     @staticmethod
     def _split_whitespaces_or_nonwhitespaces(s: str, max_consecutive_slice_len: int) -> Iterator[str]:
diff --git a/pyproject.toml b/pyproject.toml
index 72f3a323f..2e572d90f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -254,7 +254,19 @@ exclude = [
     "^llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
     "^llama_stack/models/llama/llama3/generation\\.py$",
     "^llama_stack/models/llama/llama3/multimodal/model\\.py$",
-    "^llama_stack/models/llama/llama4/",
+    "^llama_stack/models/llama/llama4/args\\.py$",
+    "^llama_stack/models/llama/llama4/chat_format\\.py$",
+    "^llama_stack/models/llama/llama4/datatypes\\.py$",
+    "^llama_stack/models/llama/llama4/ffn\\.py$",
+    "^llama_stack/models/llama/llama4/generation\\.py$",
+    "^llama_stack/models/llama/llama4/model\\.py$",
+    "^llama_stack/models/llama/llama4/moe\\.py$",
+    "^llama_stack/models/llama/llama4/preprocess\\.py$",
+    "^llama_stack/models/llama/llama4/prompts\\.py$",
+    "^llama_stack/models/llama/llama4/prompt_templates/system_prompts\\.py$",
+    "^llama_stack/models/llama/llama4/quantization/loader\\.py$",
+    "^llama_stack/models/llama/llama4/vision/embedding\\.py$",
+    "^llama_stack/models/llama/llama4/vision/encoder\\.py$",
     "^llama_stack/providers/inline/inference/meta_reference/parallel_utils\\.py$",
     "^llama_stack/providers/inline/inference/meta_reference/quantization/fp8_impls\\.py$",
     "^llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$",
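For context, a minimal, self-contained sketch of the pattern the tokenizer change adopts. It is not the llama4 Tokenizer itself: the `decode` wrapper and the `cl100k_base` encoding below are illustrative stand-ins, and whether mypy actually needs the outer cast depends on the installed tiktoken version and its type stubs.

```python
# Illustrative sketch (not part of the diff). Assumes tiktoken is installed;
# "cl100k_base" is only an example encoding, not the Llama 4 one.
from collections.abc import Sequence
from typing import cast

import tiktoken

model = tiktoken.get_encoding("cl100k_base")


def decode(t: Sequence[int]) -> str:
    # The inner cast mirrors the existing comment in the diff: tiktoken does
    # nothing list-specific with the sequence, so narrowing to list[int] is safe.
    # The outer cast pins the return type to str for strict mypy configurations.
    return cast(str, model.decode(cast(list[int], t)))


tokens = model.encode("hello world")
print(decode(tokens))  # round-trips back to "hello world"
```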