diff --git a/src/llama_stack/models/llama/llama3/multimodal/encoder_utils.py b/src/llama_stack/models/llama/llama3/multimodal/encoder_utils.py
index 0cc5aec81..a87d77cc3 100644
--- a/src/llama_stack/models/llama/llama3/multimodal/encoder_utils.py
+++ b/src/llama_stack/models/llama/llama3/multimodal/encoder_utils.py
@@ -141,7 +141,7 @@ def build_encoder_attention_mask(
     """
     Build vision encoder attention mask that omits padding tokens.
     """
-    masks_list = []
+    masks_list: list[torch.Tensor] = []
     for arx in ar:
         mask_i = torch.ones((num_chunks, x.shape[2], 1), dtype=x.dtype)
         mask_i[: arx[0] * arx[1], :ntok] = 0
diff --git a/src/llama_stack/providers/inline/batches/reference/batches.py b/src/llama_stack/providers/inline/batches/reference/batches.py
index 241218dca..7c4358b84 100644
--- a/src/llama_stack/providers/inline/batches/reference/batches.py
+++ b/src/llama_stack/providers/inline/batches/reference/batches.py
@@ -358,11 +358,10 @@ class ReferenceBatchesImpl(Batches):
 
         # TODO(SECURITY): do something about large files
         file_content_response = await self.files_api.openai_retrieve_file_content(batch.input_file_id)
-        # Handle both bytes and memoryview types
-        body = file_content_response.body
-        if isinstance(body, memoryview):
-            body = bytes(body)
-        file_content = body.decode("utf-8")
+        # Handle both bytes and memoryview types - convert to bytes unconditionally
+        # (bytes(x) returns x if already bytes, creates new bytes from memoryview otherwise)
+        body_bytes = bytes(file_content_response.body)
+        file_content = body_bytes.decode("utf-8")
         for line_num, line in enumerate(file_content.strip().split("\n"), 1):
             if line.strip():  # skip empty lines
                 try:
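
Aside (not part of the patch): a minimal sketch of the bytes() behavior the batches.py hunk relies on. The helper name normalize_body and the sample payload below are illustrative assumptions, not llama-stack code.

# Sketch only: demonstrates that bytes(x) is safe to call unconditionally
# on either bytes or memoryview input, which is what the hunk above exploits.

def normalize_body(body: bytes | memoryview) -> str:
    """Decode a response body to str, accepting bytes or memoryview."""
    # bytes() is effectively a no-op for bytes input (CPython returns the
    # same immutable object) and copies the buffer for a memoryview.
    return bytes(body).decode("utf-8")

raw = b'{"custom_id": "req-1"}\n'  # hypothetical batch-file line
assert normalize_body(raw) == normalize_body(memoryview(raw))
# For exact bytes input, CPython hands back the identical object:
assert bytes(raw) is raw

This is why the branchy isinstance() check on the old side of the hunk is redundant: the conversion costs nothing when the body is already bytes.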