diff --git a/llama_stack/providers/utils/inference/prompt_adapter.py b/llama_stack/providers/utils/inference/prompt_adapter.py
index babfb736a..e49771980 100644
--- a/llama_stack/providers/utils/inference/prompt_adapter.py
+++ b/llama_stack/providers/utils/inference/prompt_adapter.py
@@ -185,8 +185,10 @@ async def localize_image_content(media: ImageContentItem) -> Tuple[bytes, str]:
 
         return content, format
     else:
-        pil_image = PIL_Image.open(io.BytesIO(image.data))
-        return image.data, pil_image.format
+        # data is a base64 encoded string, decode it to bytes first
+        data_bytes = base64.b64decode(image.data)
+        pil_image = PIL_Image.open(io.BytesIO(data_bytes))
+        return data_bytes, pil_image.format
 
 
 async def convert_image_content_to_url(