diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html
index f372257a0..cd92a10f5 100644
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@@ -2554,27 +2554,22 @@
             "ImageContentItem": {
                 "type": "object",
                 "properties": {
+                    "url": {
+                        "$ref": "#/components/schemas/URL"
+                    },
+                    "data": {
+                        "type": "string",
+                        "contentEncoding": "base64"
+                    },
                     "type": {
                         "type": "string",
                         "const": "image",
                         "default": "image"
-                    },
-                    "data": {
-                        "oneOf": [
-                            {
-                                "type": "string",
-                                "contentEncoding": "base64"
-                            },
-                            {
-                                "$ref": "#/components/schemas/URL"
-                            }
-                        ]
                     }
                 },
                 "additionalProperties": false,
                 "required": [
-                    "type",
-                    "data"
+                    "type"
                 ]
             },
             "InterleavedContent": {
diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml
index 52c3aaac6..08db0699e 100644
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@@ -1043,17 +1043,16 @@ components:
       additionalProperties: false
       properties:
         data:
-          oneOf:
-          - contentEncoding: base64
-            type: string
-          - $ref: '#/components/schemas/URL'
+          contentEncoding: base64
+          type: string
         type:
           const: image
           default: image
           type: string
+        url:
+          $ref: '#/components/schemas/URL'
       required:
       - type
-      - data
       type: object
     InferenceStep:
       additionalProperties: false
diff --git a/llama_stack/apis/common/content_types.py b/llama_stack/apis/common/content_types.py
index 1403dd782..316a4a5d6 100644
--- a/llama_stack/apis/common/content_types.py
+++ b/llama_stack/apis/common/content_types.py
@@ -4,11 +4,11 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from typing import Annotated, List, Literal, Union
+from typing import Annotated, List, Literal, Optional, Union
 
 from llama_models.schema_utils import json_schema_type, register_schema
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, model_validator
 
 
 @json_schema_type(
@@ -21,10 +21,21 @@ class URL(BaseModel):
         return self.uri
 
 
+class _URLOrData(BaseModel):  # base model carrying an optional URL and/or optional raw bytes
+    url: Optional[URL] = None  # optional; None when not supplied
+    data: Optional[bytes] = None  # optional; None when not supplied
+
+    @model_validator(mode="before")  # invoked on the raw input, ahead of per-field validation
+    @classmethod
+    def validator(cls, values):  # dict input passes through untouched; any other input is wrapped as the `url` value
+        if isinstance(values, dict):
+            return values
+        return {"url": values}  # NOTE(review): nothing here requires url or data to be set, nor forbids both
+
+
 @json_schema_type
-class ImageContentItem(BaseModel):
+class ImageContentItem(_URLOrData):  # `url` / `data` fields now come from the _URLOrData base
     type: Literal["image"] = "image"
-    data: Union[bytes, URL]
 
 
 @json_schema_type
diff --git a/llama_stack/apis/common/deployment_types.py b/llama_stack/apis/common/deployment_types.py
index 67096ac52..24de0cc91 100644
--- a/llama_stack/apis/common/deployment_types.py
+++ b/llama_stack/apis/common/deployment_types.py
@@ -11,6 +11,8 @@ from llama_models.schema_utils import json_schema_type
 
 from pydantic import BaseModel
 
+from llama_stack.apis.common.content_types import URL
+
 
 @json_schema_type
 class RestAPIMethod(Enum):
diff --git a/llama_stack/providers/tests/inference/test_vision_inference.py b/llama_stack/providers/tests/inference/test_vision_inference.py
index d29ace491..d58164676 100644
--- a/llama_stack/providers/tests/inference/test_vision_inference.py
+++ b/llama_stack/providers/tests/inference/test_vision_inference.py
@@ -11,7 +11,7 @@ import pytest
 
 from llama_models.llama3.api.datatypes import *  # noqa: F403
 from llama_stack.apis.inference import *  # noqa: F403
-from llama_stack.apis.common.content_types import ImageContentItem, TextContentItem
+from llama_stack.apis.common.content_types import ImageContentItem, TextContentItem, URL
 
 from .utils import group_chunks
 
@@ -32,7 +32,7 @@ class TestVisionModelInference:
             ),
             (
                 ImageContentItem(
-                    data=URL(
+                    url=URL(
                         uri="https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
                     )
                 ),
@@ -98,7 +98,7 @@ class TestVisionModelInference:
 
         images = [
             ImageContentItem(
-                data=URL(
+                url=URL(
                     uri="https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
                 )
             ),
diff --git a/llama_stack/providers/utils/inference/prompt_adapter.py b/llama_stack/providers/utils/inference/prompt_adapter.py
index 4f51467c2..42aa987c3 100644
--- a/llama_stack/providers/utils/inference/prompt_adapter.py
+++ b/llama_stack/providers/utils/inference/prompt_adapter.py
@@ -139,9 +139,9 @@ def request_has_media(request: Union[ChatCompletionRequest, CompletionRequest]):
 
 
 async def localize_image_content(media: ImageContentItem) -> Tuple[bytes, str]:
-    if isinstance(media.data, URL) and media.data.uri.startswith("http"):
+    if media.url and media.url.uri.startswith("http"):
         async with httpx.AsyncClient() as client:
-            r = await client.get(media.data.uri)
+            r = await client.get(media.url.uri)
             content = r.content
             content_type = r.headers.get("content-type")
             if content_type:
@@ -157,8 +157,8 @@ async def localize_image_content(media: ImageContentItem) -> Tuple[bytes, str]:
 async def convert_image_content_to_url(
     media: ImageContentItem, download: bool = False, include_format: bool = True
 ) -> str:
-    if isinstance(media.data, URL) and not download:
-        return media.data.uri
+    if media.url and not download:
+        return media.url.uri
 
     content, format = await localize_image_content(media)
     if include_format: