diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html
index f6dd1c8dc..139314776 100644
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@@ -3761,22 +3761,29 @@
             "ImageContentItem": {
                 "type": "object",
                 "properties": {
-                    "url": {
-                        "$ref": "#/components/schemas/URL"
-                    },
-                    "data": {
-                        "type": "string",
-                        "contentEncoding": "base64"
-                    },
                     "type": {
                         "type": "string",
                         "const": "image",
                         "default": "image"
+                    },
+                    "image": {
+                        "type": "object",
+                        "properties": {
+                            "url": {
+                                "$ref": "#/components/schemas/URL"
+                            },
+                            "data": {
+                                "type": "string",
+                                "contentEncoding": "base64"
+                            }
+                        },
+                        "additionalProperties": false
                     }
                 },
                 "additionalProperties": false,
                 "required": [
-                    "type"
+                    "type",
+                    "image"
                 ]
             },
             "InterleavedContent": {
@@ -4518,7 +4525,7 @@
                         "const": "image",
                         "default": "image"
                     },
-                    "data": {
+                    "image": {
                         "type": "string",
                         "contentEncoding": "base64"
                     }
@@ -4526,7 +4533,7 @@
                 "additionalProperties": false,
                 "required": [
                     "type",
-                    "data"
+                    "image"
                 ]
             },
             "TextDelta": {
@@ -4570,7 +4577,7 @@
                         "const": "tool_call",
                         "default": "tool_call"
                     },
-                    "content": {
+                    "tool_call": {
                         "oneOf": [
                             {
                                 "type": "string"
@@ -4587,7 +4594,7 @@
                 "additionalProperties": false,
                 "required": [
                     "type",
-                    "content",
+                    "tool_call",
                     "parse_status"
                 ]
             },
diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml
index 6bbaadf8d..1a8c44bc0 100644
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@@ -926,22 +926,27 @@ components:
     ImageContentItem:
       additionalProperties: false
       properties:
-        data:
-          contentEncoding: base64
-          type: string
+        image:
+          additionalProperties: false
+          properties:
+            data:
+              contentEncoding: base64
+              type: string
+            url:
+              $ref: '#/components/schemas/URL'
+          type: object
         type:
           const: image
           default: image
           type: string
-        url:
-          $ref: '#/components/schemas/URL'
       required:
       - type
+      - image
       type: object
     ImageDelta:
       additionalProperties: false
       properties:
-        data:
+        image:
           contentEncoding: base64
           type: string
         type:
@@ -950,7 +955,7 @@ components:
           type: string
       required:
       - type
-      - data
+      - image
       type: object
     InferenceStep:
       additionalProperties: false
@@ -2748,19 +2753,19 @@ components:
     ToolCallDelta:
       additionalProperties: false
       properties:
-        content:
+        parse_status:
+          $ref: '#/components/schemas/ToolCallParseStatus'
+        tool_call:
           oneOf:
           - type: string
           - $ref: '#/components/schemas/ToolCall'
-        parse_status:
-          $ref: '#/components/schemas/ToolCallParseStatus'
         type:
           const: tool_call
           default: tool_call
           type: string
       required:
       - type
-      - content
+      - tool_call
       - parse_status
       type: object
     ToolCallParseStatus:
diff --git a/llama_stack/apis/agents/event_logger.py b/llama_stack/apis/agents/event_logger.py
index ddb2a7cf4..7a607ffda 100644
--- a/llama_stack/apis/agents/event_logger.py
+++ b/llama_stack/apis/agents/event_logger.py
@@ -137,7 +137,7 @@ class EventLogger:
                                     event,
                                     LogEvent(
                                         role=None,
-                                        content=delta.content,
+                                        content=delta.tool_call,
                                         end="",
                                         color="cyan",
                                     ),
diff --git a/llama_stack/apis/common/content_types.py b/llama_stack/apis/common/content_types.py
index b845d09dd..1d8cea567 100644
--- a/llama_stack/apis/common/content_types.py
+++ b/llama_stack/apis/common/content_types.py
@@ -38,8 +38,9 @@ class _URLOrData(BaseModel):
 
 
 @json_schema_type
-class ImageContentItem(_URLOrData):
+class ImageContentItem(BaseModel):
     type: Literal["image"] = "image"
+    image: _URLOrData
 
 
 @json_schema_type
@@ -73,7 +74,7 @@ class TextDelta(BaseModel):
 @json_schema_type
 class ImageDelta(BaseModel):
     type: Literal["image"] = "image"
-    data: bytes
+    image: bytes
 
 
 @json_schema_type
@@ -91,7 +92,7 @@ class ToolCallDelta(BaseModel):
     # you either send an in-progress tool call so the client can stream a long
     # code generation or you send the final parsed tool call at the end of the
     # stream
-    content: Union[str, ToolCall]
+    tool_call: Union[str, ToolCall]
     parse_status: ToolCallParseStatus
 
 
diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
index 75fd75afc..1b375fba7 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
@@ -423,7 +423,7 @@ class ChatAgent(ShieldRunnerMixin):
                             step_id=step_id,
                             delta=ToolCallDelta(
                                 parse_status=ToolCallParseStatus.succeeded,
-                                content=ToolCall(
+                                tool_call=ToolCall(
                                     call_id="",
                                     tool_name=MEMORY_QUERY_TOOL,
                                     arguments={},
@@ -525,7 +525,7 @@ class ChatAgent(ShieldRunnerMixin):
                     delta = event.delta
                     if delta.type == "tool_call":
                         if delta.parse_status == ToolCallParseStatus.succeeded:
-                            tool_calls.append(delta.content)
+                            tool_calls.append(delta.tool_call)
                         if stream:
                             yield AgentTurnResponseStreamChunk(
                                 event=AgentTurnResponseEvent(
@@ -639,7 +639,7 @@ class ChatAgent(ShieldRunnerMixin):
                             tool_call=tool_call,
                             delta=ToolCallDelta(
                                 parse_status=ToolCallParseStatus.in_progress,
-                                content=tool_call,
+                                tool_call=tool_call,
                             ),
                         )
                     )
diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/llama_stack/providers/inline/inference/meta_reference/inference.py
index 31ad6fa28..73962ca7f 100644
--- a/llama_stack/providers/inline/inference/meta_reference/inference.py
+++ b/llama_stack/providers/inline/inference/meta_reference/inference.py
@@ -377,7 +377,7 @@ class MetaReferenceInferenceImpl(
                         event=ChatCompletionResponseEvent(
                             event_type=ChatCompletionResponseEventType.progress,
                             delta=ToolCallDelta(
-                                content="",
+                                tool_call="",
                                 parse_status=ToolCallParseStatus.started,
                             ),
                         )
@@ -395,7 +395,7 @@ class MetaReferenceInferenceImpl(
 
                 if ipython:
                     delta = ToolCallDelta(
-                        content=text,
+                        tool_call=text,
                         parse_status=ToolCallParseStatus.in_progress,
                     )
                 else:
@@ -434,7 +434,7 @@ class MetaReferenceInferenceImpl(
                     event=ChatCompletionResponseEvent(
                         event_type=ChatCompletionResponseEventType.progress,
                         delta=ToolCallDelta(
-                            content="",
+                            tool_call="",
                             parse_status=ToolCallParseStatus.failed,
                         ),
                         stop_reason=stop_reason,
@@ -446,7 +446,7 @@ class MetaReferenceInferenceImpl(
                     event=ChatCompletionResponseEvent(
                         event_type=ChatCompletionResponseEventType.progress,
                         delta=ToolCallDelta(
-                            content=tool_call,
+                            tool_call=tool_call,
                             parse_status=ToolCallParseStatus.succeeded,
                         ),
                         stop_reason=stop_reason,
diff --git a/llama_stack/providers/remote/inference/groq/groq_utils.py b/llama_stack/providers/remote/inference/groq/groq_utils.py
index b614c90f4..bd1a07d7c 100644
--- a/llama_stack/providers/remote/inference/groq/groq_utils.py
+++ b/llama_stack/providers/remote/inference/groq/groq_utils.py
@@ -218,7 +218,7 @@ async def convert_chat_completion_response_stream(
                 event=ChatCompletionResponseEvent(
                     event_type=event_type,
                     delta=ToolCallDelta(
-                        content=tool_call,
+                        tool_call=tool_call,
                         parse_status=ToolCallParseStatus.succeeded,
                     ),
                 )
diff --git a/llama_stack/providers/remote/inference/nvidia/openai_utils.py b/llama_stack/providers/remote/inference/nvidia/openai_utils.py
index e85c8dd21..0f753f80d 100644
--- a/llama_stack/providers/remote/inference/nvidia/openai_utils.py
+++ b/llama_stack/providers/remote/inference/nvidia/openai_utils.py
@@ -505,7 +505,9 @@ async def convert_openai_chat_completion_stream(
                 event=ChatCompletionResponseEvent(
                     event_type=next(event_type),
                     delta=ToolCallDelta(
-                        content=_convert_openai_tool_calls(choice.delta.tool_calls)[0],
+                        tool_call=_convert_openai_tool_calls(choice.delta.tool_calls)[
+                            0
+                        ],
                         parse_status=ToolCallParseStatus.succeeded,
                     ),
                     logprobs=_convert_openai_logprobs(choice.logprobs),
diff --git a/llama_stack/providers/tests/inference/groq/test_groq_utils.py b/llama_stack/providers/tests/inference/groq/test_groq_utils.py
index 0402a772c..f6f593f16 100644
--- a/llama_stack/providers/tests/inference/groq/test_groq_utils.py
+++ b/llama_stack/providers/tests/inference/groq/test_groq_utils.py
@@ -472,7 +472,7 @@ class TestConvertStreamChatCompletionResponse:
         iter = converted.__aiter__()
         chunk = await iter.__anext__()
         assert chunk.event.event_type == ChatCompletionResponseEventType.start
-        assert chunk.event.delta.content == ToolCall(
+        assert chunk.event.delta.tool_call == ToolCall(
             call_id="tool_call_id",
             tool_name="get_flight_info",
             arguments={"origin": "AU", "destination": "LAX"},
diff --git a/llama_stack/providers/tests/inference/test_text_inference.py b/llama_stack/providers/tests/inference/test_text_inference.py
index cbc8232c8..c39556b8e 100644
--- a/llama_stack/providers/tests/inference/test_text_inference.py
+++ b/llama_stack/providers/tests/inference/test_text_inference.py
@@ -470,16 +470,16 @@ class TestInference:
             )
             first = grouped[ChatCompletionResponseEventType.progress][0]
             if not isinstance(
-                first.event.delta.content, ToolCall
+                first.event.delta.tool_call, ToolCall
             ):  # first chunk may contain entire call
                 assert first.event.delta.parse_status == ToolCallParseStatus.started
 
         last = grouped[ChatCompletionResponseEventType.progress][-1]
         # assert last.event.stop_reason == expected_stop_reason
         assert last.event.delta.parse_status == ToolCallParseStatus.succeeded
-        assert isinstance(last.event.delta.content, ToolCall)
+        assert isinstance(last.event.delta.tool_call, ToolCall)
 
-        call = last.event.delta.content
+        call = last.event.delta.tool_call
         assert call.tool_name == "get_weather"
         assert "location" in call.arguments
         assert "San Francisco" in call.arguments["location"]
diff --git a/llama_stack/providers/tests/inference/test_vision_inference.py b/llama_stack/providers/tests/inference/test_vision_inference.py
index df2f3cfb9..100a70236 100644
--- a/llama_stack/providers/tests/inference/test_vision_inference.py
+++ b/llama_stack/providers/tests/inference/test_vision_inference.py
@@ -32,13 +32,15 @@ class TestVisionModelInference:
         "image, expected_strings",
         [
             (
-                ImageContentItem(data=PASTA_IMAGE),
+                ImageContentItem(image=dict(data=PASTA_IMAGE)),
                 ["spaghetti"],
             ),
             (
                 ImageContentItem(
-                    url=URL(
-                        uri="https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
+                    image=dict(
+                        url=URL(
+                            uri="https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
+                        )
                     )
                 ),
                 ["puppy"],
@@ -103,8 +105,10 @@ class TestVisionModelInference:
 
         images = [
             ImageContentItem(
-                url=URL(
-                    uri="https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
+                image=dict(
+                    url=URL(
+                        uri="https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
+                    )
                 )
             ),
         ]
diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py
index 127fd19f3..6c93f49c0 100644
--- a/llama_stack/providers/utils/inference/openai_compat.py
+++ b/llama_stack/providers/utils/inference/openai_compat.py
@@ -240,7 +240,7 @@ async def process_chat_completion_stream_response(
                 event=ChatCompletionResponseEvent(
                     event_type=ChatCompletionResponseEventType.progress,
                     delta=ToolCallDelta(
-                        content="",
+                        tool_call="",
                         parse_status=ToolCallParseStatus.started,
                     ),
                 )
@@ -260,7 +260,7 @@ async def process_chat_completion_stream_response(
         if ipython:
             buffer += text
             delta = ToolCallDelta(
-                content=text,
+                tool_call=text,
                 parse_status=ToolCallParseStatus.in_progress,
             )
 
@@ -289,7 +289,7 @@ async def process_chat_completion_stream_response(
             event=ChatCompletionResponseEvent(
                 event_type=ChatCompletionResponseEventType.progress,
                 delta=ToolCallDelta(
-                    content="",
+                    tool_call="",
                     parse_status=ToolCallParseStatus.failed,
                 ),
                 stop_reason=stop_reason,
@@ -301,7 +301,7 @@ async def process_chat_completion_stream_response(
             event=ChatCompletionResponseEvent(
                 event_type=ChatCompletionResponseEventType.progress,
                 delta=ToolCallDelta(
-                    content=tool_call,
+                    tool_call=tool_call,
                     parse_status=ToolCallParseStatus.succeeded,
                 ),
                 stop_reason=stop_reason,
diff --git a/llama_stack/providers/utils/inference/prompt_adapter.py b/llama_stack/providers/utils/inference/prompt_adapter.py
index 701b2ca3b..f5298d844 100644
--- a/llama_stack/providers/utils/inference/prompt_adapter.py
+++ b/llama_stack/providers/utils/inference/prompt_adapter.py
@@ -113,28 +113,29 @@ async def interleaved_content_convert_to_raw(
         elif isinstance(c, TextContentItem):
             return RawTextItem(text=c.text)
         elif isinstance(c, ImageContentItem):
-            if c.url:
+            image = c.image
+            if image.url:
                 # Load image bytes from URL
-                if c.url.uri.startswith("data"):
-                    match = re.match(r"data:image/(\w+);base64,(.+)", c.url.uri)
+                if image.url.uri.startswith("data"):
+                    match = re.match(r"data:image/(\w+);base64,(.+)", image.url.uri)
                     if not match:
                         raise ValueError(
-                            f"Invalid data URL format, {c.url.uri[:40]}..."
+                            f"Invalid data URL format, {image.url.uri[:40]}..."
                         )
                     _, image_data = match.groups()
                     data = base64.b64decode(image_data)
-                elif c.url.uri.startswith("file://"):
-                    path = c.url.uri[len("file://") :]
+                elif image.url.uri.startswith("file://"):
+                    path = image.url.uri[len("file://") :]
                     with open(path, "rb") as f:
                         data = f.read()  # type: ignore
-                elif c.url.uri.startswith("http"):
+                elif image.url.uri.startswith("http"):
                     async with httpx.AsyncClient() as client:
-                        response = await client.get(c.url.uri)
+                        response = await client.get(image.url.uri)
                         data = response.content
                 else:
                     raise ValueError("Unsupported URL type")
-            elif c.data:
-                data = c.data
+            elif image.data:
+                data = image.data
             else:
                 raise ValueError("No data or URL provided")
 
@@ -170,26 +171,29 @@ def request_has_media(request: Union[ChatCompletionRequest, CompletionRequest]):
 
 
 async def localize_image_content(media: ImageContentItem) -> Tuple[bytes, str]:
-    if media.url and media.url.uri.startswith("http"):
+    image = media.image
+    if image.url and image.url.uri.startswith("http"):
         async with httpx.AsyncClient() as client:
-            r = await client.get(media.url.uri)
+            r = await client.get(image.url.uri)
             content = r.content
             content_type = r.headers.get("content-type")
             if content_type:
                 format = content_type.split("/")[-1]
             else:
                 format = "png"
+
         return content, format
     else:
-        image = PIL_Image.open(io.BytesIO(media.data))
-        return media.data, image.format
+        pil_image = PIL_Image.open(io.BytesIO(image.data))
+        return image.data, pil_image.format
 
 
 async def convert_image_content_to_url(
     media: ImageContentItem, download: bool = False, include_format: bool = True
 ) -> str:
-    if media.url and (not download or media.url.uri.startswith("data")):
-        return media.url.uri
+    image = media.image
+    if image.url and (not download or image.url.uri.startswith("data")):
+        return image.url.uri
 
     content, format = await localize_image_content(media)
     if include_format:
diff --git a/tests/client-sdk/inference/test_inference.py b/tests/client-sdk/inference/test_inference.py
index 08c7e1693..b1f1dd139 100644
--- a/tests/client-sdk/inference/test_inference.py
+++ b/tests/client-sdk/inference/test_inference.py
@@ -258,7 +258,7 @@ def extract_tool_invocation_content(response):
     for chunk in response:
         delta = chunk.event.delta
         if delta.type == "tool_call" and delta.parse_status == "succeeded":
-            call = delta.content
+            call = delta.tool_call
             tool_invocation_content += f"[{call.tool_name}, {call.arguments}]"
     return tool_invocation_content
 
@@ -321,9 +321,11 @@ def test_image_chat_completion_non_streaming(llama_stack_client, vision_model_id
         "content": [
             {
                 "type": "image",
-                "url": {
-                    # TODO: Replace with Github based URI to resources/sample1.jpg
-                    "uri": "https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
+                "image": {
+                    "url": {
+                        # TODO: Replace with Github based URI to resources/sample1.jpg
+                        "uri": "https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
+                    },
                 },
             },
             {
@@ -348,9 +350,11 @@ def test_image_chat_completion_streaming(llama_stack_client, vision_model_id):
         "content": [
             {
                 "type": "image",
-                "url": {
-                    # TODO: Replace with Github based URI to resources/sample1.jpg
-                    "uri": "https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
+                "image": {
+                    "url": {
+                        # TODO: Replace with Github based URI to resources/sample1.jpg
+                        "uri": "https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
+                    },
                 },
             },
             {
@@ -374,14 +378,15 @@ def test_image_chat_completion_streaming(llama_stack_client, vision_model_id):
 def test_image_chat_completion_base64_url(
     llama_stack_client, vision_model_id, base64_image_url
 ):
-
     message = {
         "role": "user",
         "content": [
             {
                 "type": "image",
-                "url": {
-                    "uri": base64_image_url,
+                "image": {
+                    "url": {
+                        "uri": base64_image_url,
+                    },
                 },
             },
             {
diff --git a/tests/client-sdk/safety/test_safety.py b/tests/client-sdk/safety/test_safety.py
index 6af417a09..ac3221364 100644
--- a/tests/client-sdk/safety/test_safety.py
+++ b/tests/client-sdk/safety/test_safety.py
@@ -141,7 +141,7 @@ def test_safety_with_image(llama_stack_client, model_providers):
                 },
                 {
                     "type": "image",
-                    "url": {"uri": data_url_from_image(file_path)},
+                    "image": {"url": {"uri": data_url_from_image(file_path)}},
                 },
             ],
         }