diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py
index 202d3732b..05c609867 100644
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@@ -177,20 +177,44 @@ class ContentBuilder:
     ) -> Dict[str, MediaType]:
         "Creates the content subtree for a request or response."
 
-        def has_iterator_type(t):
-            if typing.get_origin(t) is typing.Union:
-                return any(has_iterator_type(a) for a in typing.get_args(t))
+        def is_iterator_type(t):
+            # heuristic: streaming payloads are recognized by their type name
+            return "StreamChunk" in str(t)
+
+        def get_media_type(t):
+            # map a single (non-union) payload type to its media type
+            if is_generic_list(t):
+                return "application/jsonl"
+            elif is_iterator_type(t):
+                return "text/event-stream"
             else:
-                # TODO: needs a proper fix where we let all types correctly flow upwards
-                # and then test against AsyncIterator
-                return "StreamChunk" in str(t)
+                return "application/json"
+
+        if typing.get_origin(payload_type) is typing.Union:
+            # group the union members by media type; keying a dict directly on
+            # the media type would silently drop all but the last member when
+            # several members share one
+            members_by_media_type: Dict[str, list] = {}
+            for member in typing.get_args(payload_type):
+                media_type = get_media_type(member)
+                members_by_media_type.setdefault(media_type, []).append(member)
+
+            if len(members_by_media_type) == 1:
+                # all types have the same media type
+                media_type = next(iter(members_by_media_type))
+                return {media_type: self.build_media_type(payload_type, examples)}
+
+            # different types have different media types
+            return {
+                media_type: self.build_media_type(
+                    typing.Union[tuple(members)], examples
+                )
+                for media_type, members in members_by_media_type.items()
+            }
 
         if is_generic_list(payload_type):
             media_type = "application/jsonl"
             item_type = unwrap_generic_list(payload_type)
-        elif has_iterator_type(payload_type):
-            item_type = payload_type
-            media_type = "text/event-stream"
         else:
             media_type = "application/json"
             item_type = payload_type
diff --git a/docs/openapi_generator/strong_typing/schema.py b/docs/openapi_generator/strong_typing/schema.py
index f4393041f..577428035 100644
--- a/docs/openapi_generator/strong_typing/schema.py
+++ b/docs/openapi_generator/strong_typing/schema.py
@@ -248,7 +248,9 @@ class JsonSchemaGenerator:
                 type_schema.update(self._metadata_to_schema(m))
         return type_schema
 
-    def _simple_type_to_schema(self, typ: TypeLike) -> Optional[Schema]:
+    def _simple_type_to_schema(
+        self, typ: TypeLike, json_schema_extra: Optional[dict] = None
+    ) -> Optional[Schema]:
         """
         Returns the JSON schema associated with a simple, unrestricted type.
 
@@ -264,6 +266,11 @@ class JsonSchemaGenerator:
         elif typ is float:
             return {"type": "number"}
         elif typ is str:
+            if json_schema_extra and "contentEncoding" in json_schema_extra:
+                return {
+                    "type": "string",
+                    "contentEncoding": json_schema_extra["contentEncoding"],
+                }
             return {"type": "string"}
         elif typ is bytes:
             return {"type": "string", "contentEncoding": "base64"}
@@ -303,7 +310,12 @@ class JsonSchemaGenerator:
             # not a simple type
             return None
 
-    def type_to_schema(self, data_type: TypeLike, force_expand: bool = False) -> Schema:
+    def type_to_schema(
+        self,
+        data_type: TypeLike,
+        force_expand: bool = False,
+        json_schema_extra: Optional[dict] = None,
+    ) -> Schema:
         """
         Returns the JSON schema associated with a type.
 
@@ -313,7 +325,7 @@ class JsonSchemaGenerator:
         """
 
         # short-circuit for common simple types
-        schema = self._simple_type_to_schema(data_type)
+        schema = self._simple_type_to_schema(data_type, json_schema_extra)
         if schema is not None:
             return schema
 
@@ -486,15 +498,9 @@ class JsonSchemaGenerator:
         property_docstrings = get_class_property_docstrings(
             typ, self.options.property_description_fun
         )
-
         properties: Dict[str, Schema] = {}
         required: List[str] = []
         for property_name, property_type in get_class_properties(typ):
-            defaults = {}
-            if "model_fields" in members:
-                f = members["model_fields"]
-                defaults = {k: finfo.default for k, finfo in f.items()}
-
             # rename property if an alias name is specified
             alias = get_annotation(property_type, Alias)
             if alias:
@@ -502,11 +508,29 @@ class JsonSchemaGenerator:
             else:
                 output_name = property_name
 
+            defaults = {}
+            json_schema_extra = None
+            if "model_fields" in members:
+                f = members["model_fields"]
+                defaults = {k: finfo.default for k, finfo in f.items()}
+                # look the field up by its attribute name (output_name may
+                # have been replaced by an alias) and tolerate a missing
+                # entry instead of raising AttributeError on None
+                field_info = f.get(property_name)
+                extra = getattr(field_info, "json_schema_extra", None)
+                # type_to_schema expects a dict; skip any other value
+                if isinstance(extra, dict):
+                    json_schema_extra = extra
+
             if is_type_optional(property_type):
                 optional_type: type = unwrap_optional_type(property_type)
-                property_def = self.type_to_schema(optional_type)
+                property_def = self.type_to_schema(
+                    optional_type, json_schema_extra=json_schema_extra
+                )
             else:
-                property_def = self.type_to_schema(property_type)
+                property_def = self.type_to_schema(
+                    property_type, json_schema_extra=json_schema_extra
+                )
                 required.append(output_name)
 
             # check if attribute has a default value initializer
diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html
index 0454e22ec..85ae516f5 100644
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@@ -192,16 +192,14 @@
                     "200": {
                         "description": "If stream=False, returns a ChatCompletionResponse with the full completion. If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk",
                         "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/ChatCompletionResponse"
+                                }
+                            },
                             "text/event-stream": {
                                 "schema": {
-                                    "oneOf": [
-                                        {
-                                            "$ref": "#/components/schemas/ChatCompletionResponse"
-                                        },
-                                        {
-                                            "$ref": "#/components/schemas/ChatCompletionResponseStreamChunk"
-                                        }
-                                    ]
+                                    "$ref": "#/components/schemas/ChatCompletionResponseStreamChunk"
                                 }
                             }
                         }
@@ -230,16 +228,14 @@
                     "200": {
                         "description": "If stream=False, returns a CompletionResponse with the full completion. If stream=True, returns an SSE event stream of CompletionResponseStreamChunk",
                         "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/CompletionResponse"
+                                }
+                            },
                             "text/event-stream": {
                                 "schema": {
-                                    "oneOf": [
-                                        {
-                                            "$ref": "#/components/schemas/CompletionResponse"
-                                        },
-                                        {
-                                            "$ref": "#/components/schemas/CompletionResponseStreamChunk"
-                                        }
-                                    ]
+                                    "$ref": "#/components/schemas/CompletionResponseStreamChunk"
                                 }
                             }
                         }
@@ -337,16 +333,14 @@
                     "200": {
                         "description": "A single turn in an interaction with an Agentic System. **OR** streamed agent turn completion response.",
                         "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Turn"
+                                }
+                            },
                             "text/event-stream": {
                                 "schema": {
-                                    "oneOf": [
-                                        {
-                                            "$ref": "#/components/schemas/Turn"
-                                        },
-                                        {
-                                            "$ref": "#/components/schemas/AgentTurnResponseStreamChunk"
-                                        }
-                                    ]
+                                    "$ref": "#/components/schemas/AgentTurnResponseStreamChunk"
                                 }
                             }
                         }
@@ -2439,27 +2433,32 @@
                     "type": {
                         "type": "string",
                         "const": "image",
-                        "default": "image"
+                        "default": "image",
+                        "description": "Discriminator type of the content item. Always \"image\""
                     },
                     "image": {
                         "type": "object",
                         "properties": {
                             "url": {
-                                "$ref": "#/components/schemas/URL"
+                                "$ref": "#/components/schemas/URL",
+                                "description": "A URL of the image or data URL in the format of data:image/{type};base64,{data}. Note that URL could have length limits."
                             },
                             "data": {
                                 "type": "string",
-                                "contentEncoding": "base64"
+                                "contentEncoding": "base64",
+                                "description": "base64 encoded image data as string"
                             }
                         },
-                        "additionalProperties": false
+                        "additionalProperties": false,
+                        "description": "Image as a base64 encoded string or an URL"
                     }
                 },
                 "additionalProperties": false,
                 "required": [
                     "type",
                     "image"
-                ]
+                ],
+                "title": "A image content item"
             },
             "InterleavedContent": {
                 "oneOf": [
@@ -2647,17 +2646,20 @@
                     "type": {
                         "type": "string",
                         "const": "text",
-                        "default": "text"
+                        "default": "text",
+                        "description": "Discriminator type of the content item. Always \"text\""
                     },
                     "text": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "Text content"
                     }
                 },
                 "additionalProperties": false,
                 "required": [
                     "type",
                     "text"
-                ]
+                ],
+                "title": "A text content item"
             },
             "ToolCall": {
                 "type": "object",
@@ -3772,235 +3774,6 @@
                     "messages"
                 ]
             },
-            "AgentTurnResponseEvent": {
-                "type": "object",
-                "properties": {
-                    "payload": {
-                        "$ref": "#/components/schemas/AgentTurnResponseEventPayload"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "payload"
-                ]
-            },
-            "AgentTurnResponseEventPayload": {
-                "oneOf": [
-                    {
-                        "$ref": "#/components/schemas/AgentTurnResponseStepStartPayload"
-                    },
-                    {
-                        "$ref": "#/components/schemas/AgentTurnResponseStepProgressPayload"
-                    },
-                    {
-                        "$ref": "#/components/schemas/AgentTurnResponseStepCompletePayload"
-                    },
-                    {
-                        "$ref": "#/components/schemas/AgentTurnResponseTurnStartPayload"
-                    },
-                    {
-                        "$ref": "#/components/schemas/AgentTurnResponseTurnCompletePayload"
-                    }
-                ],
-                "discriminator": {
-                    "propertyName": "event_type",
-                    "mapping": {
-                        "step_start": "#/components/schemas/AgentTurnResponseStepStartPayload",
-                        "step_progress": "#/components/schemas/AgentTurnResponseStepProgressPayload",
-                        "step_complete": "#/components/schemas/AgentTurnResponseStepCompletePayload",
-                        "turn_start": "#/components/schemas/AgentTurnResponseTurnStartPayload",
-                        "turn_complete": "#/components/schemas/AgentTurnResponseTurnCompletePayload"
-                    }
-                }
-            },
-            "AgentTurnResponseStepCompletePayload": {
-                "type": "object",
-                "properties": {
-                    "event_type": {
-                        "type": "string",
-                        "const": "step_complete",
-                        "default": "step_complete"
-                    },
-                    "step_type": {
-                        "type": "string",
-                        "enum": [
-                            "inference",
-                            "tool_execution",
-                            "shield_call",
-                            "memory_retrieval"
-                        ]
-                    },
-                    "step_id": {
-                        "type": "string"
-                    },
-                    "step_details": {
-                        "oneOf": [
-                            {
-                                "$ref": "#/components/schemas/InferenceStep"
-                            },
-                            {
-                                "$ref": "#/components/schemas/ToolExecutionStep"
-                            },
-                            {
-                                "$ref": "#/components/schemas/ShieldCallStep"
-                            },
-                            {
-                                "$ref": "#/components/schemas/MemoryRetrievalStep"
-                            }
-                        ],
-                        "discriminator": {
-                            "propertyName": "step_type",
-                            "mapping": {
-                                "inference": "#/components/schemas/InferenceStep",
-                                "tool_execution": "#/components/schemas/ToolExecutionStep",
-                                "shield_call": "#/components/schemas/ShieldCallStep",
-                                "memory_retrieval": "#/components/schemas/MemoryRetrievalStep"
-                            }
-                        }
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "event_type",
-                    "step_type",
-                    "step_id",
-                    "step_details"
-                ]
-            },
-            "AgentTurnResponseStepProgressPayload": {
-                "type": "object",
-                "properties": {
-                    "event_type": {
-                        "type": "string",
-                        "const": "step_progress",
-                        "default": "step_progress"
-                    },
-                    "step_type": {
-                        "type": "string",
-                        "enum": [
-                            "inference",
-                            "tool_execution",
-                            "shield_call",
-                            "memory_retrieval"
-                        ]
-                    },
-                    "step_id": {
-                        "type": "string"
-                    },
-                    "delta": {
-                        "$ref": "#/components/schemas/ContentDelta"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "event_type",
-                    "step_type",
-                    "step_id",
-                    "delta"
-                ]
-            },
-            "AgentTurnResponseStepStartPayload": {
-                "type": "object",
-                "properties": {
-                    "event_type": {
-                        "type": "string",
-                        "const": "step_start",
-                        "default": "step_start"
-                    },
-                    "step_type": {
-                        "type": "string",
-                        "enum": [
-                            "inference",
-                            "tool_execution",
-                            "shield_call",
-                            "memory_retrieval"
-                        ]
-                    },
-                    "step_id": {
-                        "type": "string"
-                    },
-                    "metadata": {
-                        "type": "object",
-                        "additionalProperties": {
-                            "oneOf": [
-                                {
-                                    "type": "null"
-                                },
-                                {
-                                    "type": "boolean"
-                                },
-                                {
-                                    "type": "number"
-                                },
-                                {
-                                    "type": "string"
-                                },
-                                {
-                                    "type": "array"
-                                },
-                                {
-                                    "type": "object"
-                                }
-                            ]
-                        }
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "event_type",
-                    "step_type",
-                    "step_id"
-                ]
-            },
-            "AgentTurnResponseStreamChunk": {
-                "type": "object",
-                "properties": {
-                    "event": {
-                        "$ref": "#/components/schemas/AgentTurnResponseEvent"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "event"
-                ],
-                "title": "streamed agent turn completion response."
-            },
-            "AgentTurnResponseTurnCompletePayload": {
-                "type": "object",
-                "properties": {
-                    "event_type": {
-                        "type": "string",
-                        "const": "turn_complete",
-                        "default": "turn_complete"
-                    },
-                    "turn": {
-                        "$ref": "#/components/schemas/Turn"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "event_type",
-                    "turn"
-                ]
-            },
-            "AgentTurnResponseTurnStartPayload": {
-                "type": "object",
-                "properties": {
-                    "event_type": {
-                        "type": "string",
-                        "const": "turn_start",
-                        "default": "turn_start"
-                    },
-                    "turn_id": {
-                        "type": "string"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "event_type",
-                    "turn_id"
-                ]
-            },
             "InferenceStep": {
                 "type": "object",
                 "properties": {
@@ -4341,6 +4114,235 @@
                     "error"
                 ]
             },
+            "AgentTurnResponseEvent": {
+                "type": "object",
+                "properties": {
+                    "payload": {
+                        "$ref": "#/components/schemas/AgentTurnResponseEventPayload"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "payload"
+                ]
+            },
+            "AgentTurnResponseEventPayload": {
+                "oneOf": [
+                    {
+                        "$ref": "#/components/schemas/AgentTurnResponseStepStartPayload"
+                    },
+                    {
+                        "$ref": "#/components/schemas/AgentTurnResponseStepProgressPayload"
+                    },
+                    {
+                        "$ref": "#/components/schemas/AgentTurnResponseStepCompletePayload"
+                    },
+                    {
+                        "$ref": "#/components/schemas/AgentTurnResponseTurnStartPayload"
+                    },
+                    {
+                        "$ref": "#/components/schemas/AgentTurnResponseTurnCompletePayload"
+                    }
+                ],
+                "discriminator": {
+                    "propertyName": "event_type",
+                    "mapping": {
+                        "step_start": "#/components/schemas/AgentTurnResponseStepStartPayload",
+                        "step_progress": "#/components/schemas/AgentTurnResponseStepProgressPayload",
+                        "step_complete": "#/components/schemas/AgentTurnResponseStepCompletePayload",
+                        "turn_start": "#/components/schemas/AgentTurnResponseTurnStartPayload",
+                        "turn_complete": "#/components/schemas/AgentTurnResponseTurnCompletePayload"
+                    }
+                }
+            },
+            "AgentTurnResponseStepCompletePayload": {
+                "type": "object",
+                "properties": {
+                    "event_type": {
+                        "type": "string",
+                        "const": "step_complete",
+                        "default": "step_complete"
+                    },
+                    "step_type": {
+                        "type": "string",
+                        "enum": [
+                            "inference",
+                            "tool_execution",
+                            "shield_call",
+                            "memory_retrieval"
+                        ]
+                    },
+                    "step_id": {
+                        "type": "string"
+                    },
+                    "step_details": {
+                        "oneOf": [
+                            {
+                                "$ref": "#/components/schemas/InferenceStep"
+                            },
+                            {
+                                "$ref": "#/components/schemas/ToolExecutionStep"
+                            },
+                            {
+                                "$ref": "#/components/schemas/ShieldCallStep"
+                            },
+                            {
+                                "$ref": "#/components/schemas/MemoryRetrievalStep"
+                            }
+                        ],
+                        "discriminator": {
+                            "propertyName": "step_type",
+                            "mapping": {
+                                "inference": "#/components/schemas/InferenceStep",
+                                "tool_execution": "#/components/schemas/ToolExecutionStep",
+                                "shield_call": "#/components/schemas/ShieldCallStep",
+                                "memory_retrieval": "#/components/schemas/MemoryRetrievalStep"
+                            }
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "event_type",
+                    "step_type",
+                    "step_id",
+                    "step_details"
+                ]
+            },
+            "AgentTurnResponseStepProgressPayload": {
+                "type": "object",
+                "properties": {
+                    "event_type": {
+                        "type": "string",
+                        "const": "step_progress",
+                        "default": "step_progress"
+                    },
+                    "step_type": {
+                        "type": "string",
+                        "enum": [
+                            "inference",
+                            "tool_execution",
+                            "shield_call",
+                            "memory_retrieval"
+                        ]
+                    },
+                    "step_id": {
+                        "type": "string"
+                    },
+                    "delta": {
+                        "$ref": "#/components/schemas/ContentDelta"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "event_type",
+                    "step_type",
+                    "step_id",
+                    "delta"
+                ]
+            },
+            "AgentTurnResponseStepStartPayload": {
+                "type": "object",
+                "properties": {
+                    "event_type": {
+                        "type": "string",
+                        "const": "step_start",
+                        "default": "step_start"
+                    },
+                    "step_type": {
+                        "type": "string",
+                        "enum": [
+                            "inference",
+                            "tool_execution",
+                            "shield_call",
+                            "memory_retrieval"
+                        ]
+                    },
+                    "step_id": {
+                        "type": "string"
+                    },
+                    "metadata": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "null"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "array"
+                                },
+                                {
+                                    "type": "object"
+                                }
+                            ]
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "event_type",
+                    "step_type",
+                    "step_id"
+                ]
+            },
+            "AgentTurnResponseStreamChunk": {
+                "type": "object",
+                "properties": {
+                    "event": {
+                        "$ref": "#/components/schemas/AgentTurnResponseEvent"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "event"
+                ],
+                "title": "streamed agent turn completion response."
+            },
+            "AgentTurnResponseTurnCompletePayload": {
+                "type": "object",
+                "properties": {
+                    "event_type": {
+                        "type": "string",
+                        "const": "turn_complete",
+                        "default": "turn_complete"
+                    },
+                    "turn": {
+                        "$ref": "#/components/schemas/Turn"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "event_type",
+                    "turn"
+                ]
+            },
+            "AgentTurnResponseTurnStartPayload": {
+                "type": "object",
+                "properties": {
+                    "event_type": {
+                        "type": "string",
+                        "const": "turn_start",
+                        "default": "turn_start"
+                    },
+                    "turn_id": {
+                        "type": "string"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "event_type",
+                    "turn_id"
+                ]
+            },
             "EmbeddingsRequest": {
                 "type": "object",
                 "properties": {
diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml
index 0734ef236..2a95acf38 100644
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@@ -113,11 +113,12 @@ paths:
             If stream=False, returns a ChatCompletionResponse with the full completion.
             If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk
           content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ChatCompletionResponse'
             text/event-stream:
               schema:
-                oneOf:
-                  - $ref: '#/components/schemas/ChatCompletionResponse'
-                  - $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
+                $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
       tags:
         - Inference
       summary: >-
@@ -137,11 +138,12 @@ paths:
             If stream=False, returns a CompletionResponse with the full completion.
             If stream=True, returns an SSE event stream of CompletionResponseStreamChunk
           content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/CompletionResponse'
             text/event-stream:
               schema:
-                oneOf:
-                  - $ref: '#/components/schemas/CompletionResponse'
-                  - $ref: '#/components/schemas/CompletionResponseStreamChunk'
+                $ref: '#/components/schemas/CompletionResponseStreamChunk'
       tags:
         - Inference
       summary: >-
@@ -202,11 +204,12 @@ paths:
             A single turn in an interaction with an Agentic System. **OR** streamed
             agent turn completion response.
           content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Turn'
             text/event-stream:
               schema:
-                oneOf:
-                  - $ref: '#/components/schemas/Turn'
-                  - $ref: '#/components/schemas/AgentTurnResponseStreamChunk'
+                $ref: '#/components/schemas/AgentTurnResponseStreamChunk'
       tags:
         - Agents
       parameters:
@@ -1466,19 +1469,28 @@ components:
           type: string
           const: image
           default: image
+          description: >-
+            Discriminator type of the content item. Always "image"
         image:
           type: object
           properties:
             url:
               $ref: '#/components/schemas/URL'
+              description: >-
+                A URL of the image or data URL in the format of data:image/{type};base64,{data}.
+                Note that URL could have length limits.
             data:
               type: string
               contentEncoding: base64
+              description: base64 encoded image data as string
           additionalProperties: false
+          description: >-
+            Image as a base64 encoded string or an URL
       additionalProperties: false
       required:
         - type
         - image
+      title: A image content item
     InterleavedContent:
       oneOf:
         - type: string
@@ -1598,12 +1610,16 @@ components:
           type: string
           const: text
           default: text
+          description: >-
+            Discriminator type of the content item. Always "text"
         text:
           type: string
+          description: Text content
       additionalProperties: false
       required:
         - type
         - text
+      title: A text content item
     ToolCall:
       type: object
       properties:
@@ -2381,154 +2397,6 @@ components:
       additionalProperties: false
       required:
         - messages
-    AgentTurnResponseEvent:
-      type: object
-      properties:
-        payload:
-          $ref: '#/components/schemas/AgentTurnResponseEventPayload'
-      additionalProperties: false
-      required:
-        - payload
-    AgentTurnResponseEventPayload:
-      oneOf:
-        - $ref: '#/components/schemas/AgentTurnResponseStepStartPayload'
-        - $ref: '#/components/schemas/AgentTurnResponseStepProgressPayload'
-        - $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload'
-        - $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload'
-        - $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
-      discriminator:
-        propertyName: event_type
-        mapping:
-          step_start: '#/components/schemas/AgentTurnResponseStepStartPayload'
-          step_progress: '#/components/schemas/AgentTurnResponseStepProgressPayload'
-          step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload'
-          turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload'
-          turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
-    AgentTurnResponseStepCompletePayload:
-      type: object
-      properties:
-        event_type:
-          type: string
-          const: step_complete
-          default: step_complete
-        step_type:
-          type: string
-          enum:
-            - inference
-            - tool_execution
-            - shield_call
-            - memory_retrieval
-        step_id:
-          type: string
-        step_details:
-          oneOf:
-            - $ref: '#/components/schemas/InferenceStep'
-            - $ref: '#/components/schemas/ToolExecutionStep'
-            - $ref: '#/components/schemas/ShieldCallStep'
-            - $ref: '#/components/schemas/MemoryRetrievalStep'
-          discriminator:
-            propertyName: step_type
-            mapping:
-              inference: '#/components/schemas/InferenceStep'
-              tool_execution: '#/components/schemas/ToolExecutionStep'
-              shield_call: '#/components/schemas/ShieldCallStep'
-              memory_retrieval: '#/components/schemas/MemoryRetrievalStep'
-      additionalProperties: false
-      required:
-        - event_type
-        - step_type
-        - step_id
-        - step_details
-    AgentTurnResponseStepProgressPayload:
-      type: object
-      properties:
-        event_type:
-          type: string
-          const: step_progress
-          default: step_progress
-        step_type:
-          type: string
-          enum:
-            - inference
-            - tool_execution
-            - shield_call
-            - memory_retrieval
-        step_id:
-          type: string
-        delta:
-          $ref: '#/components/schemas/ContentDelta'
-      additionalProperties: false
-      required:
-        - event_type
-        - step_type
-        - step_id
-        - delta
-    AgentTurnResponseStepStartPayload:
-      type: object
-      properties:
-        event_type:
-          type: string
-          const: step_start
-          default: step_start
-        step_type:
-          type: string
-          enum:
-            - inference
-            - tool_execution
-            - shield_call
-            - memory_retrieval
-        step_id:
-          type: string
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-      additionalProperties: false
-      required:
-        - event_type
-        - step_type
-        - step_id
-    AgentTurnResponseStreamChunk:
-      type: object
-      properties:
-        event:
-          $ref: '#/components/schemas/AgentTurnResponseEvent'
-      additionalProperties: false
-      required:
-        - event
-      title: streamed agent turn completion response.
-    AgentTurnResponseTurnCompletePayload:
-      type: object
-      properties:
-        event_type:
-          type: string
-          const: turn_complete
-          default: turn_complete
-        turn:
-          $ref: '#/components/schemas/Turn'
-      additionalProperties: false
-      required:
-        - event_type
-        - turn
-    AgentTurnResponseTurnStartPayload:
-      type: object
-      properties:
-        event_type:
-          type: string
-          const: turn_start
-          default: turn_start
-        turn_id:
-          type: string
-      additionalProperties: false
-      required:
-        - event_type
-        - turn_id
     InferenceStep:
       type: object
       properties:
@@ -2752,6 +2620,154 @@ components:
         - info
         - warn
         - error
+    AgentTurnResponseEvent:
+      type: object
+      properties:
+        payload:
+          $ref: '#/components/schemas/AgentTurnResponseEventPayload'
+      additionalProperties: false
+      required:
+        - payload
+    AgentTurnResponseEventPayload:
+      oneOf:
+        - $ref: '#/components/schemas/AgentTurnResponseStepStartPayload'
+        - $ref: '#/components/schemas/AgentTurnResponseStepProgressPayload'
+        - $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload'
+        - $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload'
+        - $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
+      discriminator:
+        propertyName: event_type
+        mapping:
+          step_start: '#/components/schemas/AgentTurnResponseStepStartPayload'
+          step_progress: '#/components/schemas/AgentTurnResponseStepProgressPayload'
+          step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload'
+          turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload'
+          turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
+    AgentTurnResponseStepCompletePayload:
+      type: object
+      properties:
+        event_type:
+          type: string
+          const: step_complete
+          default: step_complete
+        step_type:
+          type: string
+          enum:
+            - inference
+            - tool_execution
+            - shield_call
+            - memory_retrieval
+        step_id:
+          type: string
+        step_details:
+          oneOf:
+            - $ref: '#/components/schemas/InferenceStep'
+            - $ref: '#/components/schemas/ToolExecutionStep'
+            - $ref: '#/components/schemas/ShieldCallStep'
+            - $ref: '#/components/schemas/MemoryRetrievalStep'
+          discriminator:
+            propertyName: step_type
+            mapping:
+              inference: '#/components/schemas/InferenceStep'
+              tool_execution: '#/components/schemas/ToolExecutionStep'
+              shield_call: '#/components/schemas/ShieldCallStep'
+              memory_retrieval: '#/components/schemas/MemoryRetrievalStep'
+      additionalProperties: false
+      required:
+        - event_type
+        - step_type
+        - step_id
+        - step_details
+    AgentTurnResponseStepProgressPayload:
+      type: object
+      properties:
+        event_type:
+          type: string
+          const: step_progress
+          default: step_progress
+        step_type:
+          type: string
+          enum:
+            - inference
+            - tool_execution
+            - shield_call
+            - memory_retrieval
+        step_id:
+          type: string
+        delta:
+          $ref: '#/components/schemas/ContentDelta'
+      additionalProperties: false
+      required:
+        - event_type
+        - step_type
+        - step_id
+        - delta
+    AgentTurnResponseStepStartPayload:
+      type: object
+      properties:
+        event_type:
+          type: string
+          const: step_start
+          default: step_start
+        step_type:
+          type: string
+          enum:
+            - inference
+            - tool_execution
+            - shield_call
+            - memory_retrieval
+        step_id:
+          type: string
+        metadata:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+      additionalProperties: false
+      required:
+        - event_type
+        - step_type
+        - step_id
+    AgentTurnResponseStreamChunk:
+      type: object
+      properties:
+        event:
+          $ref: '#/components/schemas/AgentTurnResponseEvent'
+      additionalProperties: false
+      required:
+        - event
+      title: streamed agent turn completion response.
+    AgentTurnResponseTurnCompletePayload:
+      type: object
+      properties:
+        event_type:
+          type: string
+          const: turn_complete
+          default: turn_complete
+        turn:
+          $ref: '#/components/schemas/Turn'
+      additionalProperties: false
+      required:
+        - event_type
+        - turn
+    AgentTurnResponseTurnStartPayload:
+      type: object
+      properties:
+        event_type:
+          type: string
+          const: turn_start
+          default: turn_start
+        turn_id:
+          type: string
+      additionalProperties: false
+      required:
+        - event_type
+        - turn_id
     EmbeddingsRequest:
       type: object
       properties:
diff --git a/llama_stack/apis/common/content_types.py b/llama_stack/apis/common/content_types.py
index 0b27a0196..8e56f59b1 100644
--- a/llama_stack/apis/common/content_types.py
+++ b/llama_stack/apis/common/content_types.py
@@ -4,14 +4,13 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-import base64
 from enum import Enum
 from typing import Annotated, List, Literal, Optional, Union
 
 from llama_models.llama3.api.datatypes import ToolCall
 
 from llama_models.schema_utils import json_schema_type, register_schema
-from pydantic import BaseModel, Field, field_serializer, model_validator
+from pydantic import BaseModel, Field, model_validator
 
 
 @json_schema_type
@@ -20,8 +19,16 @@ class URL(BaseModel):
 
 
 class _URLOrData(BaseModel):
+    """
+    A URL or a base64 encoded string
+
+    :param url: A URL of the image or data URL in the format of data:image/{type};base64,{data}. Note that URL could have length limits.
+    :param data: base64 encoded image data as string
+    """
+
     url: Optional[URL] = None
-    data: Optional[bytes] = None
+    # data is a base64 encoded string, hint with contentEncoding=base64
+    data: Optional[str] = Field(contentEncoding="base64", default=None)
 
     @model_validator(mode="before")
     @classmethod
@@ -30,21 +37,27 @@ class _URLOrData(BaseModel):
             return values
         return {"url": values}
 
-    @field_serializer("data")
-    def serialize_data(self, data: Optional[bytes], _info):
-        if data is None:
-            return None
-        return base64.b64encode(data).decode("utf-8")
-
 
 @json_schema_type
 class ImageContentItem(BaseModel):
+    """A image content item
+
+    :param type: Discriminator type of the content item. Always "image"
+    :param image: Image as a base64 encoded string or an URL
+    """
+
     type: Literal["image"] = "image"
     image: _URLOrData
 
 
 @json_schema_type
 class TextContentItem(BaseModel):
+    """A text content item
+
+    :param type: Discriminator type of the content item. Always "text"
+    :param text: Text content
+    """
+
     type: Literal["text"] = "text"
     text: str
 
diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
index eca7364d7..706dd74f1 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
@@ -66,6 +66,7 @@ from llama_stack.apis.vector_io import VectorIO
 from llama_stack.providers.utils.kvstore import KVStore
 from llama_stack.providers.utils.memory.vector_store import concat_interleaved_content
 from llama_stack.providers.utils.telemetry import tracing
+
 from .persistence import AgentPersistence
 from .safety import SafetyException, ShieldRunnerMixin
 
@@ -476,9 +477,12 @@ class ChatAgent(ShieldRunnerMixin):
                 )
                 span.set_attribute("output", retrieved_context)
                 span.set_attribute("tool_name", MEMORY_QUERY_TOOL)
-                if retrieved_context:
-                    last_message = input_messages[-1]
-                    last_message.context = retrieved_context
+
+                # set retrieved_context as the context of the most recent user message (overwrites any existing value)
+                for message in input_messages[::-1]:
+                    if isinstance(message, UserMessage):
+                        message.context = retrieved_context
+                        break
 
         output_attachments = []
 
diff --git a/llama_stack/providers/utils/inference/prompt_adapter.py b/llama_stack/providers/utils/inference/prompt_adapter.py
index f5298d844..e49771980 100644
--- a/llama_stack/providers/utils/inference/prompt_adapter.py
+++ b/llama_stack/providers/utils/inference/prompt_adapter.py
@@ -135,7 +135,8 @@ async def interleaved_content_convert_to_raw(
                 else:
                     raise ValueError("Unsupported URL type")
             elif image.data:
-                data = image.data
+                # data is a base64 encoded string, decode it to bytes for RawMediaItem
+                data = base64.b64decode(image.data)
             else:
                 raise ValueError("No data or URL provided")
 
@@ -184,8 +185,10 @@ async def localize_image_content(media: ImageContentItem) -> Tuple[bytes, str]:
 
         return content, format
     else:
-        pil_image = PIL_Image.open(io.BytesIO(image.data))
-        return image.data, pil_image.format
+        # data is a base64 encoded string, decode it to bytes first
+        data_bytes = base64.b64decode(image.data)
+        pil_image = PIL_Image.open(io.BytesIO(data_bytes))
+        return data_bytes, pil_image.format
 
 
 async def convert_image_content_to_url(
diff --git a/tests/client-sdk/agents/test_agents.py b/tests/client-sdk/agents/test_agents.py
index 4a8fdd36a..e0f86e3d7 100644
--- a/tests/client-sdk/agents/test_agents.py
+++ b/tests/client-sdk/agents/test_agents.py
@@ -211,7 +211,7 @@ def test_code_interpreter_for_attachments(llama_stack_client, agent_config):
     }
 
     codex_agent = Agent(llama_stack_client, agent_config)
-    session_id = codex_agent.create_session("test-session")
+    session_id = codex_agent.create_session(f"test-session-{uuid4()}")
     inflation_doc = AgentDocument(
         content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv",
         mime_type="text/csv",
@@ -285,7 +285,8 @@ def test_rag_agent(llama_stack_client, agent_config):
     llama_stack_client.tool_runtime.rag_tool.insert(
         documents=documents,
         vector_db_id=vector_db_id,
-        chunk_size_in_tokens=512,
+        # small chunks help to get specific info out of the docs
+        chunk_size_in_tokens=128,
     )
     agent_config = {
         **agent_config,
@@ -299,11 +300,15 @@ def test_rag_agent(llama_stack_client, agent_config):
         ],
     }
     rag_agent = Agent(llama_stack_client, agent_config)
-    session_id = rag_agent.create_session("test-session")
+    session_id = rag_agent.create_session(f"test-session-{uuid4()}")
     user_prompts = [
-        "What are the top 5 topics that were explained? Only list succinct bullet points.",
+        (
+            "Instead of the standard multi-head attention, what attention type does Llama3-8B use?",
+            "grouped-query",
+        ),
+        ("What command to use to get access to Llama3-8B-Instruct ?", "tune download"),
     ]
-    for prompt in user_prompts:
+    for prompt, expected_kw in user_prompts:
         print(f"User> {prompt}")
         response = rag_agent.create_turn(
             messages=[{"role": "user", "content": prompt}],
@@ -312,3 +317,69 @@ def test_rag_agent(llama_stack_client, agent_config):
         logs = [str(log) for log in EventLogger().log(response) if log is not None]
         logs_str = "".join(logs)
         assert "Tool:query_from_memory" in logs_str
+        assert expected_kw in logs_str.lower()
+
+
+def test_rag_and_code_agent(llama_stack_client, agent_config):
+    urls = ["chat.rst"]
+    documents = [
+        Document(
+            document_id=f"num-{i}",
+            content=f"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/{url}",
+            mime_type="text/plain",
+            metadata={},
+        )
+        for i, url in enumerate(urls)
+    ]
+    vector_db_id = "test-vector-db"
+    llama_stack_client.vector_dbs.register(
+        vector_db_id=vector_db_id,
+        embedding_model="all-MiniLM-L6-v2",
+        embedding_dimension=384,
+    )
+    llama_stack_client.tool_runtime.rag_tool.insert(
+        documents=documents,
+        vector_db_id=vector_db_id,
+        chunk_size_in_tokens=128,
+    )
+    agent_config = {
+        **agent_config,
+        "toolgroups": [
+            dict(
+                name="builtin::rag",
+                args={"vector_db_ids": [vector_db_id]},
+            ),
+            "builtin::code_interpreter",
+        ],
+    }
+    agent = Agent(llama_stack_client, agent_config)
+    inflation_doc = Document(
+        document_id="test_csv",
+        content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv",
+        mime_type="text/csv",
+        metadata={},
+    )
+    user_prompts = [
+        (
+            "Here is a csv file, can you describe it?",
+            [inflation_doc],
+            "code_interpreter",
+        ),
+        (
+            "What are the top 5 topics that were explained? Only list succinct bullet points.",
+            [],
+            "query_from_memory",
+        ),
+    ]
+
+    for prompt, docs, tool_name in user_prompts:
+        print(f"User> {prompt}")
+        session_id = agent.create_session(f"test-session-{uuid4()}")
+        response = agent.create_turn(
+            messages=[{"role": "user", "content": prompt}],
+            session_id=session_id,
+            documents=docs,
+        )
+        logs = [str(log) for log in EventLogger().log(response) if log is not None]
+        logs_str = "".join(logs)
+        assert f"Tool:{tool_name}" in logs_str
diff --git a/tests/client-sdk/inference/test_inference.py b/tests/client-sdk/inference/test_inference.py
index 6dff1be24..b10ede357 100644
--- a/tests/client-sdk/inference/test_inference.py
+++ b/tests/client-sdk/inference/test_inference.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
 
 import base64
-import os
+import pathlib
 
 import pytest
 from pydantic import BaseModel
@@ -57,13 +57,20 @@ def get_weather_tool_definition():
 
 
 @pytest.fixture
-def base64_image_url():
-    image_path = os.path.join(os.path.dirname(__file__), "dog.png")
-    with open(image_path, "rb") as image_file:
-        # Convert the image to base64
-        base64_string = base64.b64encode(image_file.read()).decode("utf-8")
-        base64_url = f"data:image/png;base64,{base64_string}"
-        return base64_url
+def image_path():
+    return pathlib.Path(__file__).parent / "dog.png"
+
+
+@pytest.fixture
+def base64_image_data(image_path):
+    # Convert the image to base64
+    return base64.b64encode(image_path.read_bytes()).decode("utf-8")
+
+
+@pytest.fixture
+def base64_image_url(base64_image_data, image_path):
+    # Path.suffix includes the leading dot (e.g. ".png"), so strip it to get the image type
+    return f"data:image/{image_path.suffix[1:]};base64,{base64_image_data}"
 
 
 def test_text_completion_non_streaming(llama_stack_client, text_model_id):
@@ -371,20 +378,31 @@ def test_image_chat_completion_streaming(llama_stack_client, vision_model_id):
     assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"})
 
 
-def test_image_chat_completion_base64_url(
-    llama_stack_client, vision_model_id, base64_image_url
+@pytest.mark.parametrize("type_", ["url", "data"])
+def test_image_chat_completion_base64(
+    llama_stack_client, vision_model_id, base64_image_data, base64_image_url, type_
 ):
+    image_spec = {
+        "url": {
+            "type": "image",
+            "image": {
+                "url": {
+                    "uri": base64_image_url,
+                },
+            },
+        },
+        "data": {
+            "type": "image",
+            "image": {
+                "data": base64_image_data,
+            },
+        },
+    }[type_]
+
     message = {
         "role": "user",
         "content": [
-            {
-                "type": "image",
-                "image": {
-                    "url": {
-                        "uri": base64_image_url,
-                    },
-                },
-            },
+            image_spec,
             {
                 "type": "text",
                 "text": "Describe what is in this image.",