Merge branch 'main' into add-image-support

2025-08-07 02:58:21 +00:00 · 2025-01-31 08:59:33 -05:00 · 2025-01-31 08:59:33 -05:00 · a8027d25ca
commit a8027d25ca
parent bcd14cc2d3 15dcc4ea5e
9 changed files with 637 additions and 476 deletions
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@ -177,20 +177,37 @@ class ContentBuilder:
    ) -> Dict[str, MediaType]:
        "Creates the content subtree for a request or response."
-        def has_iterator_type(t):
+        def is_iterator_type(t):
-            if typing.get_origin(t) is typing.Union:
+            return "StreamChunk" in str(t)
-                return any(has_iterator_type(a) for a in typing.get_args(t))
+
        def get_media_type(t):
            if is_generic_list(t):
                return "application/jsonl"
            elif is_iterator_type(t):
                return "text/event-stream"
            else:
-                # TODO: needs a proper fix where we let all types correctly flow upwards
+                return "application/json"
-                # and then test against AsyncIterator
+
-                return "StreamChunk" in str(t)
+        if typing.get_origin(payload_type) is typing.Union:
            media_types = []
            item_types = []
            for x in typing.get_args(payload_type):
                media_types.append(get_media_type(x))
                item_types.append(x)
            if len(set(media_types)) == 1:
                # all types have the same media type
                return {media_types[0]: self.build_media_type(payload_type, examples)}
            else:
                # different types have different media types
                return {
                    media_type: self.build_media_type(item_type, examples)
                    for media_type, item_type in zip(media_types, item_types)
                }
        if is_generic_list(payload_type):
            media_type = "application/jsonl"
            item_type = unwrap_generic_list(payload_type)
        elif has_iterator_type(payload_type):
            item_type = payload_type
            media_type = "text/event-stream"
        else:
            media_type = "application/json"
            item_type = payload_type
--- a/docs/openapi_generator/strong_typing/schema.py
+++ b/docs/openapi_generator/strong_typing/schema.py
@ -248,7 +248,9 @@ class JsonSchemaGenerator:
                type_schema.update(self._metadata_to_schema(m))
        return type_schema
-    def _simple_type_to_schema(self, typ: TypeLike) -> Optional[Schema]:
+    def _simple_type_to_schema(
        self, typ: TypeLike, json_schema_extra: Optional[dict] = None
    ) -> Optional[Schema]:
        """
        Returns the JSON schema associated with a simple, unrestricted type.
@ -264,6 +266,11 @@ class JsonSchemaGenerator:
        elif typ is float:
            return {"type": "number"}
        elif typ is str:
            if json_schema_extra and "contentEncoding" in json_schema_extra:
                return {
                    "type": "string",
                    "contentEncoding": json_schema_extra["contentEncoding"],
                }
            return {"type": "string"}
        elif typ is bytes:
            return {"type": "string", "contentEncoding": "base64"}
@ -303,7 +310,12 @@ class JsonSchemaGenerator:
            # not a simple type
            return None
-    def type_to_schema(self, data_type: TypeLike, force_expand: bool = False) -> Schema:
+    def type_to_schema(
        self,
        data_type: TypeLike,
        force_expand: bool = False,
        json_schema_extra: Optional[dict] = None,
    ) -> Schema:
        """
        Returns the JSON schema associated with a type.
@ -313,7 +325,7 @@ class JsonSchemaGenerator:
        """
        # short-circuit for common simple types
-        schema = self._simple_type_to_schema(data_type)
+        schema = self._simple_type_to_schema(data_type, json_schema_extra)
        if schema is not None:
            return schema
@ -486,15 +498,9 @@ class JsonSchemaGenerator:
        property_docstrings = get_class_property_docstrings(
            typ, self.options.property_description_fun
        )
        properties: Dict[str, Schema] = {}
        required: List[str] = []
        for property_name, property_type in get_class_properties(typ):
            defaults = {}
            if "model_fields" in members:
                f = members["model_fields"]
                defaults = {k: finfo.default for k, finfo in f.items()}
            # rename property if an alias name is specified
            alias = get_annotation(property_type, Alias)
            if alias:
@ -502,11 +508,22 @@ class JsonSchemaGenerator:
            else:
                output_name = property_name
            defaults = {}
            json_schema_extra = None
            if "model_fields" in members:
                f = members["model_fields"]
                defaults = {k: finfo.default for k, finfo in f.items()}
                json_schema_extra = f.get(output_name, None).json_schema_extra
            if is_type_optional(property_type):
                optional_type: type = unwrap_optional_type(property_type)
-                property_def = self.type_to_schema(optional_type)
+                property_def = self.type_to_schema(
                    optional_type, json_schema_extra=json_schema_extra
                )
            else:
-                property_def = self.type_to_schema(property_type)
+                property_def = self.type_to_schema(
                    property_type, json_schema_extra=json_schema_extra
                )
                required.append(output_name)
            # check if attribute has a default value initializer
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@ -192,16 +192,14 @@
                    "200": {
                        "description": "If stream=False, returns a ChatCompletionResponse with the full completion. If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "$ref": "#/components/schemas/ChatCompletionResponse"
                                }
                            },
                            "text/event-stream": {
                                "schema": {
-                                    "oneOf": [
+                                    "$ref": "#/components/schemas/ChatCompletionResponseStreamChunk"
                                        {
                                            "$ref": "#/components/schemas/ChatCompletionResponse"
                                        },
                                        {
                                            "$ref": "#/components/schemas/ChatCompletionResponseStreamChunk"
                                        }
                                    ]
                                }
                            }
                        }
@ -230,16 +228,14 @@
                    "200": {
                        "description": "If stream=False, returns a CompletionResponse with the full completion. If stream=True, returns an SSE event stream of CompletionResponseStreamChunk",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "$ref": "#/components/schemas/CompletionResponse"
                                }
                            },
                            "text/event-stream": {
                                "schema": {
-                                    "oneOf": [
+                                    "$ref": "#/components/schemas/CompletionResponseStreamChunk"
                                        {
                                            "$ref": "#/components/schemas/CompletionResponse"
                                        },
                                        {
                                            "$ref": "#/components/schemas/CompletionResponseStreamChunk"
                                        }
                                    ]
                                }
                            }
                        }
@ -337,16 +333,14 @@
                    "200": {
                        "description": "A single turn in an interaction with an Agentic System. **OR** streamed agent turn completion response.",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "$ref": "#/components/schemas/Turn"
                                }
                            },
                            "text/event-stream": {
                                "schema": {
-                                    "oneOf": [
+                                    "$ref": "#/components/schemas/AgentTurnResponseStreamChunk"
                                        {
                                            "$ref": "#/components/schemas/Turn"
                                        },
                                        {
                                            "$ref": "#/components/schemas/AgentTurnResponseStreamChunk"
                                        }
                                    ]
                                }
                            }
                        }
@ -2439,27 +2433,32 @@
                    "type": {
                        "type": "string",
                        "const": "image",
-                        "default": "image"
+                        "default": "image",
                        "description": "Discriminator type of the content item. Always \"image\""
                    },
                    "image": {
                        "type": "object",
                        "properties": {
                            "url": {
-                                "$ref": "#/components/schemas/URL"
+                                "$ref": "#/components/schemas/URL",
                                "description": "A URL of the image or data URL in the format of data:image/{type};base64,{data}. Note that URL could have length limits."
                            },
                            "data": {
                                "type": "string",
-                                "contentEncoding": "base64"
+                                "contentEncoding": "base64",
                                "description": "base64 encoded image data as string"
                            }
                        },
-                        "additionalProperties": false
+                        "additionalProperties": false,
                        "description": "Image as a base64 encoded string or an URL"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type",
                    "image"
-                ]
+                ],
                "title": "A image content item"
            },
            "InterleavedContent": {
                "oneOf": [
@ -2647,17 +2646,20 @@
                    "type": {
                        "type": "string",
                        "const": "text",
-                        "default": "text"
+                        "default": "text",
                        "description": "Discriminator type of the content item. Always \"text\""
                    },
                    "text": {
-                        "type": "string"
+                        "type": "string",
                        "description": "Text content"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type",
                    "text"
-                ]
+                ],
                "title": "A text content item"
            },
            "ToolCall": {
                "type": "object",
@ -3772,235 +3774,6 @@
                    "messages"
                ]
            },
            "AgentTurnResponseEvent": {
                "type": "object",
                "properties": {
                    "payload": {
                        "$ref": "#/components/schemas/AgentTurnResponseEventPayload"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "payload"
                ]
            },
            "AgentTurnResponseEventPayload": {
                "oneOf": [
                    {
                        "$ref": "#/components/schemas/AgentTurnResponseStepStartPayload"
                    },
                    {
                        "$ref": "#/components/schemas/AgentTurnResponseStepProgressPayload"
                    },
                    {
                        "$ref": "#/components/schemas/AgentTurnResponseStepCompletePayload"
                    },
                    {
                        "$ref": "#/components/schemas/AgentTurnResponseTurnStartPayload"
                    },
                    {
                        "$ref": "#/components/schemas/AgentTurnResponseTurnCompletePayload"
                    }
                ],
                "discriminator": {
                    "propertyName": "event_type",
                    "mapping": {
                        "step_start": "#/components/schemas/AgentTurnResponseStepStartPayload",
                        "step_progress": "#/components/schemas/AgentTurnResponseStepProgressPayload",
                        "step_complete": "#/components/schemas/AgentTurnResponseStepCompletePayload",
                        "turn_start": "#/components/schemas/AgentTurnResponseTurnStartPayload",
                        "turn_complete": "#/components/schemas/AgentTurnResponseTurnCompletePayload"
                    }
                }
            },
            "AgentTurnResponseStepCompletePayload": {
                "type": "object",
                "properties": {
                    "event_type": {
                        "type": "string",
                        "const": "step_complete",
                        "default": "step_complete"
                    },
                    "step_type": {
                        "type": "string",
                        "enum": [
                            "inference",
                            "tool_execution",
                            "shield_call",
                            "memory_retrieval"
                        ]
                    },
                    "step_id": {
                        "type": "string"
                    },
                    "step_details": {
                        "oneOf": [
                            {
                                "$ref": "#/components/schemas/InferenceStep"
                            },
                            {
                                "$ref": "#/components/schemas/ToolExecutionStep"
                            },
                            {
                                "$ref": "#/components/schemas/ShieldCallStep"
                            },
                            {
                                "$ref": "#/components/schemas/MemoryRetrievalStep"
                            }
                        ],
                        "discriminator": {
                            "propertyName": "step_type",
                            "mapping": {
                                "inference": "#/components/schemas/InferenceStep",
                                "tool_execution": "#/components/schemas/ToolExecutionStep",
                                "shield_call": "#/components/schemas/ShieldCallStep",
                                "memory_retrieval": "#/components/schemas/MemoryRetrievalStep"
                            }
                        }
                    }
                },
                "additionalProperties": false,
                "required": [
                    "event_type",
                    "step_type",
                    "step_id",
                    "step_details"
                ]
            },
            "AgentTurnResponseStepProgressPayload": {
                "type": "object",
                "properties": {
                    "event_type": {
                        "type": "string",
                        "const": "step_progress",
                        "default": "step_progress"
                    },
                    "step_type": {
                        "type": "string",
                        "enum": [
                            "inference",
                            "tool_execution",
                            "shield_call",
                            "memory_retrieval"
                        ]
                    },
                    "step_id": {
                        "type": "string"
                    },
                    "delta": {
                        "$ref": "#/components/schemas/ContentDelta"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "event_type",
                    "step_type",
                    "step_id",
                    "delta"
                ]
            },
            "AgentTurnResponseStepStartPayload": {
                "type": "object",
                "properties": {
                    "event_type": {
                        "type": "string",
                        "const": "step_start",
                        "default": "step_start"
                    },
                    "step_type": {
                        "type": "string",
                        "enum": [
                            "inference",
                            "tool_execution",
                            "shield_call",
                            "memory_retrieval"
                        ]
                    },
                    "step_id": {
                        "type": "string"
                    },
                    "metadata": {
                        "type": "object",
                        "additionalProperties": {
                            "oneOf": [
                                {
                                    "type": "null"
                                },
                                {
                                    "type": "boolean"
                                },
                                {
                                    "type": "number"
                                },
                                {
                                    "type": "string"
                                },
                                {
                                    "type": "array"
                                },
                                {
                                    "type": "object"
                                }
                            ]
                        }
                    }
                },
                "additionalProperties": false,
                "required": [
                    "event_type",
                    "step_type",
                    "step_id"
                ]
            },
            "AgentTurnResponseStreamChunk": {
                "type": "object",
                "properties": {
                    "event": {
                        "$ref": "#/components/schemas/AgentTurnResponseEvent"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "event"
                ],
                "title": "streamed agent turn completion response."
            },
            "AgentTurnResponseTurnCompletePayload": {
                "type": "object",
                "properties": {
                    "event_type": {
                        "type": "string",
                        "const": "turn_complete",
                        "default": "turn_complete"
                    },
                    "turn": {
                        "$ref": "#/components/schemas/Turn"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "event_type",
                    "turn"
                ]
            },
            "AgentTurnResponseTurnStartPayload": {
                "type": "object",
                "properties": {
                    "event_type": {
                        "type": "string",
                        "const": "turn_start",
                        "default": "turn_start"
                    },
                    "turn_id": {
                        "type": "string"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "event_type",
                    "turn_id"
                ]
            },
            "InferenceStep": {
                "type": "object",
                "properties": {
@ -4341,6 +4114,235 @@
                    "error"
                ]
            },
            "AgentTurnResponseEvent": {
                "type": "object",
                "properties": {
                    "payload": {
                        "$ref": "#/components/schemas/AgentTurnResponseEventPayload"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "payload"
                ]
            },
            "AgentTurnResponseEventPayload": {
                "oneOf": [
                    {
                        "$ref": "#/components/schemas/AgentTurnResponseStepStartPayload"
                    },
                    {
                        "$ref": "#/components/schemas/AgentTurnResponseStepProgressPayload"
                    },
                    {
                        "$ref": "#/components/schemas/AgentTurnResponseStepCompletePayload"
                    },
                    {
                        "$ref": "#/components/schemas/AgentTurnResponseTurnStartPayload"
                    },
                    {
                        "$ref": "#/components/schemas/AgentTurnResponseTurnCompletePayload"
                    }
                ],
                "discriminator": {
                    "propertyName": "event_type",
                    "mapping": {
                        "step_start": "#/components/schemas/AgentTurnResponseStepStartPayload",
                        "step_progress": "#/components/schemas/AgentTurnResponseStepProgressPayload",
                        "step_complete": "#/components/schemas/AgentTurnResponseStepCompletePayload",
                        "turn_start": "#/components/schemas/AgentTurnResponseTurnStartPayload",
                        "turn_complete": "#/components/schemas/AgentTurnResponseTurnCompletePayload"
                    }
                }
            },
            "AgentTurnResponseStepCompletePayload": {
                "type": "object",
                "properties": {
                    "event_type": {
                        "type": "string",
                        "const": "step_complete",
                        "default": "step_complete"
                    },
                    "step_type": {
                        "type": "string",
                        "enum": [
                            "inference",
                            "tool_execution",
                            "shield_call",
                            "memory_retrieval"
                        ]
                    },
                    "step_id": {
                        "type": "string"
                    },
                    "step_details": {
                        "oneOf": [
                            {
                                "$ref": "#/components/schemas/InferenceStep"
                            },
                            {
                                "$ref": "#/components/schemas/ToolExecutionStep"
                            },
                            {
                                "$ref": "#/components/schemas/ShieldCallStep"
                            },
                            {
                                "$ref": "#/components/schemas/MemoryRetrievalStep"
                            }
                        ],
                        "discriminator": {
                            "propertyName": "step_type",
                            "mapping": {
                                "inference": "#/components/schemas/InferenceStep",
                                "tool_execution": "#/components/schemas/ToolExecutionStep",
                                "shield_call": "#/components/schemas/ShieldCallStep",
                                "memory_retrieval": "#/components/schemas/MemoryRetrievalStep"
                            }
                        }
                    }
                },
                "additionalProperties": false,
                "required": [
                    "event_type",
                    "step_type",
                    "step_id",
                    "step_details"
                ]
            },
            "AgentTurnResponseStepProgressPayload": {
                "type": "object",
                "properties": {
                    "event_type": {
                        "type": "string",
                        "const": "step_progress",
                        "default": "step_progress"
                    },
                    "step_type": {
                        "type": "string",
                        "enum": [
                            "inference",
                            "tool_execution",
                            "shield_call",
                            "memory_retrieval"
                        ]
                    },
                    "step_id": {
                        "type": "string"
                    },
                    "delta": {
                        "$ref": "#/components/schemas/ContentDelta"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "event_type",
                    "step_type",
                    "step_id",
                    "delta"
                ]
            },
            "AgentTurnResponseStepStartPayload": {
                "type": "object",
                "properties": {
                    "event_type": {
                        "type": "string",
                        "const": "step_start",
                        "default": "step_start"
                    },
                    "step_type": {
                        "type": "string",
                        "enum": [
                            "inference",
                            "tool_execution",
                            "shield_call",
                            "memory_retrieval"
                        ]
                    },
                    "step_id": {
                        "type": "string"
                    },
                    "metadata": {
                        "type": "object",
                        "additionalProperties": {
                            "oneOf": [
                                {
                                    "type": "null"
                                },
                                {
                                    "type": "boolean"
                                },
                                {
                                    "type": "number"
                                },
                                {
                                    "type": "string"
                                },
                                {
                                    "type": "array"
                                },
                                {
                                    "type": "object"
                                }
                            ]
                        }
                    }
                },
                "additionalProperties": false,
                "required": [
                    "event_type",
                    "step_type",
                    "step_id"
                ]
            },
            "AgentTurnResponseStreamChunk": {
                "type": "object",
                "properties": {
                    "event": {
                        "$ref": "#/components/schemas/AgentTurnResponseEvent"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "event"
                ],
                "title": "streamed agent turn completion response."
            },
            "AgentTurnResponseTurnCompletePayload": {
                "type": "object",
                "properties": {
                    "event_type": {
                        "type": "string",
                        "const": "turn_complete",
                        "default": "turn_complete"
                    },
                    "turn": {
                        "$ref": "#/components/schemas/Turn"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "event_type",
                    "turn"
                ]
            },
            "AgentTurnResponseTurnStartPayload": {
                "type": "object",
                "properties": {
                    "event_type": {
                        "type": "string",
                        "const": "turn_start",
                        "default": "turn_start"
                    },
                    "turn_id": {
                        "type": "string"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "event_type",
                    "turn_id"
                ]
            },
            "EmbeddingsRequest": {
                "type": "object",
                "properties": {
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@ -113,11 +113,12 @@ paths:
            If stream=False, returns a ChatCompletionResponse with the full completion.
            If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ChatCompletionResponse'
            text/event-stream:
              schema:
-                oneOf:
+                $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
                  - $ref: '#/components/schemas/ChatCompletionResponse'
                  - $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
      tags:
        - Inference
      summary: >-
@ -137,11 +138,12 @@ paths:
            If stream=False, returns a CompletionResponse with the full completion.
            If stream=True, returns an SSE event stream of CompletionResponseStreamChunk
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CompletionResponse'
            text/event-stream:
              schema:
-                oneOf:
+                $ref: '#/components/schemas/CompletionResponseStreamChunk'
                  - $ref: '#/components/schemas/CompletionResponse'
                  - $ref: '#/components/schemas/CompletionResponseStreamChunk'
      tags:
        - Inference
      summary: >-
@ -202,11 +204,12 @@ paths:
            A single turn in an interaction with an Agentic System. **OR** streamed
            agent turn completion response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Turn'
            text/event-stream:
              schema:
-                oneOf:
+                $ref: '#/components/schemas/AgentTurnResponseStreamChunk'
                  - $ref: '#/components/schemas/Turn'
                  - $ref: '#/components/schemas/AgentTurnResponseStreamChunk'
      tags:
        - Agents
      parameters:
@ -1466,19 +1469,28 @@ components:
          type: string
          const: image
          default: image
          description: >-
            Discriminator type of the content item. Always "image"
        image:
          type: object
          properties:
            url:
              $ref: '#/components/schemas/URL'
              description: >-
                A URL of the image or data URL in the format of data:image/{type};base64,{data}.
                Note that URL could have length limits.
            data:
              type: string
              contentEncoding: base64
              description: base64 encoded image data as string
          additionalProperties: false
          description: >-
            Image as a base64 encoded string or an URL
      additionalProperties: false
      required:
        - type
        - image
      title: A image content item
    InterleavedContent:
      oneOf:
        - type: string
@ -1598,12 +1610,16 @@ components:
          type: string
          const: text
          default: text
          description: >-
            Discriminator type of the content item. Always "text"
        text:
          type: string
          description: Text content
      additionalProperties: false
      required:
        - type
        - text
      title: A text content item
    ToolCall:
      type: object
      properties:
@ -2381,154 +2397,6 @@ components:
      additionalProperties: false
      required:
        - messages
    AgentTurnResponseEvent:
      type: object
      properties:
        payload:
          $ref: '#/components/schemas/AgentTurnResponseEventPayload'
      additionalProperties: false
      required:
        - payload
    AgentTurnResponseEventPayload:
      oneOf:
        - $ref: '#/components/schemas/AgentTurnResponseStepStartPayload'
        - $ref: '#/components/schemas/AgentTurnResponseStepProgressPayload'
        - $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload'
        - $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload'
        - $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
      discriminator:
        propertyName: event_type
        mapping:
          step_start: '#/components/schemas/AgentTurnResponseStepStartPayload'
          step_progress: '#/components/schemas/AgentTurnResponseStepProgressPayload'
          step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload'
          turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload'
          turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
    AgentTurnResponseStepCompletePayload:
      type: object
      properties:
        event_type:
          type: string
          const: step_complete
          default: step_complete
        step_type:
          type: string
          enum:
            - inference
            - tool_execution
            - shield_call
            - memory_retrieval
        step_id:
          type: string
        step_details:
          oneOf:
            - $ref: '#/components/schemas/InferenceStep'
            - $ref: '#/components/schemas/ToolExecutionStep'
            - $ref: '#/components/schemas/ShieldCallStep'
            - $ref: '#/components/schemas/MemoryRetrievalStep'
          discriminator:
            propertyName: step_type
            mapping:
              inference: '#/components/schemas/InferenceStep'
              tool_execution: '#/components/schemas/ToolExecutionStep'
              shield_call: '#/components/schemas/ShieldCallStep'
              memory_retrieval: '#/components/schemas/MemoryRetrievalStep'
      additionalProperties: false
      required:
        - event_type
        - step_type
        - step_id
        - step_details
    AgentTurnResponseStepProgressPayload:
      type: object
      properties:
        event_type:
          type: string
          const: step_progress
          default: step_progress
        step_type:
          type: string
          enum:
            - inference
            - tool_execution
            - shield_call
            - memory_retrieval
        step_id:
          type: string
        delta:
          $ref: '#/components/schemas/ContentDelta'
      additionalProperties: false
      required:
        - event_type
        - step_type
        - step_id
        - delta
    AgentTurnResponseStepStartPayload:
      type: object
      properties:
        event_type:
          type: string
          const: step_start
          default: step_start
        step_type:
          type: string
          enum:
            - inference
            - tool_execution
            - shield_call
            - memory_retrieval
        step_id:
          type: string
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
      additionalProperties: false
      required:
        - event_type
        - step_type
        - step_id
    AgentTurnResponseStreamChunk:
      type: object
      properties:
        event:
          $ref: '#/components/schemas/AgentTurnResponseEvent'
      additionalProperties: false
      required:
        - event
      title: streamed agent turn completion response.
    AgentTurnResponseTurnCompletePayload:
      type: object
      properties:
        event_type:
          type: string
          const: turn_complete
          default: turn_complete
        turn:
          $ref: '#/components/schemas/Turn'
      additionalProperties: false
      required:
        - event_type
        - turn
    AgentTurnResponseTurnStartPayload:
      type: object
      properties:
        event_type:
          type: string
          const: turn_start
          default: turn_start
        turn_id:
          type: string
      additionalProperties: false
      required:
        - event_type
        - turn_id
    InferenceStep:
      type: object
      properties:
@ -2752,6 +2620,154 @@ components:
        - info
        - warn
        - error
    AgentTurnResponseEvent:
      type: object
      properties:
        payload:
          $ref: '#/components/schemas/AgentTurnResponseEventPayload'
      additionalProperties: false
      required:
        - payload
    AgentTurnResponseEventPayload:
      oneOf:
        - $ref: '#/components/schemas/AgentTurnResponseStepStartPayload'
        - $ref: '#/components/schemas/AgentTurnResponseStepProgressPayload'
        - $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload'
        - $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload'
        - $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
      discriminator:
        propertyName: event_type
        mapping:
          step_start: '#/components/schemas/AgentTurnResponseStepStartPayload'
          step_progress: '#/components/schemas/AgentTurnResponseStepProgressPayload'
          step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload'
          turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload'
          turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
    AgentTurnResponseStepCompletePayload:
      type: object
      properties:
        event_type:
          type: string
          const: step_complete
          default: step_complete
        step_type:
          type: string
          enum:
            - inference
            - tool_execution
            - shield_call
            - memory_retrieval
        step_id:
          type: string
        step_details:
          oneOf:
            - $ref: '#/components/schemas/InferenceStep'
            - $ref: '#/components/schemas/ToolExecutionStep'
            - $ref: '#/components/schemas/ShieldCallStep'
            - $ref: '#/components/schemas/MemoryRetrievalStep'
          discriminator:
            propertyName: step_type
            mapping:
              inference: '#/components/schemas/InferenceStep'
              tool_execution: '#/components/schemas/ToolExecutionStep'
              shield_call: '#/components/schemas/ShieldCallStep'
              memory_retrieval: '#/components/schemas/MemoryRetrievalStep'
      additionalProperties: false
      required:
        - event_type
        - step_type
        - step_id
        - step_details
    AgentTurnResponseStepProgressPayload:
      type: object
      properties:
        event_type:
          type: string
          const: step_progress
          default: step_progress
        step_type:
          type: string
          enum:
            - inference
            - tool_execution
            - shield_call
            - memory_retrieval
        step_id:
          type: string
        delta:
          $ref: '#/components/schemas/ContentDelta'
      additionalProperties: false
      required:
        - event_type
        - step_type
        - step_id
        - delta
    AgentTurnResponseStepStartPayload:
      type: object
      properties:
        event_type:
          type: string
          const: step_start
          default: step_start
        step_type:
          type: string
          enum:
            - inference
            - tool_execution
            - shield_call
            - memory_retrieval
        step_id:
          type: string
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
      additionalProperties: false
      required:
        - event_type
        - step_type
        - step_id
    AgentTurnResponseStreamChunk:
      type: object
      properties:
        event:
          $ref: '#/components/schemas/AgentTurnResponseEvent'
      additionalProperties: false
      required:
        - event
      title: streamed agent turn completion response.
    AgentTurnResponseTurnCompletePayload:
      type: object
      properties:
        event_type:
          type: string
          const: turn_complete
          default: turn_complete
        turn:
          $ref: '#/components/schemas/Turn'
      additionalProperties: false
      required:
        - event_type
        - turn
    AgentTurnResponseTurnStartPayload:
      type: object
      properties:
        event_type:
          type: string
          const: turn_start
          default: turn_start
        turn_id:
          type: string
      additionalProperties: false
      required:
        - event_type
        - turn_id
    EmbeddingsRequest:
      type: object
      properties:
--- a/llama_stack/apis/common/content_types.py
+++ b/llama_stack/apis/common/content_types.py
@ -4,14 +4,13 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 import base64
 from enum import Enum
 from typing import Annotated, List, Literal, Optional, Union
 from llama_models.llama3.api.datatypes import ToolCall
 from llama_models.schema_utils import json_schema_type, register_schema
-from pydantic import BaseModel, Field, field_serializer, model_validator
+from pydantic import BaseModel, Field, model_validator
@json_schema_type
@ -20,8 +19,16 @@ class URL(BaseModel):
 class _URLOrData(BaseModel):
    """
    A URL or a base64 encoded string
    :param url: A URL of the image or data URL in the format of data:image/{type};base64,{data}. Note that URL could have length limits.
    :param data: base64 encoded image data as string
    """
    url: Optional[URL] = None
-    data: Optional[bytes] = None
+    # data is a base64 encoded string, hint with contentEncoding=base64
    data: Optional[str] = Field(contentEncoding="base64", default=None)
    @model_validator(mode="before")
    @classmethod
@ -30,21 +37,27 @@ class _URLOrData(BaseModel):
            return values
        return {"url": values}
    @field_serializer("data")
    def serialize_data(self, data: Optional[bytes], _info):
        if data is None:
            return None
        return base64.b64encode(data).decode("utf-8")
@json_schema_type
 class ImageContentItem(BaseModel):
    """A image content item
    :param type: Discriminator type of the content item. Always "image"
    :param image: Image as a base64 encoded string or an URL
    """
    # NOTE(review): the docstring text above shows up verbatim as the schema
    # title and property descriptions in docs/resources/llama-stack-spec.yaml;
    # presumably the spec has to be regenerated whenever it is reworded.
    # Literal discriminator; the default lets callers omit the field.
    type: Literal["image"] = "image"
    # Image source: _URLOrData carries an optional `url` and an optional
    # base64-encoded `data` string.
    image: _URLOrData
@json_schema_type
 class TextContentItem(BaseModel):
    """A text content item
    :param type: Discriminator type of the content item. Always "text"
    :param text: Text content
    """
    # NOTE(review): the docstring text above shows up verbatim as the schema
    # title and property descriptions in docs/resources/llama-stack-spec.yaml;
    # presumably the spec has to be regenerated whenever it is reworded.
    # Literal discriminator; the default lets callers omit the field.
    type: Literal["text"] = "text"
    # The plain-text payload of this content item.
    text: str
--- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
@ -66,6 +66,7 @@ from llama_stack.apis.vector_io import VectorIO
 from llama_stack.providers.utils.kvstore import KVStore
 from llama_stack.providers.utils.memory.vector_store import concat_interleaved_content
 from llama_stack.providers.utils.telemetry import tracing
 from .persistence import AgentPersistence
 from .safety import SafetyException, ShieldRunnerMixin
@ -476,9 +477,12 @@ class ChatAgent(ShieldRunnerMixin):
                )
                span.set_attribute("output", retrieved_context)
                span.set_attribute("tool_name", MEMORY_QUERY_TOOL)
-                if retrieved_context:
+
-                    last_message = input_messages[-1]
+                # append retrieved_context to the last user message
-                    last_message.context = retrieved_context
+                for message in input_messages[::-1]:
                    if isinstance(message, UserMessage):
                        message.context = retrieved_context
                        break
        output_attachments = []
--- a/llama_stack/providers/utils/inference/prompt_adapter.py
+++ b/llama_stack/providers/utils/inference/prompt_adapter.py
@ -135,7 +135,8 @@ async def interleaved_content_convert_to_raw(
                else:
                    raise ValueError("Unsupported URL type")
            elif image.data:
-                data = image.data
+                # data is a base64 encoded string, decode it to bytes for RawMediaItem
                data = base64.b64decode(image.data)
            else:
                raise ValueError("No data or URL provided")
@ -184,8 +185,10 @@ async def localize_image_content(media: ImageContentItem) -> Tuple[bytes, str]:
        return content, format
    else:
-        pil_image = PIL_Image.open(io.BytesIO(image.data))
+        # data is a base64 encoded string, decode it to bytes first
-        return image.data, pil_image.format
+        data_bytes = base64.b64decode(image.data)
        pil_image = PIL_Image.open(io.BytesIO(data_bytes))
        return data_bytes, pil_image.format
 async def convert_image_content_to_url(
--- a/tests/client-sdk/agents/test_agents.py
+++ b/tests/client-sdk/agents/test_agents.py
@ -211,7 +211,7 @@ def test_code_interpreter_for_attachments(llama_stack_client, agent_config):
    }
    codex_agent = Agent(llama_stack_client, agent_config)
-    session_id = codex_agent.create_session("test-session")
+    session_id = codex_agent.create_session(f"test-session-{uuid4()}")
    inflation_doc = AgentDocument(
        content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv",
        mime_type="text/csv",
@ -285,7 +285,8 @@ def test_rag_agent(llama_stack_client, agent_config):
    llama_stack_client.tool_runtime.rag_tool.insert(
        documents=documents,
        vector_db_id=vector_db_id,
-        chunk_size_in_tokens=512,
+        # small chunks help to get specific info out of the docs
        chunk_size_in_tokens=128,
    )
    agent_config = {
        **agent_config,
@ -299,11 +300,15 @@ def test_rag_agent(llama_stack_client, agent_config):
        ],
    }
    rag_agent = Agent(llama_stack_client, agent_config)
-    session_id = rag_agent.create_session("test-session")
+    session_id = rag_agent.create_session(f"test-session-{uuid4()}")
    user_prompts = [
-        "What are the top 5 topics that were explained? Only list succinct bullet points.",
+        (
            "Instead of the standard multi-head attention, what attention type does Llama3-8B use?",
            "grouped-query",
        ),
        ("What command to use to get access to Llama3-8B-Instruct ?", "tune download"),
    ]
-    for prompt in user_prompts:
+    for prompt, expected_kw in user_prompts:
        print(f"User> {prompt}")
        response = rag_agent.create_turn(
            messages=[{"role": "user", "content": prompt}],
@ -312,3 +317,69 @@ def test_rag_agent(llama_stack_client, agent_config):
        logs = [str(log) for log in EventLogger().log(response) if log is not None]
        logs_str = "".join(logs)
        assert "Tool:query_from_memory" in logs_str
        assert expected_kw in logs_str.lower()
 def test_rag_and_code_agent(llama_stack_client, agent_config):
    """Check that an agent with both RAG and code-interpreter tools picks the right one.

    A torchtune tutorial is indexed into a vector DB, then the agent is given
    one prompt that should trigger the code interpreter (a CSV attachment) and
    one that should trigger a memory query. Each prompt runs in its own
    session and the emitted event log must mention the expected tool.

    :param llama_stack_client: client fixture used to register the vector DB,
        insert documents and drive the agent.
    :param agent_config: base agent configuration; a copy is extended with the
        RAG and code-interpreter toolgroups.
    """
    urls = ["chat.rst"]
    documents = [
        Document(
            document_id=f"num-{i}",
            content=f"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/{url}",
            mime_type="text/plain",
            metadata={},
        )
        for i, url in enumerate(urls)
    ]
    # Use a per-run id (same approach as the session names below) so this test
    # neither collides with nor inherits chunks from another test or an earlier
    # run that registered a fixed "test-vector-db".
    vector_db_id = f"test-vector-db-{uuid4()}"
    llama_stack_client.vector_dbs.register(
        vector_db_id=vector_db_id,
        embedding_model="all-MiniLM-L6-v2",
        embedding_dimension=384,
    )
    llama_stack_client.tool_runtime.rag_tool.insert(
        documents=documents,
        vector_db_id=vector_db_id,
        chunk_size_in_tokens=128,
    )
    # Build a new config dict rather than mutating the shared fixture value.
    agent_config = {
        **agent_config,
        "toolgroups": [
            {
                "name": "builtin::rag",
                "args": {"vector_db_ids": [vector_db_id]},
            },
            "builtin::code_interpreter",
        ],
    }
    agent = Agent(llama_stack_client, agent_config)
    inflation_doc = Document(
        document_id="test_csv",
        content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv",
        mime_type="text/csv",
        metadata={},
    )
    # Each case is (prompt, attached documents, tool expected in the event log).
    user_prompts = [
        (
            "Here is a csv file, can you describe it?",
            [inflation_doc],
            "code_interpreter",
        ),
        (
            "What are the top 5 topics that were explained? Only list succinct bullet points.",
            [],
            "query_from_memory",
        ),
    ]
    for prompt, docs, tool_name in user_prompts:
        print(f"User> {prompt}")
        # A fresh session per prompt keeps the two turns independent.
        session_id = agent.create_session(f"test-session-{uuid4()}")
        response = agent.create_turn(
            messages=[{"role": "user", "content": prompt}],
            session_id=session_id,
            documents=docs,
        )
        logs = [str(log) for log in EventLogger().log(response) if log is not None]
        logs_str = "".join(logs)
        assert f"Tool:{tool_name}" in logs_str
--- a/tests/client-sdk/inference/test_inference.py
+++ b/tests/client-sdk/inference/test_inference.py
@ -5,7 +5,7 @@
 # the root directory of this source tree.
 import base64
-import os
+import pathlib
 import pytest
 from pydantic import BaseModel
@ -57,13 +57,20 @@ def get_weather_tool_definition():
@pytest.fixture
-def base64_image_url():
+def image_path():
-    image_path = os.path.join(os.path.dirname(__file__), "dog.png")
+    return pathlib.Path(__file__).parent / "dog.png"
-    with open(image_path, "rb") as image_file:
+
-        # Convert the image to base64
+
-        base64_string = base64.b64encode(image_file.read()).decode("utf-8")
+@pytest.fixture
-        base64_url = f"data:image/png;base64,{base64_string}"
+def base64_image_data(image_path):
-        return base64_url
+    # Convert the image to base64
    return base64.b64encode(image_path.read_bytes()).decode("utf-8")
@pytest.fixture
 def base64_image_url(base64_image_data, image_path):
    # suffix includes the ., so we remove it
    return f"data:image/{image_path.suffix[1:]};base64,{base64_image_data}"
 def test_text_completion_non_streaming(llama_stack_client, text_model_id):
@ -371,20 +378,31 @@ def test_image_chat_completion_streaming(llama_stack_client, vision_model_id):
    assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"})
-def test_image_chat_completion_base64_url(
+@pytest.mark.parametrize("type_", ["url", "data"])
-    llama_stack_client, vision_model_id, base64_image_url
+def test_image_chat_completion_base64(
    llama_stack_client, vision_model_id, base64_image_data, base64_image_url, type_
 ):
    image_spec = {
        "url": {
            "type": "image",
            "image": {
                "url": {
                    "uri": base64_image_url,
                },
            },
        },
        "data": {
            "type": "image",
            "image": {
                "data": base64_image_data,
            },
        },
    }[type_]
    message = {
        "role": "user",
        "content": [
-            {
+            image_spec,
                "type": "image",
                "image": {
                    "url": {
                        "uri": base64_image_url,
                    },
                },
            },
            {
                "type": "text",
                "text": "Describe what is in this image.",