diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html
index 0454e22ec..82493b69f 100644
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@@ -2439,7 +2439,8 @@
"type": {
"type": "string",
"const": "image",
- "default": "image"
+ "default": "image",
+ "description": "Discriminator type of the content item. Always \"image\""
},
"image": {
"type": "object",
@@ -2448,18 +2449,19 @@
"$ref": "#/components/schemas/URL"
},
"data": {
- "type": "string",
- "contentEncoding": "base64"
+ "type": "string"
}
},
- "additionalProperties": false
+ "additionalProperties": false,
+ "description": "Image as a base64 encoded string or an URL"
}
},
"additionalProperties": false,
"required": [
"type",
"image"
- ]
+ ],
+ "title": "A image content item"
},
"InterleavedContent": {
"oneOf": [
@@ -2647,17 +2649,20 @@
"type": {
"type": "string",
"const": "text",
- "default": "text"
+ "default": "text",
+ "description": "Discriminator type of the content item. Always \"text\""
},
"text": {
- "type": "string"
+ "type": "string",
+ "description": "Text content"
}
},
"additionalProperties": false,
"required": [
"type",
"text"
- ]
+ ],
+ "title": "A text content item"
},
"ToolCall": {
"type": "object",
diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml
index 0734ef236..71e2b8ed9 100644
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@@ -1466,6 +1466,8 @@ components:
type: string
const: image
default: image
+ description: >-
+ Discriminator type of the content item. Always "image"
image:
type: object
properties:
@@ -1473,12 +1475,14 @@ components:
$ref: '#/components/schemas/URL'
data:
type: string
- contentEncoding: base64
additionalProperties: false
+ description: >-
+ Image as a base64 encoded string or a URL
additionalProperties: false
required:
- type
- image
+ title: An image content item
InterleavedContent:
oneOf:
- type: string
@@ -1598,12 +1602,16 @@ components:
type: string
const: text
default: text
+ description: >-
+ Discriminator type of the content item. Always "text"
text:
type: string
+ description: Text content
additionalProperties: false
required:
- type
- text
+ title: A text content item
ToolCall:
type: object
properties:
diff --git a/llama_stack/apis/common/content_types.py b/llama_stack/apis/common/content_types.py
index 0b27a0196..3cb901924 100644
--- a/llama_stack/apis/common/content_types.py
+++ b/llama_stack/apis/common/content_types.py
@@ -4,14 +4,13 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import base64
from enum import Enum
from typing import Annotated, List, Literal, Optional, Union
from llama_models.llama3.api.datatypes import ToolCall
from llama_models.schema_utils import json_schema_type, register_schema
-from pydantic import BaseModel, Field, field_serializer, model_validator
+from pydantic import BaseModel, Field, model_validator
@json_schema_type
@@ -21,7 +20,8 @@ class URL(BaseModel):
class _URLOrData(BaseModel):
url: Optional[URL] = None
- data: Optional[bytes] = None
+ # data is a base64 encoded string
+ data: Optional[str] = None
@model_validator(mode="before")
@classmethod
@@ -30,21 +30,27 @@ class _URLOrData(BaseModel):
return values
return {"url": values}
- @field_serializer("data")
- def serialize_data(self, data: Optional[bytes], _info):
- if data is None:
- return None
- return base64.b64encode(data).decode("utf-8")
-
@json_schema_type
class ImageContentItem(BaseModel):
+ """A image content item
+
+ :param type: Discriminator type of the content item. Always "image"
+ :param image: Image as a base64 encoded string or a URL
+ """
+
type: Literal["image"] = "image"
image: _URLOrData
@json_schema_type
class TextContentItem(BaseModel):
+ """A text content item
+
+ :param type: Discriminator type of the content item. Always "text"
+ :param text: Text content
+ """
+
type: Literal["text"] = "text"
text: str
diff --git a/llama_stack/providers/utils/inference/prompt_adapter.py b/llama_stack/providers/utils/inference/prompt_adapter.py
index f5298d844..babfb736a 100644
--- a/llama_stack/providers/utils/inference/prompt_adapter.py
+++ b/llama_stack/providers/utils/inference/prompt_adapter.py
@@ -135,7 +135,8 @@ async def interleaved_content_convert_to_raw(
else:
raise ValueError("Unsupported URL type")
elif image.data:
- data = image.data
+ # data is a base64 encoded string, decode it to bytes for RawMediaItem
+ data = base64.b64decode(image.data)
else:
raise ValueError("No data or URL provided")