diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html index 0454e22ec..82493b69f 100644 --- a/docs/resources/llama-stack-spec.html +++ b/docs/resources/llama-stack-spec.html @@ -2439,7 +2439,8 @@ "type": { "type": "string", "const": "image", - "default": "image" + "default": "image", + "description": "Discriminator type of the content item. Always \"image\"" }, "image": { "type": "object", @@ -2448,18 +2449,19 @@ "$ref": "#/components/schemas/URL" }, "data": { - "type": "string", - "contentEncoding": "base64" + "type": "string" } }, - "additionalProperties": false + "additionalProperties": false, + "description": "Image as a base64 encoded string or a URL" } }, "additionalProperties": false, "required": [ "type", "image" - ] + ], + "title": "An image content item" }, "InterleavedContent": { "oneOf": [ @@ -2647,17 +2649,20 @@ "type": { "type": "string", "const": "text", - "default": "text" + "default": "text", + "description": "Discriminator type of the content item. Always \"text\"" }, "text": { - "type": "string" + "type": "string", + "description": "Text content" } }, "additionalProperties": false, "required": [ "type", "text" - ] + ], + "title": "A text content item" }, "ToolCall": { "type": "object", diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml index 0734ef236..71e2b8ed9 100644 --- a/docs/resources/llama-stack-spec.yaml +++ b/docs/resources/llama-stack-spec.yaml @@ -1466,6 +1466,8 @@ components: type: string const: image default: image + description: >- + Discriminator type of the content item. 
Always "image" image: type: object properties: @@ -1473,12 +1475,14 @@ components: $ref: '#/components/schemas/URL' data: type: string - contentEncoding: base64 additionalProperties: false + description: >- + Image as a base64 encoded string or a URL additionalProperties: false required: - type - image + title: An image content item InterleavedContent: oneOf: - type: string @@ -1598,12 +1602,16 @@ components: type: string const: text default: text + description: >- + Discriminator type of the content item. Always "text" text: type: string + description: Text content additionalProperties: false required: - type - text + title: A text content item ToolCall: type: object properties: diff --git a/llama_stack/apis/common/content_types.py b/llama_stack/apis/common/content_types.py index 0b27a0196..3cb901924 100644 --- a/llama_stack/apis/common/content_types.py +++ b/llama_stack/apis/common/content_types.py @@ -4,14 +4,13 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import base64 from enum import Enum from typing import Annotated, List, Literal, Optional, Union from llama_models.llama3.api.datatypes import ToolCall from llama_models.schema_utils import json_schema_type, register_schema -from pydantic import BaseModel, Field, field_serializer, model_validator +from pydantic import BaseModel, Field, model_validator @json_schema_type @@ -21,7 +20,8 @@ class URL(BaseModel): class _URLOrData(BaseModel): url: Optional[URL] = None - data: Optional[bytes] = None + # data is a base64 encoded string + data: Optional[str] = None @model_validator(mode="before") @classmethod @@ -30,21 +30,27 @@ class _URLOrData(BaseModel): return values return {"url": values} - @field_serializer("data") - def serialize_data(self, data: Optional[bytes], _info): - if data is None: - return None - return base64.b64encode(data).decode("utf-8") - @json_schema_type class ImageContentItem(BaseModel): + """An image content item + + :param type: Discriminator type of the content item. Always "image" + :param image: Image as a base64 encoded string or a URL + """ + type: Literal["image"] = "image" image: _URLOrData @json_schema_type class TextContentItem(BaseModel): + """A text content item + + :param type: Discriminator type of the content item. Always "text" + :param text: Text content + """ + type: Literal["text"] = "text" text: str diff --git a/llama_stack/providers/utils/inference/prompt_adapter.py b/llama_stack/providers/utils/inference/prompt_adapter.py index f5298d844..babfb736a 100644 --- a/llama_stack/providers/utils/inference/prompt_adapter.py +++ b/llama_stack/providers/utils/inference/prompt_adapter.py @@ -135,7 +135,8 @@ async def interleaved_content_convert_to_raw( else: raise ValueError("Unsupported URL type") elif image.data: - data = image.data + # data is a base64 encoded string, decode it to bytes for RawMediaItem + data = base64.b64decode(image.data) else: raise ValueError("No data or URL provided")