diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html
index 82493b69f..8f69d152e 100644
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@@ -2446,10 +2446,12 @@
"type": "object",
"properties": {
"url": {
- "$ref": "#/components/schemas/URL"
+ "$ref": "#/components/schemas/URL",
+ "description": "A URL of the image or data URL in the format of data:image/{type};base64,{data}. Note that URL could have length limits."
},
"data": {
- "type": "string"
+ "type": "string",
+ "description": "base64 encoded image data as string"
}
},
"additionalProperties": false,
diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml
index 71e2b8ed9..937729e2b 100644
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@@ -1473,8 +1473,12 @@ components:
properties:
url:
$ref: '#/components/schemas/URL'
+ description: >-
+ A URL of the image or data URL in the format of data:image/{type};base64,{data}.
+ Note that URL could have length limits.
data:
type: string
+ description: base64 encoded image data as string
additionalProperties: false
description: >-
Image as a base64 encoded string or an URL
diff --git a/llama_stack/apis/common/content_types.py b/llama_stack/apis/common/content_types.py
index 3cb901924..04a5913d6 100644
--- a/llama_stack/apis/common/content_types.py
+++ b/llama_stack/apis/common/content_types.py
@@ -19,8 +19,16 @@ class URL(BaseModel):
class _URLOrData(BaseModel):
+ """
+ A URL or a base64 encoded string
+
+ :param url: A URL of the image or data URL in the format of data:image/{type};base64,{data}. Note that URL could have length limits.
+ :param data: base64 encoded image data as string
+ """
+
url: Optional[URL] = None
# data is a base64 encoded string
+ # TODO: annotate with contentEncoding="base64" in OpenAPI schema
data: Optional[str] = None
@model_validator(mode="before")
diff --git a/tests/client-sdk/inference/test_inference.py b/tests/client-sdk/inference/test_inference.py
index 8ca11521c..6260d1cdf 100644
--- a/tests/client-sdk/inference/test_inference.py
+++ b/tests/client-sdk/inference/test_inference.py
@@ -5,7 +5,7 @@
# the root directory of this source tree.
import base64
-import os
+import pathlib
import pytest
from pydantic import BaseModel
@@ -48,14 +48,21 @@ def get_weather_tool_definition():
}
@pytest.fixture
-def base64_image_url():
- image_path = os.path.join(os.path.dirname(__file__), "dog.png")
- with open(image_path, "rb") as image_file:
- # Convert the image to base64
- base64_string = base64.b64encode(image_file.read()).decode("utf-8")
- base64_url = f"data:image/png;base64,{base64_string}"
- return base64_url
+def image_path():
+ return pathlib.Path(__file__).parent / "dog.png"
+
+
+@pytest.fixture
+def base64_image_data(image_path):
+ # Convert the image to base64
+ return base64.b64encode(image_path.read_bytes()).decode("utf-8")
+
+
+@pytest.fixture
+def base64_image_url(base64_image_data, image_path):
+ # suffix includes the ., so we remove it
+ return f"data:image/{image_path.suffix[1:]};base64,{base64_image_data}"
def test_text_completion_non_streaming(llama_stack_client, text_model_id):
@@ -353,25 +370,30 @@ def test_image_chat_completion_streaming(llama_stack_client, vision_model_id):
assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"})
-def test_image_chat_completion_base64_url(
- llama_stack_client, vision_model_id, base64_image_url
+@pytest.mark.parametrize("type_", ["url", "data"])
+def test_image_chat_completion_base64(
+ llama_stack_client, vision_model_id, base64_image_data, base64_image_url, type_
):
- message = {
- "role": "user",
- "content": [
- {
- "type": "image",
- "image": {
- "url": {
- "uri": base64_image_url,
- },
+ image_spec = {
+ "url": {
+ "type": "image",
+ "image": {
+ "url": {
+ "uri": base64_image_url,
},
},
- {
- "type": "text",
- "text": "Describe what is in this image.",
+ },
+ "data": {
+ "type": "image",
+ "image": {
+ "data": base64_image_data,
},
- ],
+ },
+ }[type_]
+
+ message = {
+ "role": "user",
+ "content": [image_spec],
}
response = llama_stack_client.inference.chat_completion(
model_id=vision_model_id,