From 5288b602ecf6ae5a2c869a678e4344a47da1c508 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Thu, 30 Jan 2025 15:01:18 -0800 Subject: [PATCH] add comments --- docs/resources/llama-stack-spec.html | 6 +- docs/resources/llama-stack-spec.yaml | 4 ++ llama_stack/apis/common/content_types.py | 8 +++ tests/client-sdk/inference/test_inference.py | 68 +++++++++++++------- 4 files changed, 61 insertions(+), 25 deletions(-) diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html index 82493b69f..8f69d152e 100644 --- a/docs/resources/llama-stack-spec.html +++ b/docs/resources/llama-stack-spec.html @@ -2446,10 +2446,12 @@ "type": "object", "properties": { "url": { - "$ref": "#/components/schemas/URL" + "$ref": "#/components/schemas/URL", + "description": "A URL of the image or data URL in the format of data:image/{type};base64,{data}. Note that URL could have length limits." }, "data": { - "type": "string" + "type": "string", + "description": "base64 encoded image data as string" } }, "additionalProperties": false, diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml index 71e2b8ed9..937729e2b 100644 --- a/docs/resources/llama-stack-spec.yaml +++ b/docs/resources/llama-stack-spec.yaml @@ -1473,8 +1473,12 @@ components: properties: url: $ref: '#/components/schemas/URL' + description: >- + A URL of the image or data URL in the format of data:image/{type};base64,{data}. + Note that URL could have length limits. data: type: string + description: base64 encoded image data as string additionalProperties: false description: >- Image as a base64 encoded string or an URL diff --git a/llama_stack/apis/common/content_types.py b/llama_stack/apis/common/content_types.py index 3cb901924..04a5913d6 100644 --- a/llama_stack/apis/common/content_types.py +++ b/llama_stack/apis/common/content_types.py @@ -19,8 +19,16 @@ class URL(BaseModel): class _URLOrData(BaseModel): + """ + A URL or a base64 encoded string + + :param url: A URL of the image or data URL in the format of data:image/{type};base64,{data}. Note that URL could have length limits. + :param data: base64 encoded image data as string + """ + url: Optional[URL] = None # data is a base64 encoded string + # TODO: annotate with contentEncoding="base64" in OpenAPI schema data: Optional[str] = None @model_validator(mode="before") diff --git a/tests/client-sdk/inference/test_inference.py b/tests/client-sdk/inference/test_inference.py index 8ca11521c..6260d1cdf 100644 --- a/tests/client-sdk/inference/test_inference.py +++ b/tests/client-sdk/inference/test_inference.py @@ -5,7 +5,7 @@ # the root directory of this source tree. import base64 -import os +import pathlib import pytest from pydantic import BaseModel @@ -48,14 +48,31 @@ def get_weather_tool_definition(): } +# @pytest.fixture +# def base64_image_url(): +# image_path = os.path.join(os.path.dirname(__file__), "dog.png") +# with open(image_path, "rb") as image_file: +# # Convert the image to base64 +# base64_string = base64.b64encode(image_file.read()).decode("utf-8") +# base64_url = f"data:image/png;base64,{base64_string}" +# return base64_url + + @pytest.fixture -def base64_image_url(): - image_path = os.path.join(os.path.dirname(__file__), "dog.png") - with open(image_path, "rb") as image_file: - # Convert the image to base64 - base64_string = base64.b64encode(image_file.read()).decode("utf-8") - base64_url = f"data:image/png;base64,{base64_string}" - return base64_url +def image_path(): + return pathlib.Path(__file__).parent / "dog.png" + + +@pytest.fixture +def base64_image_data(image_path): + # Convert the image to base64 + return base64.b64encode(image_path.read_bytes()).decode("utf-8") + + +@pytest.fixture +def base64_image_url(base64_image_data, image_path): + # suffix includes the ., so we remove it + return f"data:image/{image_path.suffix[1:]};base64,{base64_image_data}" def test_text_completion_non_streaming(llama_stack_client, text_model_id): @@ -353,25 +370,30 @@ def test_image_chat_completion_streaming(llama_stack_client, vision_model_id): assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"}) -def test_image_chat_completion_base64_url( - llama_stack_client, vision_model_id, base64_image_url +@pytest.mark.parametrize("type_", ["url", "data"]) +def test_image_chat_completion_base64( + llama_stack_client, vision_model_id, base64_image_data, base64_image_url, type_ ): - message = { - "role": "user", - "content": [ - { - "type": "image", - "image": { - "url": { - "uri": base64_image_url, - }, + image_spec = { + "url": { + "type": "image", + "image": { + "url": { + "uri": base64_image_url, }, }, - { - "type": "text", - "text": "Describe what is in this image.", + }, + "data": { + "type": "image", + "image": { + "data": base64_image_data, }, - ], + }, + }[type_] + + message = { + "role": "user", + "content": [image_spec], } response = llama_stack_client.inference.chat_completion( model_id=vision_model_id,