From 1e673010e413e4d838db4832157b54b08d43937d Mon Sep 17 00:00:00 2001 From: Ben Browning Date: Sat, 12 Apr 2025 14:51:39 -0400 Subject: [PATCH] fix: OpenAI API chat completion messages with image_url This fixes the OpenAI API chat completions endpoint to accept messages with image_url references. Previously, we were using the InterleavedContent type which was actually a Llama Stack API type that happened to work for text message parts, but the schema differs for image message parts. So, this adds OpenAI-specific schema classes to handle text and image chat completions message parts. Signed-off-by: Ben Browning --- docs/_static/llama-stack-spec.html | 131 +++++++++++++++++++++++- docs/_static/llama-stack-spec.yaml | 79 +++++++++++++- llama_stack/apis/inference/inference.py | 41 +++++++- 3 files changed, 236 insertions(+), 15 deletions(-) diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 542fb5be5..1927f2013 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -8825,7 +8825,17 @@ "description": "Must be \"assistant\" to identify this as the model's response" }, "content": { - "$ref": "#/components/schemas/InterleavedContent", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam" + } + } + ], "description": "The content of the model's response" }, "name": { @@ -8848,6 +8858,61 @@ "title": "OpenAIAssistantMessageParam", "description": "A message containing the model's (assistant) response in an OpenAI-compatible chat completion request." }, + "OpenAIChatCompletionContentPartImageParam": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "image_url", + "default": "image_url" + }, + "image_url": { + "$ref": "#/components/schemas/OpenAIImageURL" + } + }, + "additionalProperties": false, + "required": [ + "type", + "image_url" + ], + "title": "OpenAIChatCompletionContentPartImageParam" + }, + "OpenAIChatCompletionContentPartParam": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam" + }, + { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "text": "#/components/schemas/OpenAIChatCompletionContentPartTextParam", + "image_url": "#/components/schemas/OpenAIChatCompletionContentPartImageParam" + } + } + }, + "OpenAIChatCompletionContentPartTextParam": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "text", + "default": "text" + }, + "text": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "type", + "text" + ], + "title": "OpenAIChatCompletionContentPartTextParam" + }, "OpenAIDeveloperMessageParam": { "type": "object", "properties": { @@ -8858,7 +8923,17 @@ "description": "Must be \"developer\" to identify this as a developer message" }, "content": { - "$ref": "#/components/schemas/InterleavedContent", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam" + } + } + ], "description": "The content of the developer message" }, "name": { @@ -8874,6 +8949,22 @@ "title": "OpenAIDeveloperMessageParam", "description": "A message from the developer in an OpenAI-compatible chat completion request." }, + "OpenAIImageURL": { + "type": "object", + "properties": { + "url": { + "type": "string" + }, + "detail": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "url" + ], + "title": "OpenAIImageURL" + }, "OpenAIMessageParam": { "oneOf": [ { @@ -8913,7 +9004,17 @@ "description": "Must be \"system\" to identify this as a system message" }, "content": { - "$ref": "#/components/schemas/InterleavedContent", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam" + } + } + ], "description": "The content of the \"system prompt\". If multiple system messages are provided, they are concatenated. The underlying Llama Stack code may also add other system messages (for example, for formatting tool definitions)." }, "name": { @@ -8943,7 +9044,17 @@ "description": "Unique identifier for the tool call this response is for" }, "content": { - "$ref": "#/components/schemas/InterleavedContent", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam" + } + } + ], "description": "The response content from the tool" } }, @@ -8966,7 +9077,17 @@ "description": "Must be \"user\" to identify this as a user message" }, "content": { - "$ref": "#/components/schemas/InterleavedContent", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam" + } + } + ], "description": "The content of the message, which can include text and other media" }, "name": { diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index fa7b130e2..1070b76a4 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -6057,7 +6057,11 @@ components: description: >- Must be "assistant" to identify this as the model's response content: - $ref: '#/components/schemas/InterleavedContent' + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam' description: The content of the model's response name: type: string @@ -6077,6 +6081,44 @@ components: description: >- A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. + "OpenAIChatCompletionContentPartImageParam": + type: object + properties: + type: + type: string + const: image_url + default: image_url + image_url: + $ref: '#/components/schemas/OpenAIImageURL' + additionalProperties: false + required: + - type + - image_url + title: >- + OpenAIChatCompletionContentPartImageParam + OpenAIChatCompletionContentPartParam: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + discriminator: + propertyName: type + mapping: + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + OpenAIChatCompletionContentPartTextParam: + type: object + properties: + type: + type: string + const: text + default: text + text: + type: string + additionalProperties: false + required: + - type + - text + title: OpenAIChatCompletionContentPartTextParam OpenAIDeveloperMessageParam: type: object properties: @@ -6087,7 +6129,11 @@ components: description: >- Must be "developer" to identify this as a developer message content: - $ref: '#/components/schemas/InterleavedContent' + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam' description: The content of the developer message name: type: string @@ -6100,6 +6146,17 @@ components: title: OpenAIDeveloperMessageParam description: >- A message from the developer in an OpenAI-compatible chat completion request. + OpenAIImageURL: + type: object + properties: + url: + type: string + detail: + type: string + additionalProperties: false + required: + - url + title: OpenAIImageURL OpenAIMessageParam: oneOf: - $ref: '#/components/schemas/OpenAIUserMessageParam' @@ -6125,7 +6182,11 @@ components: description: >- Must be "system" to identify this as a system message content: - $ref: '#/components/schemas/InterleavedContent' + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam' description: >- The content of the "system prompt". If multiple system messages are provided, they are concatenated. The underlying Llama Stack code may also add other @@ -6155,7 +6216,11 @@ components: description: >- Unique identifier for the tool call this response is for content: - $ref: '#/components/schemas/InterleavedContent' + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam' description: The response content from the tool additionalProperties: false required: @@ -6176,7 +6241,11 @@ components: description: >- Must be "user" to identify this as a user message content: - $ref: '#/components/schemas/InterleavedContent' + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam' description: >- The content of the message, which can include text and other media name: diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index 21753ca23..0e70c876e 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -442,6 +442,37 @@ class EmbeddingsResponse(BaseModel): embeddings: List[List[float]] +@json_schema_type +class OpenAIChatCompletionContentPartTextParam(BaseModel): + type: Literal["text"] = "text" + text: str + + +@json_schema_type +class OpenAIImageURL(BaseModel): + url: str + detail: Optional[str] = None + + +@json_schema_type +class OpenAIChatCompletionContentPartImageParam(BaseModel): + type: Literal["image_url"] = "image_url" + image_url: OpenAIImageURL + + +OpenAIChatCompletionContentPartParam = Annotated[ + Union[ + OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionContentPartImageParam, + ], + Field(discriminator="type"), +] +register_schema(OpenAIChatCompletionContentPartParam, name="OpenAIChatCompletionContentPartParam") + + +OpenAIChatCompletionMessageContent = Union[str, List[OpenAIChatCompletionContentPartParam]] + + @json_schema_type class OpenAIUserMessageParam(BaseModel): """A message from the user in an OpenAI-compatible chat completion request. @@ -452,7 +483,7 @@ class OpenAIUserMessageParam(BaseModel): """ role: Literal["user"] = "user" - content: InterleavedContent + content: OpenAIChatCompletionMessageContent name: Optional[str] = None @@ -466,7 +497,7 @@ class OpenAISystemMessageParam(BaseModel): """ role: Literal["system"] = "system" - content: InterleavedContent + content: OpenAIChatCompletionMessageContent name: Optional[str] = None @@ -481,7 +512,7 @@ class OpenAIAssistantMessageParam(BaseModel): """ role: Literal["assistant"] = "assistant" - content: InterleavedContent + content: OpenAIChatCompletionMessageContent name: Optional[str] = None tool_calls: Optional[List[ToolCall]] = Field(default_factory=list) @@ -497,7 +528,7 @@ class OpenAIToolMessageParam(BaseModel): role: Literal["tool"] = "tool" tool_call_id: str - content: InterleavedContent + content: OpenAIChatCompletionMessageContent @json_schema_type @@ -510,7 +541,7 @@ class OpenAIDeveloperMessageParam(BaseModel): """ role: Literal["developer"] = "developer" - content: InterleavedContent + content: OpenAIChatCompletionMessageContent name: Optional[str] = None