feat: add base64 encoded PDF support for OpenAI Chat Completions (#2881)
Some checks failed
Coverage Badge / unit-tests (push) Failing after 1s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
Integration Tests / discover-tests (push) Successful in 3s
Test Llama Stack Build / generate-matrix (push) Successful in 6s
Vector IO Integration Tests / test-matrix (3.12, inline::milvus) (push) Failing after 12s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 7s
Vector IO Integration Tests / test-matrix (3.12, inline::sqlite-vec) (push) Failing after 13s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 9s
Unit Tests / unit-tests (3.12) (push) Failing after 8s
Vector IO Integration Tests / test-matrix (3.13, inline::faiss) (push) Failing after 14s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Failing after 10s
Vector IO Integration Tests / test-matrix (3.13, inline::sqlite-vec) (push) Failing after 13s
Unit Tests / unit-tests (3.13) (push) Failing after 10s
Test Llama Stack Build / build-single-provider (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.13, remote::chromadb) (push) Failing after 14s
Vector IO Integration Tests / test-matrix (3.13, inline::milvus) (push) Failing after 17s
Vector IO Integration Tests / test-matrix (3.12, inline::faiss) (push) Failing after 19s
Test External API and Providers / test-external (venv) (push) Failing after 16s
Test Llama Stack Build / build (push) Failing after 9s
Python Package Build Test / build (3.12) (push) Failing after 23s
Update ReadTheDocs / update-readthedocs (push) Failing after 21s
Vector IO Integration Tests / test-matrix (3.13, remote::pgvector) (push) Failing after 27s
Vector IO Integration Tests / test-matrix (3.12, remote::chromadb) (push) Failing after 29s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 31s
Vector IO Integration Tests / test-matrix (3.12, remote::pgvector) (push) Failing after 58s
Python Package Build Test / build (3.13) (push) Failing after 54s
Integration Tests / test-matrix (push) Failing after 56s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 1m4s
Pre-commit / pre-commit (push) Successful in 2m15s

# What does this PR do?
OpenAI Chat Completions supports passing a base64 encoded PDF file to a
model, but Llama Stack currently does not allow for this behavior. This
PR extends our implementation of the OpenAI API spec to change that.

Closes #2129

## Test Plan
A new functional test has been added that sends a base64 encoded PDF in a
non-streaming chat completion request and checks that the response reflects
the file's contents.

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
This commit is contained in:
Nathan Weinberg 2025-07-29 06:23:41 -04:00 committed by GitHub
parent cf8722079c
commit 870a37ff4b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 1514 additions and 1200 deletions

View file

@ -9821,13 +9821,17 @@
}, },
{ {
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam" "$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam"
},
{
"$ref": "#/components/schemas/OpenAIFile"
} }
], ],
"discriminator": { "discriminator": {
"propertyName": "type", "propertyName": "type",
"mapping": { "mapping": {
"text": "#/components/schemas/OpenAIChatCompletionContentPartTextParam", "text": "#/components/schemas/OpenAIChatCompletionContentPartTextParam",
"image_url": "#/components/schemas/OpenAIChatCompletionContentPartImageParam" "image_url": "#/components/schemas/OpenAIChatCompletionContentPartImageParam",
"file": "#/components/schemas/OpenAIFile"
} }
} }
}, },
@ -9974,6 +9978,41 @@
"title": "OpenAIDeveloperMessageParam", "title": "OpenAIDeveloperMessageParam",
"description": "A message from the developer in an OpenAI-compatible chat completion request." "description": "A message from the developer in an OpenAI-compatible chat completion request."
}, },
"OpenAIFile": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "file",
"default": "file"
},
"file": {
"$ref": "#/components/schemas/OpenAIFileFile"
}
},
"additionalProperties": false,
"required": [
"type",
"file"
],
"title": "OpenAIFile"
},
"OpenAIFileFile": {
"type": "object",
"properties": {
"file_data": {
"type": "string"
},
"file_id": {
"type": "string"
},
"filename": {
"type": "string"
}
},
"additionalProperties": false,
"title": "OpenAIFileFile"
},
"OpenAIImageURL": { "OpenAIImageURL": {
"type": "object", "type": "object",
"properties": { "properties": {

View file

@ -6934,11 +6934,13 @@ components:
oneOf: oneOf:
- $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
- $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
- $ref: '#/components/schemas/OpenAIFile'
discriminator: discriminator:
propertyName: type propertyName: type
mapping: mapping:
text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
file: '#/components/schemas/OpenAIFile'
OpenAIChatCompletionContentPartTextParam: OpenAIChatCompletionContentPartTextParam:
type: object type: object
properties: properties:
@ -7050,6 +7052,31 @@ components:
title: OpenAIDeveloperMessageParam title: OpenAIDeveloperMessageParam
description: >- description: >-
A message from the developer in an OpenAI-compatible chat completion request. A message from the developer in an OpenAI-compatible chat completion request.
OpenAIFile:
type: object
properties:
type:
type: string
const: file
default: file
file:
$ref: '#/components/schemas/OpenAIFileFile'
additionalProperties: false
required:
- type
- file
title: OpenAIFile
OpenAIFileFile:
type: object
properties:
file_data:
type: string
file_id:
type: string
filename:
type: string
additionalProperties: false
title: OpenAIFileFile
OpenAIImageURL: OpenAIImageURL:
type: object type: object
properties: properties:

View file

@ -455,8 +455,21 @@ class OpenAIChatCompletionContentPartImageParam(BaseModel):
image_url: OpenAIImageURL image_url: OpenAIImageURL
@json_schema_type
class OpenAIFileFile(BaseModel):
    # Payload of a "file" content part in an OpenAI-compatible chat completion
    # request. Every field is an optional string that defaults to None; no
    # combination of fields is enforced by this model.

    # File contents as a string. The accompanying test sends a data URI of the
    # form "data:application/pdf;base64,<payload>".
    file_data: str | None = None
    # NOTE(review): presumably the ID of a previously uploaded file; not
    # exercised by the visible code, confirm against the OpenAI API spec.
    file_id: str | None = None
    # Name to associate with the file.
    filename: str | None = None
@json_schema_type
class OpenAIFile(BaseModel):
    # "file" content part of an OpenAI-compatible chat completion message.

    # Fixed tag; used as the discriminator value when this model is a member
    # of the OpenAIChatCompletionContentPartParam union.
    type: Literal["file"] = "file"
    # The file payload (required).
    file: OpenAIFileFile
OpenAIChatCompletionContentPartParam = Annotated[ OpenAIChatCompletionContentPartParam = Annotated[
OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam, OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile,
Field(discriminator="type"), Field(discriminator="type"),
] ]
register_schema(OpenAIChatCompletionContentPartParam, name="OpenAIChatCompletionContentPartParam") register_schema(OpenAIChatCompletionContentPartParam, name="OpenAIChatCompletionContentPartParam")

View file

@ -114,6 +114,7 @@ test = [
"sqlalchemy[asyncio]>=2.0.41", "sqlalchemy[asyncio]>=2.0.41",
"requests", "requests",
"pymilvus>=2.5.12", "pymilvus>=2.5.12",
"reportlab",
] ]
docs = [ docs = [
"setuptools", "setuptools",

View file

@ -5,8 +5,14 @@
# the root directory of this source tree. # the root directory of this source tree.
import base64
import os
import tempfile
import pytest import pytest
from openai import OpenAI from openai import OpenAI
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
@ -82,6 +88,14 @@ def skip_if_provider_isnt_vllm(client_with_models, model_id):
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support vllm extra_body parameters.") pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support vllm extra_body parameters.")
def skip_if_provider_isnt_openai(client_with_models, model_id):
    """Skip the calling test unless ``model_id`` is served by ``remote::openai``.

    The provider is looked up with ``provider_from_model``; for any other
    provider type the test is skipped via ``pytest.skip`` with an explanatory
    message.
    """
    provider_type = provider_from_model(client_with_models, model_id).provider_type
    if provider_type == "remote::openai":
        return

    pytest.skip(
        f"Model {model_id} hosted by {provider_type} doesn't support chat completion calls with base64 encoded files."
    )
@pytest.fixture @pytest.fixture
def openai_client(client_with_models): def openai_client(client_with_models):
base_url = f"{client_with_models.base_url}/v1/openai/v1" base_url = f"{client_with_models.base_url}/v1/openai/v1"
@ -418,3 +432,45 @@ def test_inference_store_tool_calls(compat_client, client_with_models, text_mode
# failed tool call parses show up as a message with content, so ensure # failed tool call parses show up as a message with content, so ensure
# that the retrieve response content matches the original request # that the retrieve response content matches the original request
assert retrieved_response.choices[0].message.content == content assert retrieved_response.choices[0].message.content == content
def test_openai_chat_completion_non_streaming_with_file(openai_client, client_with_models, text_model_id):
    """Send a base64 encoded PDF as a ``file`` content part and check the reply.

    A one-page PDF containing the text "Hello World" is generated with
    reportlab, base64 encoded, and attached as a data URI to a non-streaming
    chat completion request. The test passes when the reply mentions
    "hello world" (case-insensitive). It is skipped unless the model is served
    by the ``remote::openai`` provider.
    """
    skip_if_provider_isnt_openai(client_with_models, text_model_id)

    # Build the PDF inside a temporary directory: the directory (and the PDF)
    # is removed even if generation or reading raises, and reportlab writes to
    # a path that no other handle holds open (an open NamedTemporaryFile cannot
    # be reopened by name on Windows).
    with tempfile.TemporaryDirectory() as temp_dir:
        pdf_path = os.path.join(temp_dir, "hello_world.pdf")

        # Generate temporary PDF with "Hello World" text
        c = canvas.Canvas(pdf_path, pagesize=letter)
        c.drawString(100, 750, "Hello World")
        c.save()

        # Read the PDF and encode to base64
        with open(pdf_path, "rb") as pdf_file:
            pdf_base64 = base64.b64encode(pdf_file.read()).decode("utf-8")

    response = openai_client.chat.completions.create(
        model=text_model_id,
        messages=[
            {
                "role": "user",
                "content": "Describe what you see in this PDF file.",
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "file",
                        "file": {
                            "filename": "my-temp-hello-world-pdf",
                            "file_data": f"data:application/pdf;base64,{pdf_base64}",
                        },
                    }
                ],
            },
        ],
        stream=False,
    )

    content = response.choices[0].message.content
    # Fail with a clear message instead of an AttributeError on None.
    assert content is not None, "chat completion returned no message content"
    message_content = content.lower().strip()
    assert "hello world" in message_content

2574
uv.lock generated

File diff suppressed because it is too large Load diff