feat: add base64 encoded PDF support for OpenAI Chat Completions (#2881)

# What does this PR do? OpenAI Chat Completions supports passing a base64 encoded PDF file to a model, but Llama Stack currently does not allow for this behavior. This PR extends our implementation of the OpenAI API spec to change that. Closes #2129 ## Test Plan A new functional test has been added to test the validity of such a request Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
2025-12-03 09:53:45 +00:00 · 2025-07-29 06:23:41 -04:00 · 2025-07-29 06:23:41 -04:00 · 870a37ff4b
commit 870a37ff4b
parent cf8722079c
6 changed files with 1514 additions and 1200 deletions
--- a/tests/integration/inference/test_openai_completion.py
+++ b/tests/integration/inference/test_openai_completion.py
@ -5,8 +5,14 @@
 # the root directory of this source tree.


+import base64
+import os
+import tempfile
+
 import pytest
 from openai import OpenAI
+from reportlab.lib.pagesizes import letter
+from reportlab.pdfgen import canvas

 from llama_stack.distribution.library_client import LlamaStackAsLibraryClient

@ -82,6 +88,14 @@ def skip_if_provider_isnt_vllm(client_with_models, model_id):
        pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support vllm extra_body parameters.")


+def skip_if_provider_isnt_openai(client_with_models, model_id):
+    provider = provider_from_model(client_with_models, model_id)
+    if provider.provider_type != "remote::openai":
+        pytest.skip(
+            f"Model {model_id} hosted by {provider.provider_type} doesn't support chat completion calls with base64 encoded files."
+        )
+
+
@pytest.fixture
 def openai_client(client_with_models):
    base_url = f"{client_with_models.base_url}/v1/openai/v1"
@ -418,3 +432,45 @@ def test_inference_store_tool_calls(compat_client, client_with_models, text_mode
        # failed tool call parses show up as a message with content, so ensure
        # that the retrieve response content matches the original request
        assert retrieved_response.choices[0].message.content == content
+
+
+def test_openai_chat_completion_non_streaming_with_file(openai_client, client_with_models, text_model_id):
+    skip_if_provider_isnt_openai(client_with_models, text_model_id)
+
+    # Generate temporary PDF with "Hello World" text
+    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
+        c = canvas.Canvas(temp_pdf.name, pagesize=letter)
+        c.drawString(100, 750, "Hello World")
+        c.save()
+
+        # Read the PDF and sencode to base64
+        with open(temp_pdf.name, "rb") as pdf_file:
+            pdf_base64 = base64.b64encode(pdf_file.read()).decode("utf-8")
+
+        # Clean up temporary file
+        os.unlink(temp_pdf.name)
+
+    response = openai_client.chat.completions.create(
+        model=text_model_id,
+        messages=[
+            {
+                "role": "user",
+                "content": "Describe what you see in this PDF file.",
+            },
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "file",
+                        "file": {
+                            "filename": "my-temp-hello-world-pdf",
+                            "file_data": f"data:application/pdf;base64,{pdf_base64}",
+                        },
+                    }
+                ],
+            },
+        ],
+        stream=False,
+    )
+    message_content = response.choices[0].message.content.lower().strip()
+    assert "hello world" in message_content