test: add integration test for OpenAI Chat Completion file support

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
Nathan Weinberg 2025-07-22 16:44:15 -04:00
parent 9f96aca4d2
commit 2a599d9a48
4 changed files with 2160 additions and 2012 deletions

pyproject.toml

@@ -114,6 +114,7 @@ test = [
     "sqlalchemy[asyncio]>=2.0.41",
     "requests",
     "pymilvus>=2.5.12",
+    "reportlab",
 ]
 docs = [
     "setuptools",

requirements.txt

@@ -1,16 +1,16 @@
 # This file was autogenerated by uv via the following command:
 #    uv export --frozen --no-hashes --no-emit-project --no-default-groups --output-file=requirements.txt
-aiohappyeyeballs==2.5.0
+aiohappyeyeballs==2.6.1
     # via aiohttp
-aiohttp==3.12.13
+aiohttp==3.12.14
     # via llama-stack
-aiosignal==1.3.2
+aiosignal==1.4.0
     # via aiohttp
 aiosqlite==0.21.0
     # via llama-stack
 annotated-types==0.7.0
     # via pydantic
-anyio==4.8.0
+anyio==4.9.0
     # via
     #   httpx
     #   llama-api-client
@@ -19,21 +19,21 @@ anyio==4.8.0
     #   starlette
 asyncpg==0.30.0
     # via llama-stack
-attrs==25.1.0
+attrs==25.3.0
     # via
     #   aiohttp
     #   jsonschema
     #   referencing
-certifi==2025.1.31
+certifi==2025.7.14
     # via
     #   httpcore
     #   httpx
     #   requests
 cffi==1.17.1 ; platform_python_implementation != 'PyPy'
     # via cryptography
-charset-normalizer==3.4.1
+charset-normalizer==3.4.2
     # via requests
-click==8.1.8
+click==8.2.1
     # via
     #   llama-stack-client
     #   uvicorn
@@ -43,11 +43,6 @@ colorama==0.4.6 ; sys_platform == 'win32'
     #   tqdm
 cryptography==45.0.5
     # via python-jose
-deprecated==1.2.18
-    # via
-    #   opentelemetry-api
-    #   opentelemetry-exporter-otlp-proto-http
-    #   opentelemetry-semantic-conventions
 distro==1.9.0
     # via
     #   llama-api-client
@@ -55,21 +50,21 @@ distro==1.9.0
     #   openai
 ecdsa==0.19.1
     # via python-jose
-fastapi==0.115.8
+fastapi==0.116.1
     # via llama-stack
-filelock==3.17.0
+filelock==3.18.0
     # via huggingface-hub
 fire==0.7.0
     # via
     #   llama-stack
     #   llama-stack-client
-frozenlist==1.5.0
+frozenlist==1.7.0
     # via
     #   aiohttp
     #   aiosignal
-fsspec==2024.12.0
+fsspec==2025.3.0
     # via huggingface-hub
-googleapis-common-protos==1.67.0
+googleapis-common-protos==1.70.0
     # via opentelemetry-exporter-otlp-proto-http
 h11==0.16.0
     # via
@@ -86,7 +81,7 @@ httpx==0.28.1
     #   llama-stack
     #   llama-stack-client
     #   openai
-huggingface-hub==0.33.0
+huggingface-hub==0.33.5
     # via llama-stack
 idna==3.10
     # via
@@ -94,15 +89,15 @@ idna==3.10
     #   httpx
     #   requests
     #   yarl
-importlib-metadata==8.5.0
+importlib-metadata==8.7.0
     # via opentelemetry-api
 jinja2==3.1.6
     # via llama-stack
-jiter==0.8.2
+jiter==0.10.0
     # via openai
-jsonschema==4.23.0
+jsonschema==4.25.0
     # via llama-stack
-jsonschema-specifications==2024.10.1
+jsonschema-specifications==2025.4.1
     # via jsonschema
 llama-api-client==0.1.2
     # via llama-stack
@@ -114,79 +109,79 @@ markupsafe==3.0.2
     # via jinja2
 mdurl==0.1.2
     # via markdown-it-py
-multidict==6.1.0
+multidict==6.6.3
     # via
     #   aiohttp
     #   yarl
-numpy==2.2.3
+numpy==2.3.1
     # via pandas
-openai==1.71.0
+openai==1.97.1
     # via llama-stack
-opentelemetry-api==1.30.0
+opentelemetry-api==1.35.0
     # via
     #   opentelemetry-exporter-otlp-proto-http
     #   opentelemetry-sdk
     #   opentelemetry-semantic-conventions
-opentelemetry-exporter-otlp-proto-common==1.30.0
+opentelemetry-exporter-otlp-proto-common==1.35.0
     # via opentelemetry-exporter-otlp-proto-http
-opentelemetry-exporter-otlp-proto-http==1.30.0
+opentelemetry-exporter-otlp-proto-http==1.35.0
     # via llama-stack
-opentelemetry-proto==1.30.0
+opentelemetry-proto==1.35.0
     # via
     #   opentelemetry-exporter-otlp-proto-common
     #   opentelemetry-exporter-otlp-proto-http
-opentelemetry-sdk==1.30.0
+opentelemetry-sdk==1.35.0
     # via
     #   llama-stack
     #   opentelemetry-exporter-otlp-proto-http
-opentelemetry-semantic-conventions==0.51b0
+opentelemetry-semantic-conventions==0.56b0
     # via opentelemetry-sdk
-packaging==24.2
+packaging==25.0
     # via huggingface-hub
-pandas==2.2.3
+pandas==2.3.1
     # via llama-stack-client
-pillow==11.1.0
+pillow==11.3.0
     # via llama-stack
-prompt-toolkit==3.0.50
+prompt-toolkit==3.0.51
     # via
     #   llama-stack
     #   llama-stack-client
-propcache==0.3.0
+propcache==0.3.2
     # via
     #   aiohttp
     #   yarl
-protobuf==5.29.5
+protobuf==6.31.1
     # via
     #   googleapis-common-protos
     #   opentelemetry-proto
-pyaml==25.1.0
+pyaml==25.7.0
     # via llama-stack-client
-pyasn1==0.4.8
+pyasn1==0.6.1
     # via
     #   python-jose
     #   rsa
 pycparser==2.22 ; platform_python_implementation != 'PyPy'
     # via cffi
-pydantic==2.10.6
+pydantic==2.11.7
     # via
     #   fastapi
     #   llama-api-client
     #   llama-stack
     #   llama-stack-client
     #   openai
-pydantic-core==2.27.2
+pydantic-core==2.33.2
     # via pydantic
-pygments==2.19.1
+pygments==2.19.2
     # via rich
 python-dateutil==2.9.0.post0
     # via pandas
-python-dotenv==1.0.1
+python-dotenv==1.1.1
     # via llama-stack
-python-jose==3.4.0
+python-jose==3.5.0
     # via llama-stack
 python-multipart==0.0.20
     # via llama-stack
-pytz==2025.1
+pytz==2025.2
     # via pandas
 pyyaml==6.0.2
     # via
@@ -204,15 +199,15 @@ requests==2.32.4
     #   llama-stack-client
     #   opentelemetry-exporter-otlp-proto-http
     #   tiktoken
-rich==13.9.4
+rich==14.0.0
     # via
     #   llama-stack
     #   llama-stack-client
-rpds-py==0.22.3
+rpds-py==0.26.0
     # via
     #   jsonschema
     #   referencing
-rsa==4.9
+rsa==4.9.1
     # via python-jose
 six==1.17.0
     # via
@@ -224,11 +219,11 @@ sniffio==1.3.1
     #   llama-api-client
     #   llama-stack-client
     #   openai
-starlette==0.45.3
+starlette==0.47.2
     # via
     #   fastapi
     #   llama-stack
-termcolor==2.5.0
+termcolor==3.1.0
     # via
     #   fire
     #   llama-stack
@@ -240,8 +235,9 @@ tqdm==4.67.1
     #   huggingface-hub
     #   llama-stack-client
     #   openai
-typing-extensions==4.12.2
+typing-extensions==4.14.1
     # via
+    #   aiosignal
     #   aiosqlite
     #   anyio
     #   fastapi
@@ -249,21 +245,26 @@ typing-extensions==4.12.2
     #   llama-api-client
     #   llama-stack-client
     #   openai
+    #   opentelemetry-api
+    #   opentelemetry-exporter-otlp-proto-http
     #   opentelemetry-sdk
+    #   opentelemetry-semantic-conventions
     #   pydantic
     #   pydantic-core
     #   referencing
-tzdata==2025.1
+    #   starlette
+    #   typing-inspection
+typing-inspection==0.4.1
+    # via pydantic
+tzdata==2025.2
     # via pandas
 urllib3==2.5.0
     # via requests
-uvicorn==0.34.0
+uvicorn==0.35.0
     # via llama-stack
 wcwidth==0.2.13
     # via prompt-toolkit
-wrapt==1.17.2
-    # via deprecated
-yarl==1.18.3
+yarl==1.20.1
     # via aiohttp
-zipp==3.21.0
+zipp==3.23.0
     # via importlib-metadata

tests/integration/inference/test_openai_completion.py

@@ -5,8 +5,14 @@
 # the root directory of this source tree.

+import base64
+import os
+import tempfile
+
 import pytest
 from openai import OpenAI
+from reportlab.lib.pagesizes import letter
+from reportlab.pdfgen import canvas

 from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
@@ -82,6 +88,14 @@ def skip_if_provider_isnt_vllm(client_with_models, model_id):
         pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support vllm extra_body parameters.")


+def skip_if_provider_isnt_openai(client_with_models, model_id):
+    provider = provider_from_model(client_with_models, model_id)
+    if provider.provider_type != "remote::openai":
+        pytest.skip(
+            f"Model {model_id} hosted by {provider.provider_type} doesn't support chat completion calls with base64 encoded files."
+        )
+
+
 @pytest.fixture
 def openai_client(client_with_models):
     base_url = f"{client_with_models.base_url}/v1/openai/v1"
@@ -418,3 +432,45 @@ def test_inference_store_tool_calls(compat_client, client_with_models, text_model_id):
     # failed tool call parses show up as a message with content, so ensure
     # that the retrieve response content matches the original request
     assert retrieved_response.choices[0].message.content == content
+
+
+def test_openai_chat_completion_non_streaming_with_file(openai_client, client_with_models, text_model_id):
+    skip_if_provider_isnt_openai(client_with_models, text_model_id)
+    # Generate a temporary PDF with "Hello World" text
+    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
+        c = canvas.Canvas(temp_pdf.name, pagesize=letter)
+        c.drawString(100, 750, "Hello World")
+        c.save()
+
+        # Read the PDF and encode it to base64
+        with open(temp_pdf.name, "rb") as pdf_file:
+            pdf_base64 = base64.b64encode(pdf_file.read()).decode("utf-8")
+
+        # Clean up the temporary file
+        os.unlink(temp_pdf.name)
+
+    response = openai_client.chat.completions.create(
+        model=text_model_id,
+        messages=[
+            {
+                "role": "user",
+                "content": "Describe what you see in this PDF file.",
+            },
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "file",
+                        "file": {
+                            "filename": "my-temp-hello-world-pdf",
+                            "file_data": f"data:application/pdf;base64,{pdf_base64}",
+                        },
+                    }
+                ],
+            },
+        ],
+        stream=False,
+    )
+
+    message_content = response.choices[0].message.content.lower().strip()
+    assert "hello world" in message_content

uv.lock (generated, 3998 changed lines): file diff suppressed because it is too large.