test: add integration test for OpenAI Chat Completion file support

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
Nathan Weinberg 2025-07-22 16:44:15 -04:00
parent 9f96aca4d2
commit 2a599d9a48
4 changed files with 2160 additions and 2012 deletions
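The new test drives the OpenAI Chat Completions file-input path by sending a base64-encoded PDF as a "file" content part. As a rough, standalone illustration of that request shape (outside the test harness), a minimal sketch follows; the base URL, API key, model id, and file path below are placeholders for illustration, not values from this commit:

    # Minimal sketch (assumed client setup): send a base64-encoded PDF as a
    # "file" content part in an OpenAI-compatible chat completion request.
    import base64

    from openai import OpenAI

    # Placeholder endpoint and credentials; the test builds its client from the
    # running llama-stack distribution instead.
    client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

    with open("hello.pdf", "rb") as f:  # placeholder PDF path
        pdf_base64 = base64.b64encode(f.read()).decode("utf-8")

    response = client.chat.completions.create(
        model="gpt-4o-mini",  # placeholder model id
        messages=[
            {"role": "user", "content": "Describe what you see in this PDF file."},
            {
                "role": "user",
                "content": [
                    {
                        "type": "file",
                        "file": {
                            "filename": "hello.pdf",
                            "file_data": f"data:application/pdf;base64,{pdf_base64}",
                        },
                    }
                ],
            },
        ],
    )
    print(response.choices[0].message.content)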

pyproject.toml

@@ -114,6 +114,7 @@ test = [
     "sqlalchemy[asyncio]>=2.0.41",
     "requests",
     "pymilvus>=2.5.12",
+    "reportlab",
 ]
 docs = [
     "setuptools",

requirements.txt

@@ -1,16 +1,16 @@
 # This file was autogenerated by uv via the following command:
 # uv export --frozen --no-hashes --no-emit-project --no-default-groups --output-file=requirements.txt
-aiohappyeyeballs==2.5.0
+aiohappyeyeballs==2.6.1
     # via aiohttp
-aiohttp==3.12.13
+aiohttp==3.12.14
     # via llama-stack
-aiosignal==1.3.2
+aiosignal==1.4.0
     # via aiohttp
 aiosqlite==0.21.0
     # via llama-stack
 annotated-types==0.7.0
     # via pydantic
-anyio==4.8.0
+anyio==4.9.0
     # via
     #   httpx
     #   llama-api-client
@@ -19,21 +19,21 @@ anyio==4.8.0
     #   starlette
 asyncpg==0.30.0
     # via llama-stack
-attrs==25.1.0
+attrs==25.3.0
     # via
     #   aiohttp
     #   jsonschema
     #   referencing
-certifi==2025.1.31
+certifi==2025.7.14
     # via
     #   httpcore
     #   httpx
     #   requests
 cffi==1.17.1 ; platform_python_implementation != 'PyPy'
     # via cryptography
-charset-normalizer==3.4.1
+charset-normalizer==3.4.2
     # via requests
-click==8.1.8
+click==8.2.1
     # via
     #   llama-stack-client
     #   uvicorn
@@ -43,11 +43,6 @@ colorama==0.4.6 ; sys_platform == 'win32'
     #   tqdm
 cryptography==45.0.5
     # via python-jose
-deprecated==1.2.18
-    # via
-    #   opentelemetry-api
-    #   opentelemetry-exporter-otlp-proto-http
-    #   opentelemetry-semantic-conventions
 distro==1.9.0
     # via
     #   llama-api-client
@@ -55,21 +50,21 @@ distro==1.9.0
     #   openai
 ecdsa==0.19.1
     # via python-jose
-fastapi==0.115.8
+fastapi==0.116.1
     # via llama-stack
-filelock==3.17.0
+filelock==3.18.0
     # via huggingface-hub
 fire==0.7.0
     # via
     #   llama-stack
     #   llama-stack-client
-frozenlist==1.5.0
+frozenlist==1.7.0
     # via
     #   aiohttp
     #   aiosignal
-fsspec==2024.12.0
+fsspec==2025.3.0
     # via huggingface-hub
-googleapis-common-protos==1.67.0
+googleapis-common-protos==1.70.0
     # via opentelemetry-exporter-otlp-proto-http
 h11==0.16.0
     # via
@@ -86,7 +81,7 @@ httpx==0.28.1
     #   llama-stack
     #   llama-stack-client
     #   openai
-huggingface-hub==0.33.0
+huggingface-hub==0.33.5
     # via llama-stack
 idna==3.10
     # via
@@ -94,15 +89,15 @@ idna==3.10
     #   httpx
     #   requests
     #   yarl
-importlib-metadata==8.5.0
+importlib-metadata==8.7.0
     # via opentelemetry-api
 jinja2==3.1.6
     # via llama-stack
-jiter==0.8.2
+jiter==0.10.0
     # via openai
-jsonschema==4.23.0
+jsonschema==4.25.0
     # via llama-stack
-jsonschema-specifications==2024.10.1
+jsonschema-specifications==2025.4.1
     # via jsonschema
 llama-api-client==0.1.2
     # via llama-stack
@@ -114,79 +109,79 @@ markupsafe==3.0.2
     # via jinja2
 mdurl==0.1.2
     # via markdown-it-py
-multidict==6.1.0
+multidict==6.6.3
     # via
     #   aiohttp
     #   yarl
-numpy==2.2.3
+numpy==2.3.1
     # via pandas
-openai==1.71.0
+openai==1.97.1
     # via llama-stack
-opentelemetry-api==1.30.0
+opentelemetry-api==1.35.0
     # via
     #   opentelemetry-exporter-otlp-proto-http
     #   opentelemetry-sdk
     #   opentelemetry-semantic-conventions
-opentelemetry-exporter-otlp-proto-common==1.30.0
+opentelemetry-exporter-otlp-proto-common==1.35.0
     # via opentelemetry-exporter-otlp-proto-http
-opentelemetry-exporter-otlp-proto-http==1.30.0
+opentelemetry-exporter-otlp-proto-http==1.35.0
     # via llama-stack
-opentelemetry-proto==1.30.0
+opentelemetry-proto==1.35.0
     # via
     #   opentelemetry-exporter-otlp-proto-common
     #   opentelemetry-exporter-otlp-proto-http
-opentelemetry-sdk==1.30.0
+opentelemetry-sdk==1.35.0
     # via
     #   llama-stack
     #   opentelemetry-exporter-otlp-proto-http
-opentelemetry-semantic-conventions==0.51b0
+opentelemetry-semantic-conventions==0.56b0
     # via opentelemetry-sdk
-packaging==24.2
+packaging==25.0
     # via huggingface-hub
-pandas==2.2.3
+pandas==2.3.1
     # via llama-stack-client
-pillow==11.1.0
+pillow==11.3.0
     # via llama-stack
-prompt-toolkit==3.0.50
+prompt-toolkit==3.0.51
     # via
     #   llama-stack
     #   llama-stack-client
-propcache==0.3.0
+propcache==0.3.2
     # via
     #   aiohttp
     #   yarl
-protobuf==5.29.5
+protobuf==6.31.1
     # via
     #   googleapis-common-protos
     #   opentelemetry-proto
-pyaml==25.1.0
+pyaml==25.7.0
     # via llama-stack-client
-pyasn1==0.4.8
+pyasn1==0.6.1
     # via
     #   python-jose
     #   rsa
 pycparser==2.22 ; platform_python_implementation != 'PyPy'
     # via cffi
-pydantic==2.10.6
+pydantic==2.11.7
     # via
     #   fastapi
     #   llama-api-client
     #   llama-stack
     #   llama-stack-client
     #   openai
-pydantic-core==2.27.2
+pydantic-core==2.33.2
     # via pydantic
-pygments==2.19.1
+pygments==2.19.2
     # via rich
 python-dateutil==2.9.0.post0
     # via pandas
-python-dotenv==1.0.1
+python-dotenv==1.1.1
     # via llama-stack
-python-jose==3.4.0
+python-jose==3.5.0
     # via llama-stack
 python-multipart==0.0.20
     # via llama-stack
-pytz==2025.1
+pytz==2025.2
     # via pandas
 pyyaml==6.0.2
     # via
@@ -204,15 +199,15 @@ requests==2.32.4
     #   llama-stack-client
     #   opentelemetry-exporter-otlp-proto-http
     #   tiktoken
-rich==13.9.4
+rich==14.0.0
     # via
     #   llama-stack
     #   llama-stack-client
-rpds-py==0.22.3
+rpds-py==0.26.0
     # via
     #   jsonschema
     #   referencing
-rsa==4.9
+rsa==4.9.1
     # via python-jose
 six==1.17.0
     # via
@@ -224,11 +219,11 @@ sniffio==1.3.1
     #   llama-api-client
     #   llama-stack-client
     #   openai
-starlette==0.45.3
+starlette==0.47.2
     # via
     #   fastapi
     #   llama-stack
-termcolor==2.5.0
+termcolor==3.1.0
     # via
     #   fire
     #   llama-stack
@@ -240,8 +235,9 @@ tqdm==4.67.1
     #   huggingface-hub
     #   llama-stack-client
     #   openai
-typing-extensions==4.12.2
+typing-extensions==4.14.1
     # via
+    #   aiosignal
     #   aiosqlite
     #   anyio
     #   fastapi
@@ -249,21 +245,26 @@ typing-extensions==4.12.2
     #   llama-api-client
     #   llama-stack-client
     #   openai
+    #   opentelemetry-api
+    #   opentelemetry-exporter-otlp-proto-http
     #   opentelemetry-sdk
+    #   opentelemetry-semantic-conventions
     #   pydantic
     #   pydantic-core
     #   referencing
-tzdata==2025.1
+    #   starlette
+    #   typing-inspection
+typing-inspection==0.4.1
+    # via pydantic
+tzdata==2025.2
     # via pandas
 urllib3==2.5.0
     # via requests
-uvicorn==0.34.0
+uvicorn==0.35.0
     # via llama-stack
 wcwidth==0.2.13
     # via prompt-toolkit
-wrapt==1.17.2
-    # via deprecated
-yarl==1.18.3
+yarl==1.20.1
     # via aiohttp
-zipp==3.21.0
+zipp==3.23.0
     # via importlib-metadata

(integration test file)

@@ -5,8 +5,14 @@
 # the root directory of this source tree.
 
+import base64
+import os
+import tempfile
+
 import pytest
 from openai import OpenAI
+from reportlab.lib.pagesizes import letter
+from reportlab.pdfgen import canvas
 
 from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
@@ -82,6 +88,14 @@ def skip_if_provider_isnt_vllm(client_with_models, model_id):
         pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support vllm extra_body parameters.")
 
 
+def skip_if_provider_isnt_openai(client_with_models, model_id):
+    provider = provider_from_model(client_with_models, model_id)
+    if provider.provider_type != "remote::openai":
+        pytest.skip(
+            f"Model {model_id} hosted by {provider.provider_type} doesn't support chat completion calls with base64 encoded files."
+        )
+
+
 @pytest.fixture
 def openai_client(client_with_models):
     base_url = f"{client_with_models.base_url}/v1/openai/v1"
@@ -418,3 +432,45 @@ def test_inference_store_tool_calls(compat_client, client_with_models, text_model_id
         # failed tool call parses show up as a message with content, so ensure
         # that the retrieve response content matches the original request
         assert retrieved_response.choices[0].message.content == content
+
+
+def test_openai_chat_completion_non_streaming_with_file(openai_client, client_with_models, text_model_id):
+    skip_if_provider_isnt_openai(client_with_models, text_model_id)
+
+    # Generate a temporary PDF with "Hello World" text
+    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
+        c = canvas.Canvas(temp_pdf.name, pagesize=letter)
+        c.drawString(100, 750, "Hello World")
+        c.save()
+
+    # Read the PDF and encode it to base64
+    with open(temp_pdf.name, "rb") as pdf_file:
+        pdf_base64 = base64.b64encode(pdf_file.read()).decode("utf-8")
+
+    # Clean up the temporary file
+    os.unlink(temp_pdf.name)
+
+    response = openai_client.chat.completions.create(
+        model=text_model_id,
+        messages=[
+            {
+                "role": "user",
+                "content": "Describe what you see in this PDF file.",
+            },
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "file",
+                        "file": {
+                            "filename": "my-temp-hello-world-pdf",
+                            "file_data": f"data:application/pdf;base64,{pdf_base64}",
+                        },
+                    }
+                ],
+            },
+        ],
+        stream=False,
+    )
+    message_content = response.choices[0].message.content.lower().strip()
+    assert "hello world" in message_content

uv.lock (generated, 3998 lines changed): diff suppressed because it is too large.