Nathan Weinberg 2025-07-24 21:54:23 -04:00 committed by GitHub
commit e6d03d35dc
7 changed files with 2241 additions and 2014 deletions

View file

@@ -9821,13 +9821,17 @@
           },
           {
             "$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam"
+          },
+          {
+            "$ref": "#/components/schemas/OpenAIFile"
           }
         ],
         "discriminator": {
           "propertyName": "type",
           "mapping": {
             "text": "#/components/schemas/OpenAIChatCompletionContentPartTextParam",
-            "image_url": "#/components/schemas/OpenAIChatCompletionContentPartImageParam"
+            "image_url": "#/components/schemas/OpenAIChatCompletionContentPartImageParam",
+            "file": "#/components/schemas/OpenAIFile"
           }
         }
       },
@@ -9974,6 +9978,41 @@
        "title": "OpenAIDeveloperMessageParam",
        "description": "A message from the developer in an OpenAI-compatible chat completion request."
      },
+     "OpenAIFile": {
+       "type": "object",
+       "properties": {
+         "type": {
+           "type": "string",
+           "const": "file",
+           "default": "file"
+         },
+         "file": {
+           "$ref": "#/components/schemas/OpenAIFileFile"
+         }
+       },
+       "additionalProperties": false,
+       "required": [
+         "type",
+         "file"
+       ],
+       "title": "OpenAIFile"
+     },
+     "OpenAIFileFile": {
+       "type": "object",
+       "properties": {
+         "file_data": {
+           "type": "string"
+         },
+         "file_id": {
+           "type": "string"
+         },
+         "filename": {
+           "type": "string"
+         }
+       },
+       "additionalProperties": false,
+       "title": "OpenAIFileFile"
+     },
      "OpenAIImageURL": {
        "type": "object",
        "properties": {

View file

@@ -6934,11 +6934,13 @@ components:
       oneOf:
         - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
         - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
+        - $ref: '#/components/schemas/OpenAIFile'
       discriminator:
         propertyName: type
         mapping:
           text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
           image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
+          file: '#/components/schemas/OpenAIFile'
     OpenAIChatCompletionContentPartTextParam:
       type: object
       properties:
@@ -7050,6 +7052,31 @@ components:
       title: OpenAIDeveloperMessageParam
       description: >-
         A message from the developer in an OpenAI-compatible chat completion request.
+    OpenAIFile:
+      type: object
+      properties:
+        type:
+          type: string
+          const: file
+          default: file
+        file:
+          $ref: '#/components/schemas/OpenAIFileFile'
+      additionalProperties: false
+      required:
+        - type
+        - file
+      title: OpenAIFile
+    OpenAIFileFile:
+      type: object
+      properties:
+        file_data:
+          type: string
+        file_id:
+          type: string
+        filename:
+          type: string
+      additionalProperties: false
+      title: OpenAIFileFile
     OpenAIImageURL:
       type: object
       properties:
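
Both the JSON and YAML spec changes above describe the same wire shape. For reference, a content part that validates against the new OpenAIFile schema might look like the following sketch (values are illustrative placeholders, not taken from this commit); all three OpenAIFileFile fields are optional, with file_data carrying an inline base64 data URL and file_id referencing an already-uploaded file:

# Illustrative sketch of a "file" content part allowed by the new schema.
file_content_part = {
    "type": "file",
    "file": {
        "filename": "example.pdf",                                 # optional
        "file_data": "data:application/pdf;base64,JVBERi0xLjQ=",   # optional inline payload
        # "file_id": "file-abc123",                                # optional reference instead of file_data
    },
}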

View file

@@ -455,8 +455,21 @@ class OpenAIChatCompletionContentPartImageParam(BaseModel):
     image_url: OpenAIImageURL
 
 
+@json_schema_type
+class OpenAIFileFile(BaseModel):
+    file_data: str | None = None
+    file_id: str | None = None
+    filename: str | None = None
+
+
+@json_schema_type
+class OpenAIFile(BaseModel):
+    type: Literal["file"] = "file"
+    file: OpenAIFileFile
+
+
 OpenAIChatCompletionContentPartParam = Annotated[
-    OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile,
     Field(discriminator="type"),
 ]
 register_schema(OpenAIChatCompletionContentPartParam, name="OpenAIChatCompletionContentPartParam")
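
A minimal sketch of how the extended union behaves (assumptions: the new models above are in scope and Pydantic v2 is used, as elsewhere in this module; the payload values are placeholders):

# Minimal sketch, not part of the commit: the "type" discriminator routes a
# dict with type == "file" to the new OpenAIFile model.
from pydantic import TypeAdapter

adapter = TypeAdapter(OpenAIChatCompletionContentPartParam)
part = adapter.validate_python(
    {
        "type": "file",
        "file": {"filename": "hello.pdf", "file_data": "data:application/pdf;base64,JVBERi0xLjQ="},
    }
)
assert isinstance(part, OpenAIFile)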

View file

@@ -114,6 +114,7 @@ test = [
     "sqlalchemy[asyncio]>=2.0.41",
     "requests",
     "pymilvus>=2.5.12",
+    "reportlab",
 ]
 docs = [
     "setuptools",

View file

@@ -1,16 +1,16 @@
 # This file was autogenerated by uv via the following command:
 #    uv export --frozen --no-hashes --no-emit-project --no-default-groups --output-file=requirements.txt
-aiohappyeyeballs==2.5.0
+aiohappyeyeballs==2.6.1
     # via aiohttp
-aiohttp==3.12.13
+aiohttp==3.12.14
     # via llama-stack
-aiosignal==1.3.2
+aiosignal==1.4.0
     # via aiohttp
 aiosqlite==0.21.0
     # via llama-stack
 annotated-types==0.7.0
     # via pydantic
-anyio==4.8.0
+anyio==4.9.0
     # via
     #   httpx
     #   llama-api-client
@@ -19,21 +19,21 @@ anyio==4.8.0
     #   starlette
 asyncpg==0.30.0
     # via llama-stack
-attrs==25.1.0
+attrs==25.3.0
     # via
     #   aiohttp
     #   jsonschema
     #   referencing
-certifi==2025.1.31
+certifi==2025.7.14
     # via
     #   httpcore
     #   httpx
     #   requests
 cffi==1.17.1 ; platform_python_implementation != 'PyPy'
     # via cryptography
-charset-normalizer==3.4.1
+charset-normalizer==3.4.2
     # via requests
-click==8.1.8
+click==8.2.1
     # via
     #   llama-stack-client
     #   uvicorn
@@ -43,11 +43,6 @@ colorama==0.4.6 ; sys_platform == 'win32'
     #   tqdm
 cryptography==45.0.5
     # via python-jose
-deprecated==1.2.18
-    # via
-    #   opentelemetry-api
-    #   opentelemetry-exporter-otlp-proto-http
-    #   opentelemetry-semantic-conventions
 distro==1.9.0
     # via
     #   llama-api-client
@@ -55,21 +50,21 @@ distro==1.9.0
     #   openai
 ecdsa==0.19.1
     # via python-jose
-fastapi==0.115.8
+fastapi==0.116.1
     # via llama-stack
-filelock==3.17.0
+filelock==3.18.0
     # via huggingface-hub
 fire==0.7.0
     # via
     #   llama-stack
     #   llama-stack-client
-frozenlist==1.5.0
+frozenlist==1.7.0
     # via
     #   aiohttp
     #   aiosignal
-fsspec==2024.12.0
+fsspec==2025.3.0
     # via huggingface-hub
-googleapis-common-protos==1.67.0
+googleapis-common-protos==1.70.0
     # via opentelemetry-exporter-otlp-proto-http
 h11==0.16.0
     # via
@@ -86,7 +81,7 @@ httpx==0.28.1
     #   llama-stack
     #   llama-stack-client
     #   openai
-huggingface-hub==0.33.0
+huggingface-hub==0.33.5
     # via llama-stack
 idna==3.10
     # via
@@ -94,15 +89,15 @@ idna==3.10
     #   httpx
     #   requests
     #   yarl
-importlib-metadata==8.5.0
+importlib-metadata==8.7.0
     # via opentelemetry-api
 jinja2==3.1.6
     # via llama-stack
-jiter==0.8.2
+jiter==0.10.0
     # via openai
-jsonschema==4.23.0
+jsonschema==4.25.0
     # via llama-stack
-jsonschema-specifications==2024.10.1
+jsonschema-specifications==2025.4.1
     # via jsonschema
 llama-api-client==0.1.2
     # via llama-stack
@@ -114,79 +109,79 @@ markupsafe==3.0.2
     # via jinja2
 mdurl==0.1.2
     # via markdown-it-py
-multidict==6.1.0
+multidict==6.6.3
     # via
     #   aiohttp
     #   yarl
-numpy==2.2.3
+numpy==2.3.1
     # via pandas
-openai==1.71.0
+openai==1.97.1
     # via llama-stack
-opentelemetry-api==1.30.0
+opentelemetry-api==1.35.0
     # via
     #   opentelemetry-exporter-otlp-proto-http
     #   opentelemetry-sdk
     #   opentelemetry-semantic-conventions
-opentelemetry-exporter-otlp-proto-common==1.30.0
+opentelemetry-exporter-otlp-proto-common==1.35.0
     # via opentelemetry-exporter-otlp-proto-http
-opentelemetry-exporter-otlp-proto-http==1.30.0
+opentelemetry-exporter-otlp-proto-http==1.35.0
     # via llama-stack
-opentelemetry-proto==1.30.0
+opentelemetry-proto==1.35.0
     # via
     #   opentelemetry-exporter-otlp-proto-common
     #   opentelemetry-exporter-otlp-proto-http
-opentelemetry-sdk==1.30.0
+opentelemetry-sdk==1.35.0
     # via
     #   llama-stack
     #   opentelemetry-exporter-otlp-proto-http
-opentelemetry-semantic-conventions==0.51b0
+opentelemetry-semantic-conventions==0.56b0
     # via opentelemetry-sdk
-packaging==24.2
+packaging==25.0
     # via huggingface-hub
-pandas==2.2.3
+pandas==2.3.1
     # via llama-stack-client
-pillow==11.1.0
+pillow==11.3.0
     # via llama-stack
-prompt-toolkit==3.0.50
+prompt-toolkit==3.0.51
     # via
     #   llama-stack
     #   llama-stack-client
-propcache==0.3.0
+propcache==0.3.2
     # via
     #   aiohttp
     #   yarl
-protobuf==5.29.5
+protobuf==6.31.1
     # via
     #   googleapis-common-protos
     #   opentelemetry-proto
-pyaml==25.1.0
+pyaml==25.7.0
     # via llama-stack-client
-pyasn1==0.4.8
+pyasn1==0.6.1
     # via
     #   python-jose
     #   rsa
 pycparser==2.22 ; platform_python_implementation != 'PyPy'
     # via cffi
-pydantic==2.10.6
+pydantic==2.11.7
     # via
     #   fastapi
     #   llama-api-client
     #   llama-stack
     #   llama-stack-client
     #   openai
-pydantic-core==2.27.2
+pydantic-core==2.33.2
     # via pydantic
-pygments==2.19.1
+pygments==2.19.2
     # via rich
 python-dateutil==2.9.0.post0
     # via pandas
-python-dotenv==1.0.1
+python-dotenv==1.1.1
     # via llama-stack
-python-jose==3.4.0
+python-jose==3.5.0
     # via llama-stack
 python-multipart==0.0.20
     # via llama-stack
-pytz==2025.1
+pytz==2025.2
     # via pandas
 pyyaml==6.0.2
     # via
@@ -204,15 +199,15 @@ requests==2.32.4
     #   llama-stack-client
     #   opentelemetry-exporter-otlp-proto-http
     #   tiktoken
-rich==13.9.4
+rich==14.0.0
     # via
     #   llama-stack
     #   llama-stack-client
-rpds-py==0.22.3
+rpds-py==0.26.0
     # via
     #   jsonschema
     #   referencing
-rsa==4.9
+rsa==4.9.1
     # via python-jose
 six==1.17.0
     # via
@@ -224,11 +219,11 @@ sniffio==1.3.1
     #   llama-api-client
     #   llama-stack-client
     #   openai
-starlette==0.45.3
+starlette==0.47.2
     # via
     #   fastapi
     #   llama-stack
-termcolor==2.5.0
+termcolor==3.1.0
     # via
     #   fire
     #   llama-stack
@@ -240,8 +235,9 @@ tqdm==4.67.1
     #   huggingface-hub
     #   llama-stack-client
     #   openai
-typing-extensions==4.12.2
+typing-extensions==4.14.1
     # via
+    #   aiosignal
     #   aiosqlite
     #   anyio
     #   fastapi
@@ -249,21 +245,26 @@ typing-extensions==4.12.2
     #   llama-api-client
     #   llama-stack-client
     #   openai
+    #   opentelemetry-api
+    #   opentelemetry-exporter-otlp-proto-http
     #   opentelemetry-sdk
+    #   opentelemetry-semantic-conventions
     #   pydantic
     #   pydantic-core
     #   referencing
-tzdata==2025.1
+    #   starlette
+    #   typing-inspection
+typing-inspection==0.4.1
+    # via pydantic
+tzdata==2025.2
     # via pandas
 urllib3==2.5.0
     # via requests
-uvicorn==0.34.0
+uvicorn==0.35.0
     # via llama-stack
 wcwidth==0.2.13
     # via prompt-toolkit
-wrapt==1.17.2
-    # via deprecated
-yarl==1.18.3
+yarl==1.20.1
     # via aiohttp
-zipp==3.21.0
+zipp==3.23.0
     # via importlib-metadata

View file

@@ -5,8 +5,14 @@
 # the root directory of this source tree.
 
+import base64
+import os
+import tempfile
+
 import pytest
 from openai import OpenAI
+from reportlab.lib.pagesizes import letter
+from reportlab.pdfgen import canvas
 
 from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
@@ -82,6 +88,14 @@ def skip_if_provider_isnt_vllm(client_with_models, model_id):
         pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support vllm extra_body parameters.")
 
 
+def skip_if_provider_isnt_openai(client_with_models, model_id):
+    provider = provider_from_model(client_with_models, model_id)
+    if provider.provider_type != "remote::openai":
+        pytest.skip(
+            f"Model {model_id} hosted by {provider.provider_type} doesn't support chat completion calls with base64 encoded files."
+        )
+
+
 @pytest.fixture
 def openai_client(client_with_models):
     base_url = f"{client_with_models.base_url}/v1/openai/v1"
@@ -418,3 +432,45 @@ def test_inference_store_tool_calls(compat_client, client_with_models, text_mode
     # failed tool call parses show up as a message with content, so ensure
     # that the retrieve response content matches the original request
     assert retrieved_response.choices[0].message.content == content
+
+
+def test_openai_chat_completion_non_streaming_with_file(openai_client, client_with_models, text_model_id):
+    skip_if_provider_isnt_openai(client_with_models, text_model_id)
+
+    # Generate a temporary PDF with "Hello World" text
+    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
+        c = canvas.Canvas(temp_pdf.name, pagesize=letter)
+        c.drawString(100, 750, "Hello World")
+        c.save()
+
+        # Read the PDF and encode it to base64
+        with open(temp_pdf.name, "rb") as pdf_file:
+            pdf_base64 = base64.b64encode(pdf_file.read()).decode("utf-8")
+
+        # Clean up the temporary file
+        os.unlink(temp_pdf.name)
+
+    response = openai_client.chat.completions.create(
+        model=text_model_id,
+        messages=[
+            {
+                "role": "user",
+                "content": "Describe what you see in this PDF file.",
+            },
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "file",
+                        "file": {
+                            "filename": "my-temp-hello-world-pdf",
+                            "file_data": f"data:application/pdf;base64,{pdf_base64}",
+                        },
+                    }
+                ],
+            },
+        ],
+        stream=False,
+    )
+    message_content = response.choices[0].message.content.lower().strip()
+    assert "hello world" in message_content

uv.lock (generated): 3998 changes; diff suppressed because it is too large.