mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-30 23:51:00 +00:00
feat: add base64 encoded PDF support for OpenAI Chat Completions (#2881)
Some checks failed
Coverage Badge / unit-tests (push) Failing after 1s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
Integration Tests / discover-tests (push) Successful in 3s
Test Llama Stack Build / generate-matrix (push) Successful in 6s
Vector IO Integration Tests / test-matrix (3.12, inline::milvus) (push) Failing after 12s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 7s
Vector IO Integration Tests / test-matrix (3.12, inline::sqlite-vec) (push) Failing after 13s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 9s
Unit Tests / unit-tests (3.12) (push) Failing after 8s
Vector IO Integration Tests / test-matrix (3.13, inline::faiss) (push) Failing after 14s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Failing after 10s
Vector IO Integration Tests / test-matrix (3.13, inline::sqlite-vec) (push) Failing after 13s
Unit Tests / unit-tests (3.13) (push) Failing after 10s
Test Llama Stack Build / build-single-provider (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.13, remote::chromadb) (push) Failing after 14s
Vector IO Integration Tests / test-matrix (3.13, inline::milvus) (push) Failing after 17s
Vector IO Integration Tests / test-matrix (3.12, inline::faiss) (push) Failing after 19s
Test External API and Providers / test-external (venv) (push) Failing after 16s
Test Llama Stack Build / build (push) Failing after 9s
Python Package Build Test / build (3.12) (push) Failing after 23s
Update ReadTheDocs / update-readthedocs (push) Failing after 21s
Vector IO Integration Tests / test-matrix (3.13, remote::pgvector) (push) Failing after 27s
Vector IO Integration Tests / test-matrix (3.12, remote::chromadb) (push) Failing after 29s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 31s
Vector IO Integration Tests / test-matrix (3.12, remote::pgvector) (push) Failing after 58s
Python Package Build Test / build (3.13) (push) Failing after 54s
Integration Tests / test-matrix (push) Failing after 56s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 1m4s
Pre-commit / pre-commit (push) Successful in 2m15s
Some checks failed
Coverage Badge / unit-tests (push) Failing after 1s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
Integration Tests / discover-tests (push) Successful in 3s
Test Llama Stack Build / generate-matrix (push) Successful in 6s
Vector IO Integration Tests / test-matrix (3.12, inline::milvus) (push) Failing after 12s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 7s
Vector IO Integration Tests / test-matrix (3.12, inline::sqlite-vec) (push) Failing after 13s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 9s
Unit Tests / unit-tests (3.12) (push) Failing after 8s
Vector IO Integration Tests / test-matrix (3.13, inline::faiss) (push) Failing after 14s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Failing after 10s
Vector IO Integration Tests / test-matrix (3.13, inline::sqlite-vec) (push) Failing after 13s
Unit Tests / unit-tests (3.13) (push) Failing after 10s
Test Llama Stack Build / build-single-provider (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.13, remote::chromadb) (push) Failing after 14s
Vector IO Integration Tests / test-matrix (3.13, inline::milvus) (push) Failing after 17s
Vector IO Integration Tests / test-matrix (3.12, inline::faiss) (push) Failing after 19s
Test External API and Providers / test-external (venv) (push) Failing after 16s
Test Llama Stack Build / build (push) Failing after 9s
Python Package Build Test / build (3.12) (push) Failing after 23s
Update ReadTheDocs / update-readthedocs (push) Failing after 21s
Vector IO Integration Tests / test-matrix (3.13, remote::pgvector) (push) Failing after 27s
Vector IO Integration Tests / test-matrix (3.12, remote::chromadb) (push) Failing after 29s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 31s
Vector IO Integration Tests / test-matrix (3.12, remote::pgvector) (push) Failing after 58s
Python Package Build Test / build (3.13) (push) Failing after 54s
Integration Tests / test-matrix (push) Failing after 56s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 1m4s
Pre-commit / pre-commit (push) Successful in 2m15s
# What does this PR do?

OpenAI Chat Completions supports passing a base64-encoded PDF file to a model, but Llama Stack currently does not allow for this behavior. This PR extends our implementation of the OpenAI API spec to change that.

Closes #2129

## Test Plan

A new functional test has been added to test the validity of such a request.

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
This commit is contained in:
parent
cf8722079c
commit
870a37ff4b
6 changed files with 1514 additions and 1200 deletions
41
docs/_static/llama-stack-spec.html
vendored
41
docs/_static/llama-stack-spec.html
vendored
|
@ -9821,13 +9821,17 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam"
|
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/OpenAIFile"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"discriminator": {
|
"discriminator": {
|
||||||
"propertyName": "type",
|
"propertyName": "type",
|
||||||
"mapping": {
|
"mapping": {
|
||||||
"text": "#/components/schemas/OpenAIChatCompletionContentPartTextParam",
|
"text": "#/components/schemas/OpenAIChatCompletionContentPartTextParam",
|
||||||
"image_url": "#/components/schemas/OpenAIChatCompletionContentPartImageParam"
|
"image_url": "#/components/schemas/OpenAIChatCompletionContentPartImageParam",
|
||||||
|
"file": "#/components/schemas/OpenAIFile"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
@ -9974,6 +9978,41 @@
|
||||||
"title": "OpenAIDeveloperMessageParam",
|
"title": "OpenAIDeveloperMessageParam",
|
||||||
"description": "A message from the developer in an OpenAI-compatible chat completion request."
|
"description": "A message from the developer in an OpenAI-compatible chat completion request."
|
||||||
},
|
},
|
||||||
|
"OpenAIFile": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"const": "file",
|
||||||
|
"default": "file"
|
||||||
|
},
|
||||||
|
"file": {
|
||||||
|
"$ref": "#/components/schemas/OpenAIFileFile"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"type",
|
||||||
|
"file"
|
||||||
|
],
|
||||||
|
"title": "OpenAIFile"
|
||||||
|
},
|
||||||
|
"OpenAIFileFile": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"file_data": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"file_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"filename": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"title": "OpenAIFileFile"
|
||||||
|
},
|
||||||
"OpenAIImageURL": {
|
"OpenAIImageURL": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
|
27
docs/_static/llama-stack-spec.yaml
vendored
27
docs/_static/llama-stack-spec.yaml
vendored
|
@ -6934,11 +6934,13 @@ components:
|
||||||
oneOf:
|
oneOf:
|
||||||
- $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
|
- $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
|
||||||
- $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
|
- $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
|
||||||
|
- $ref: '#/components/schemas/OpenAIFile'
|
||||||
discriminator:
|
discriminator:
|
||||||
propertyName: type
|
propertyName: type
|
||||||
mapping:
|
mapping:
|
||||||
text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
|
text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
|
||||||
image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
|
image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
|
||||||
|
file: '#/components/schemas/OpenAIFile'
|
||||||
OpenAIChatCompletionContentPartTextParam:
|
OpenAIChatCompletionContentPartTextParam:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -7050,6 +7052,31 @@ components:
|
||||||
title: OpenAIDeveloperMessageParam
|
title: OpenAIDeveloperMessageParam
|
||||||
description: >-
|
description: >-
|
||||||
A message from the developer in an OpenAI-compatible chat completion request.
|
A message from the developer in an OpenAI-compatible chat completion request.
|
||||||
|
OpenAIFile:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
type:
|
||||||
|
type: string
|
||||||
|
const: file
|
||||||
|
default: file
|
||||||
|
file:
|
||||||
|
$ref: '#/components/schemas/OpenAIFileFile'
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- type
|
||||||
|
- file
|
||||||
|
title: OpenAIFile
|
||||||
|
OpenAIFileFile:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
file_data:
|
||||||
|
type: string
|
||||||
|
file_id:
|
||||||
|
type: string
|
||||||
|
filename:
|
||||||
|
type: string
|
||||||
|
additionalProperties: false
|
||||||
|
title: OpenAIFileFile
|
||||||
OpenAIImageURL:
|
OpenAIImageURL:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
|
@ -455,8 +455,21 @@ class OpenAIChatCompletionContentPartImageParam(BaseModel):
|
||||||
image_url: OpenAIImageURL
|
image_url: OpenAIImageURL
|
||||||
|
|
||||||
|
|
||||||
|
@json_schema_type
|
||||||
|
class OpenAIFileFile(BaseModel):
|
||||||
|
file_data: str | None = None
|
||||||
|
file_id: str | None = None
|
||||||
|
filename: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
@json_schema_type
|
||||||
|
class OpenAIFile(BaseModel):
|
||||||
|
type: Literal["file"] = "file"
|
||||||
|
file: OpenAIFileFile
|
||||||
|
|
||||||
|
|
||||||
OpenAIChatCompletionContentPartParam = Annotated[
|
OpenAIChatCompletionContentPartParam = Annotated[
|
||||||
OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam,
|
OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile,
|
||||||
Field(discriminator="type"),
|
Field(discriminator="type"),
|
||||||
]
|
]
|
||||||
register_schema(OpenAIChatCompletionContentPartParam, name="OpenAIChatCompletionContentPartParam")
|
register_schema(OpenAIChatCompletionContentPartParam, name="OpenAIChatCompletionContentPartParam")
|
||||||
|
|
|
@ -114,6 +114,7 @@ test = [
|
||||||
"sqlalchemy[asyncio]>=2.0.41",
|
"sqlalchemy[asyncio]>=2.0.41",
|
||||||
"requests",
|
"requests",
|
||||||
"pymilvus>=2.5.12",
|
"pymilvus>=2.5.12",
|
||||||
|
"reportlab",
|
||||||
]
|
]
|
||||||
docs = [
|
docs = [
|
||||||
"setuptools",
|
"setuptools",
|
||||||
|
|
|
@ -5,8 +5,14 @@
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
|
from reportlab.lib.pagesizes import letter
|
||||||
|
from reportlab.pdfgen import canvas
|
||||||
|
|
||||||
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
|
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
|
||||||
|
|
||||||
|
@ -82,6 +88,14 @@ def skip_if_provider_isnt_vllm(client_with_models, model_id):
|
||||||
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support vllm extra_body parameters.")
|
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support vllm extra_body parameters.")
|
||||||
|
|
||||||
|
|
||||||
|
def skip_if_provider_isnt_openai(client_with_models, model_id):
|
||||||
|
provider = provider_from_model(client_with_models, model_id)
|
||||||
|
if provider.provider_type != "remote::openai":
|
||||||
|
pytest.skip(
|
||||||
|
f"Model {model_id} hosted by {provider.provider_type} doesn't support chat completion calls with base64 encoded files."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def openai_client(client_with_models):
|
def openai_client(client_with_models):
|
||||||
base_url = f"{client_with_models.base_url}/v1/openai/v1"
|
base_url = f"{client_with_models.base_url}/v1/openai/v1"
|
||||||
|
@ -418,3 +432,45 @@ def test_inference_store_tool_calls(compat_client, client_with_models, text_mode
|
||||||
# failed tool call parses show up as a message with content, so ensure
|
# failed tool call parses show up as a message with content, so ensure
|
||||||
# that the retrieve response content matches the original request
|
# that the retrieve response content matches the original request
|
||||||
assert retrieved_response.choices[0].message.content == content
|
assert retrieved_response.choices[0].message.content == content
|
||||||
|
|
||||||
|
|
||||||
|
def test_openai_chat_completion_non_streaming_with_file(openai_client, client_with_models, text_model_id):
|
||||||
|
skip_if_provider_isnt_openai(client_with_models, text_model_id)
|
||||||
|
|
||||||
|
# Generate temporary PDF with "Hello World" text
|
||||||
|
with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
|
||||||
|
c = canvas.Canvas(temp_pdf.name, pagesize=letter)
|
||||||
|
c.drawString(100, 750, "Hello World")
|
||||||
|
c.save()
|
||||||
|
|
||||||
|
# Read the PDF and encode to base64
|
||||||
|
with open(temp_pdf.name, "rb") as pdf_file:
|
||||||
|
pdf_base64 = base64.b64encode(pdf_file.read()).decode("utf-8")
|
||||||
|
|
||||||
|
# Clean up temporary file
|
||||||
|
os.unlink(temp_pdf.name)
|
||||||
|
|
||||||
|
response = openai_client.chat.completions.create(
|
||||||
|
model=text_model_id,
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Describe what you see in this PDF file.",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "file",
|
||||||
|
"file": {
|
||||||
|
"filename": "my-temp-hello-world-pdf",
|
||||||
|
"file_data": f"data:application/pdf;base64,{pdf_base64}",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
stream=False,
|
||||||
|
)
|
||||||
|
message_content = response.choices[0].message.content.lower().strip()
|
||||||
|
assert "hello world" in message_content
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue