Merge 2a599d9a48 into 3216765c26

2025-07-25 21:57:45 +00:00 · 2025-07-24 21:54:23 -04:00 · 2025-07-24 21:54:23 -04:00 · e6d03d35dc
commit e6d03d35dc
parent 3216765c26 2a599d9a48
7 changed files with 2241 additions and 2014 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@ -9821,13 +9821,17 @@
                    },
                    {
                        "$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam"
+                    },
+                    {
+                        "$ref": "#/components/schemas/OpenAIFile"
                    }
                ],
                "discriminator": {
                    "propertyName": "type",
                    "mapping": {
                        "text": "#/components/schemas/OpenAIChatCompletionContentPartTextParam",
-                        "image_url": "#/components/schemas/OpenAIChatCompletionContentPartImageParam"
+                        "image_url": "#/components/schemas/OpenAIChatCompletionContentPartImageParam",
+                        "file": "#/components/schemas/OpenAIFile"
                    }
                }
            },
@ -9974,6 +9978,41 @@
                "title": "OpenAIDeveloperMessageParam",
                "description": "A message from the developer in an OpenAI-compatible chat completion request."
            },
+            "OpenAIFile": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "file",
+                        "default": "file"
+                    },
+                    "file": {
+                        "$ref": "#/components/schemas/OpenAIFileFile"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type",
+                    "file"
+                ],
+                "title": "OpenAIFile"
+            },
+            "OpenAIFileFile": {
+                "type": "object",
+                "properties": {
+                    "file_data": {
+                        "type": "string"
+                    },
+                    "file_id": {
+                        "type": "string"
+                    },
+                    "filename": {
+                        "type": "string"
+                    }
+                },
+                "additionalProperties": false,
+                "title": "OpenAIFileFile"
+            },
            "OpenAIImageURL": {
                "type": "object",
                "properties": {
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@ -6934,11 +6934,13 @@ components:
      oneOf:
        - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
        - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
+        - $ref: '#/components/schemas/OpenAIFile'
      discriminator:
        propertyName: type
        mapping:
          text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
          image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
+          file: '#/components/schemas/OpenAIFile'
    OpenAIChatCompletionContentPartTextParam:
      type: object
      properties:
@ -7050,6 +7052,31 @@ components:
      title: OpenAIDeveloperMessageParam
      description: >-
        A message from the developer in an OpenAI-compatible chat completion request.
+    OpenAIFile:
+      type: object
+      properties:
+        type:
+          type: string
+          const: file
+          default: file
+        file:
+          $ref: '#/components/schemas/OpenAIFileFile'
+      additionalProperties: false
+      required:
+        - type
+        - file
+      title: OpenAIFile
+    OpenAIFileFile:
+      type: object
+      properties:
+        file_data:
+          type: string
+        file_id:
+          type: string
+        filename:
+          type: string
+      additionalProperties: false
+      title: OpenAIFileFile
    OpenAIImageURL:
      type: object
      properties:
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@ -455,8 +455,21 @@ class OpenAIChatCompletionContentPartImageParam(BaseModel):
    image_url: OpenAIImageURL


+@json_schema_type
+class OpenAIFileFile(BaseModel):
+    file_data: str | None = None
+    file_id: str | None = None
+    filename: str | None = None
+
+
+@json_schema_type
+class OpenAIFile(BaseModel):
+    type: Literal["file"] = "file"
+    file: OpenAIFileFile
+
+
 OpenAIChatCompletionContentPartParam = Annotated[
-    OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile,
    Field(discriminator="type"),
 ]
 register_schema(OpenAIChatCompletionContentPartParam, name="OpenAIChatCompletionContentPartParam")
--- a/pyproject.toml
+++ b/pyproject.toml
@ -114,6 +114,7 @@ test = [
    "sqlalchemy[asyncio]>=2.0.41",
    "requests",
    "pymilvus>=2.5.12",
+    "reportlab",
 ]
 docs = [
    "setuptools",
--- a/requirements.txt
+++ b/requirements.txt
@ -1,16 +1,16 @@
 # This file was autogenerated by uv via the following command:
 #    uv export --frozen --no-hashes --no-emit-project --no-default-groups --output-file=requirements.txt
-aiohappyeyeballs==2.5.0
+aiohappyeyeballs==2.6.1
    # via aiohttp
-aiohttp==3.12.13
+aiohttp==3.12.14
    # via llama-stack
-aiosignal==1.3.2
+aiosignal==1.4.0
    # via aiohttp
 aiosqlite==0.21.0
    # via llama-stack
 annotated-types==0.7.0
    # via pydantic
-anyio==4.8.0
+anyio==4.9.0
    # via
    #   httpx
    #   llama-api-client
@ -19,21 +19,21 @@ anyio==4.8.0
    #   starlette
 asyncpg==0.30.0
    # via llama-stack
-attrs==25.1.0
+attrs==25.3.0
    # via
    #   aiohttp
    #   jsonschema
    #   referencing
-certifi==2025.1.31
+certifi==2025.7.14
    # via
    #   httpcore
    #   httpx
    #   requests
 cffi==1.17.1 ; platform_python_implementation != 'PyPy'
    # via cryptography
-charset-normalizer==3.4.1
+charset-normalizer==3.4.2
    # via requests
-click==8.1.8
+click==8.2.1
    # via
    #   llama-stack-client
    #   uvicorn
@ -43,11 +43,6 @@ colorama==0.4.6 ; sys_platform == 'win32'
    #   tqdm
 cryptography==45.0.5
    # via python-jose
-deprecated==1.2.18
-    # via
-    #   opentelemetry-api
-    #   opentelemetry-exporter-otlp-proto-http
-    #   opentelemetry-semantic-conventions
 distro==1.9.0
    # via
    #   llama-api-client
@ -55,21 +50,21 @@ distro==1.9.0
    #   openai
 ecdsa==0.19.1
    # via python-jose
-fastapi==0.115.8
+fastapi==0.116.1
    # via llama-stack
-filelock==3.17.0
+filelock==3.18.0
    # via huggingface-hub
 fire==0.7.0
    # via
    #   llama-stack
    #   llama-stack-client
-frozenlist==1.5.0
+frozenlist==1.7.0
    # via
    #   aiohttp
    #   aiosignal
-fsspec==2024.12.0
+fsspec==2025.3.0
    # via huggingface-hub
-googleapis-common-protos==1.67.0
+googleapis-common-protos==1.70.0
    # via opentelemetry-exporter-otlp-proto-http
 h11==0.16.0
    # via
@ -86,7 +81,7 @@ httpx==0.28.1
    #   llama-stack
    #   llama-stack-client
    #   openai
-huggingface-hub==0.33.0
+huggingface-hub==0.33.5
    # via llama-stack
 idna==3.10
    # via
@ -94,15 +89,15 @@ idna==3.10
    #   httpx
    #   requests
    #   yarl
-importlib-metadata==8.5.0
+importlib-metadata==8.7.0
    # via opentelemetry-api
 jinja2==3.1.6
    # via llama-stack
-jiter==0.8.2
+jiter==0.10.0
    # via openai
-jsonschema==4.23.0
+jsonschema==4.25.0
    # via llama-stack
-jsonschema-specifications==2024.10.1
+jsonschema-specifications==2025.4.1
    # via jsonschema
 llama-api-client==0.1.2
    # via llama-stack
@ -114,79 +109,79 @@ markupsafe==3.0.2
    # via jinja2
 mdurl==0.1.2
    # via markdown-it-py
-multidict==6.1.0
+multidict==6.6.3
    # via
    #   aiohttp
    #   yarl
-numpy==2.2.3
+numpy==2.3.1
    # via pandas
-openai==1.71.0
+openai==1.97.1
    # via llama-stack
-opentelemetry-api==1.30.0
+opentelemetry-api==1.35.0
    # via
    #   opentelemetry-exporter-otlp-proto-http
    #   opentelemetry-sdk
    #   opentelemetry-semantic-conventions
-opentelemetry-exporter-otlp-proto-common==1.30.0
+opentelemetry-exporter-otlp-proto-common==1.35.0
    # via opentelemetry-exporter-otlp-proto-http
-opentelemetry-exporter-otlp-proto-http==1.30.0
+opentelemetry-exporter-otlp-proto-http==1.35.0
    # via llama-stack
-opentelemetry-proto==1.30.0
+opentelemetry-proto==1.35.0
    # via
    #   opentelemetry-exporter-otlp-proto-common
    #   opentelemetry-exporter-otlp-proto-http
-opentelemetry-sdk==1.30.0
+opentelemetry-sdk==1.35.0
    # via
    #   llama-stack
    #   opentelemetry-exporter-otlp-proto-http
-opentelemetry-semantic-conventions==0.51b0
+opentelemetry-semantic-conventions==0.56b0
    # via opentelemetry-sdk
-packaging==24.2
+packaging==25.0
    # via huggingface-hub
-pandas==2.2.3
+pandas==2.3.1
    # via llama-stack-client
-pillow==11.1.0
+pillow==11.3.0
    # via llama-stack
-prompt-toolkit==3.0.50
+prompt-toolkit==3.0.51
    # via
    #   llama-stack
    #   llama-stack-client
-propcache==0.3.0
+propcache==0.3.2
    # via
    #   aiohttp
    #   yarl
-protobuf==5.29.5
+protobuf==6.31.1
    # via
    #   googleapis-common-protos
    #   opentelemetry-proto
-pyaml==25.1.0
+pyaml==25.7.0
    # via llama-stack-client
-pyasn1==0.4.8
+pyasn1==0.6.1
    # via
    #   python-jose
    #   rsa
 pycparser==2.22 ; platform_python_implementation != 'PyPy'
    # via cffi
-pydantic==2.10.6
+pydantic==2.11.7
    # via
    #   fastapi
    #   llama-api-client
    #   llama-stack
    #   llama-stack-client
    #   openai
-pydantic-core==2.27.2
+pydantic-core==2.33.2
    # via pydantic
-pygments==2.19.1
+pygments==2.19.2
    # via rich
 python-dateutil==2.9.0.post0
    # via pandas
-python-dotenv==1.0.1
+python-dotenv==1.1.1
    # via llama-stack
-python-jose==3.4.0
+python-jose==3.5.0
    # via llama-stack
 python-multipart==0.0.20
    # via llama-stack
-pytz==2025.1
+pytz==2025.2
    # via pandas
 pyyaml==6.0.2
    # via
@ -204,15 +199,15 @@ requests==2.32.4
    #   llama-stack-client
    #   opentelemetry-exporter-otlp-proto-http
    #   tiktoken
-rich==13.9.4
+rich==14.0.0
    # via
    #   llama-stack
    #   llama-stack-client
-rpds-py==0.22.3
+rpds-py==0.26.0
    # via
    #   jsonschema
    #   referencing
-rsa==4.9
+rsa==4.9.1
    # via python-jose
 six==1.17.0
    # via
@ -224,11 +219,11 @@ sniffio==1.3.1
    #   llama-api-client
    #   llama-stack-client
    #   openai
-starlette==0.45.3
+starlette==0.47.2
    # via
    #   fastapi
    #   llama-stack
-termcolor==2.5.0
+termcolor==3.1.0
    # via
    #   fire
    #   llama-stack
@ -240,8 +235,9 @@ tqdm==4.67.1
    #   huggingface-hub
    #   llama-stack-client
    #   openai
-typing-extensions==4.12.2
+typing-extensions==4.14.1
    # via
+    #   aiosignal
    #   aiosqlite
    #   anyio
    #   fastapi
@ -249,21 +245,26 @@ typing-extensions==4.12.2
    #   llama-api-client
    #   llama-stack-client
    #   openai
+    #   opentelemetry-api
+    #   opentelemetry-exporter-otlp-proto-http
    #   opentelemetry-sdk
+    #   opentelemetry-semantic-conventions
    #   pydantic
    #   pydantic-core
    #   referencing
-tzdata==2025.1
+    #   starlette
+    #   typing-inspection
+typing-inspection==0.4.1
+    # via pydantic
+tzdata==2025.2
    # via pandas
 urllib3==2.5.0
    # via requests
-uvicorn==0.34.0
+uvicorn==0.35.0
    # via llama-stack
 wcwidth==0.2.13
    # via prompt-toolkit
-wrapt==1.17.2
-    # via deprecated
-yarl==1.18.3
+yarl==1.20.1
    # via aiohttp
-zipp==3.21.0
+zipp==3.23.0
    # via importlib-metadata
--- a/tests/integration/inference/test_openai_completion.py
+++ b/tests/integration/inference/test_openai_completion.py
@ -5,8 +5,14 @@
 # the root directory of this source tree.


+import base64
+import os
+import tempfile
+
 import pytest
 from openai import OpenAI
+from reportlab.lib.pagesizes import letter
+from reportlab.pdfgen import canvas

 from llama_stack.distribution.library_client import LlamaStackAsLibraryClient

@ -82,6 +88,14 @@ def skip_if_provider_isnt_vllm(client_with_models, model_id):
        pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support vllm extra_body parameters.")


+def skip_if_provider_isnt_openai(client_with_models, model_id):
+    provider = provider_from_model(client_with_models, model_id)
+    if provider.provider_type != "remote::openai":
+        pytest.skip(
+            f"Model {model_id} hosted by {provider.provider_type} doesn't support chat completion calls with base64 encoded files."
+        )
+
+
@pytest.fixture
 def openai_client(client_with_models):
    base_url = f"{client_with_models.base_url}/v1/openai/v1"
@ -418,3 +432,45 @@ def test_inference_store_tool_calls(compat_client, client_with_models, text_mode
        # failed tool call parses show up as a message with content, so ensure
        # that the retrieve response content matches the original request
        assert retrieved_response.choices[0].message.content == content
+
+
+def test_openai_chat_completion_non_streaming_with_file(openai_client, client_with_models, text_model_id):
+    skip_if_provider_isnt_openai(client_with_models, text_model_id)
+
+    # Generate temporary PDF with "Hello World" text
+    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
+        c = canvas.Canvas(temp_pdf.name, pagesize=letter)
+        c.drawString(100, 750, "Hello World")
+        c.save()
+
+        # Read the PDF and encode to base64
+        with open(temp_pdf.name, "rb") as pdf_file:
+            pdf_base64 = base64.b64encode(pdf_file.read()).decode("utf-8")
+
+        # Clean up temporary file
+        os.unlink(temp_pdf.name)
+
+    response = openai_client.chat.completions.create(
+        model=text_model_id,
+        messages=[
+            {
+                "role": "user",
+                "content": "Describe what you see in this PDF file.",
+            },
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "file",
+                        "file": {
+                            "filename": "my-temp-hello-world-pdf",
+                            "file_data": f"data:application/pdf;base64,{pdf_base64}",
+                        },
+                    }
+                ],
+            },
+        ],
+        stream=False,
+    )
+    message_content = response.choices[0].message.content.lower().strip()
+    assert "hello world" in message_content
--- a/uv.lock
+++ b/uv.lock