feat: Allow application/yaml as mime_type (#2575)

# What does this PR do?
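Allow `application/yaml` as a `mime_type` for documents. The `text/yaml` MIME type is still accepted but now emits a `DeprecationWarning` recommending `application/yaml` instead.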

## Test Plan
Added unit tests.
This commit is contained in:
Ondrej Metelka 2025-07-21 15:43:32 +02:00 committed by GitHub
parent b2c7543af7
commit 89c49eb003
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 197 additions and 1 deletions

View file

@@ -10,6 +10,7 @@ import re
import secrets
import string
import uuid
import warnings
from collections.abc import AsyncGenerator
from datetime import UTC, datetime
@@ -911,8 +912,16 @@ async def load_data_from_url(url: str) -> str:
async def get_raw_document_text(document: Document) -> str:
if not document.mime_type.startswith("text/"):
# Handle deprecated text/yaml mime type with warning
if document.mime_type == "text/yaml":
warnings.warn(
"The 'text/yaml' MIME type is deprecated. Please use 'application/yaml' instead.",
DeprecationWarning,
stacklevel=2,
)
elif not (document.mime_type.startswith("text/") or document.mime_type == "application/yaml"):
raise ValueError(f"Unexpected document mime type: {document.mime_type}")
if isinstance(document.content, URL):
return await load_data_from_url(document.content.uri)
elif isinstance(document.content, str):