mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-27 18:50:41 +00:00
Expand file types tested with file_search
This expands the file types tested with file_search to include Word documents (.docx), Markdown (.md), text (.txt), PDF (.pdf), and PowerPoint (.pptx) files. Python's mimetypes library doesn't actually recognize markdown docs as text, so we have to handle that case specifically instead of relying on mimetypes to get it right. Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
parent
dae7953de4
commit
fb6763eef5
7 changed files with 89 additions and 4 deletions
|
@ -76,7 +76,7 @@ class SyntheticDataKitToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime):
|
|||
|
||||
file_id = kwargs["file_id"]
|
||||
file_response = await self.files_api.openai_retrieve_file(file_id)
|
||||
mime_type, _ = mimetypes.guess_type(file_response.filename)
|
||||
mime_type = self._guess_mime_type(file_response.filename)
|
||||
content_response = await self.files_api.openai_retrieve_file_content(file_id)
|
||||
|
||||
mime_category = mime_type.split("/")[0] if mime_type else None
|
||||
|
@ -89,10 +89,16 @@ class SyntheticDataKitToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime):
|
|||
)
|
||||
else:
|
||||
return await asyncio.to_thread(
|
||||
self.synthetic_data_kit_convert, content_response.body, file_response.filename
|
||||
self._synthetic_data_kit_convert, content_response.body, file_response.filename
|
||||
)
|
||||
|
||||
def synthetic_data_kit_convert(self, content_body: bytes, filename: str) -> ToolInvocationResult:
|
||||
def _guess_mime_type(self, filename: str) -> str | None:
|
||||
mime_type, _ = mimetypes.guess_type(filename)
|
||||
if mime_type is None and filename.endswith(".md"):
|
||||
mime_type = "text/markdown"
|
||||
return mime_type
|
||||
|
||||
def _synthetic_data_kit_convert(self, content_body: bytes, filename: str) -> ToolInvocationResult:
|
||||
from synthetic_data_kit.core.ingest import process_file
|
||||
|
||||
try:
|
||||
|
|
Binary file not shown.
|
@ -0,0 +1,27 @@
|
|||
# Llama Stack
|
||||
|
||||
## Llama Stack Overview
|
||||
|
||||
Llama Stack standardizes the core building blocks that simplify AI application development. It codifies best practices across the Llama ecosystem. More specifically, it provides
|
||||
|
||||
* Unified API layer for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry.
|
||||
|
||||
* Plugin architecture to support the rich ecosystem of different API implementations in various environments, including local development, on-premises, cloud, and mobile.
|
||||
|
||||
* Prepackaged verified distributions which offer a one-stop solution for developers to get started quickly and reliably in any environment.
|
||||
|
||||
* Multiple developer interfaces like CLI and SDKs for Python, Typescript, iOS, and Android.
|
||||
|
||||
* Standalone applications as examples for how to build production-grade AI applications with Llama Stack.
|
||||
|
||||
## Llama Stack Benefits
|
||||
|
||||
* Flexible Options: Developers can choose their preferred infrastructure without changing APIs and enjoy flexible deployment choices.
|
||||
|
||||
* Consistent Experience: With its unified APIs, Llama Stack makes it easier to build, test, and deploy AI applications with consistent application behavior.
|
||||
|
||||
* Robust Ecosystem: Llama Stack is already integrated with distribution partners (cloud providers, hardware vendors, and AI-focused companies) that offer tailored infrastructure, software, and services for deploying Llama models.
|
||||
|
||||
# Llama 4 Maverick
|
||||
|
||||
Llama 4 Maverick is a Mixture-of-Experts (MoE) model with 17 billion active parameters and 128 experts.
|
Binary file not shown.
|
@ -0,0 +1,24 @@
|
|||
Llama Stack
|
||||
|
||||
|
||||
Llama Stack Overview
|
||||
|
||||
Llama Stack standardizes the core building blocks that simplify AI application development. It codifies best practices across the Llama ecosystem. More specifically, it provides
|
||||
|
||||
* Unified API layer for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry.
|
||||
* Plugin architecture to support the rich ecosystem of different API implementations in various environments, including local development, on-premises, cloud, and mobile.
|
||||
* Prepackaged verified distributions which offer a one-stop solution for developers to get started quickly and reliably in any environment.
|
||||
* Multiple developer interfaces like CLI and SDKs for Python, Typescript, iOS, and Android.
|
||||
* Standalone applications as examples for how to build production-grade AI applications with Llama Stack.
|
||||
|
||||
|
||||
Llama Stack Benefits
|
||||
|
||||
* Flexible Options: Developers can choose their preferred infrastructure without changing APIs and enjoy flexible deployment choices.
|
||||
* Consistent Experience: With its unified APIs, Llama Stack makes it easier to build, test, and deploy AI applications with consistent application behavior.
|
||||
* Robust Ecosystem: Llama Stack is already integrated with distribution partners (cloud providers, hardware vendors, and AI-focused companies) that offer tailored infrastructure, software, and services for deploying Llama models.
|
||||
|
||||
|
||||
Llama 4 Maverick
|
||||
|
||||
Llama 4 Maverick is a Mixture-of-Experts (MoE) model with 17 billion active parameters and 128 experts.
|
|
@ -42,12 +42,40 @@ test_response_file_search:
|
|||
# vector_store_ids param for file_search tool gets added by the test runner
|
||||
file_content: "Llama 4 Maverick has 128 experts"
|
||||
output: "128"
|
||||
- case_id: "llama_experts_docx"
|
||||
input: "How many experts does the Llama 4 Maverick model have?"
|
||||
tools:
|
||||
- type: file_search
|
||||
# vector_store_ids param for file_search tool gets added by the test runner
|
||||
file_path: "docs/llama_stack_and_models.docx"
|
||||
output: "128"
|
||||
- case_id: "llama_experts_md"
|
||||
input: "How many experts does the Llama 4 Maverick model have?"
|
||||
tools:
|
||||
- type: file_search
|
||||
# vector_store_ids param for file_search tool gets added by the test runner
|
||||
file_path: "docs/llama_stack_and_models.md"
|
||||
output: "128"
|
||||
- case_id: "llama_experts_pdf"
|
||||
input: "How many experts does the Llama 4 Maverick model have?"
|
||||
tools:
|
||||
- type: file_search
|
||||
# vector_store_ids param for file_search tool gets added by the test runner
|
||||
file_path: "pdfs/llama_stack_and_models.pdf"
|
||||
file_path: "docs/llama_stack_and_models.pdf"
|
||||
output: "128"
|
||||
- case_id: "llama_experts_pptx"
|
||||
input: "How many experts does the Llama 4 Maverick model have?"
|
||||
tools:
|
||||
- type: file_search
|
||||
# vector_store_ids param for file_search tool gets added by the test runner
|
||||
file_path: "docs/llama_stack_and_models.pptx"
|
||||
output: "128"
|
||||
- case_id: "llama_experts_txt"
|
||||
input: "How many experts does the Llama 4 Maverick model have?"
|
||||
tools:
|
||||
- type: file_search
|
||||
# vector_store_ids param for file_search tool gets added by the test runner
|
||||
file_path: "docs/llama_stack_and_models.txt"
|
||||
output: "128"
|
||||
|
||||
test_response_mcp_tool:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue