From 8ede67b8090f4c98a09eaa16c374c8518d4bebf3 Mon Sep 17 00:00:00 2001
From: Ben Browning
Date: Wed, 11 Jun 2025 09:28:33 -0400
Subject: [PATCH] More work on file_search verification test

This gets the file_search verification test working against ollama,
fireworks, and api.openai.com.

We don't have the entirety of the vector store API implemented in Llama
Stack yet, so this still has a bit of a hack to swap between using only
OpenAI-compatible APIs and using the LlamaStackClient to insert content
into our vector stores (sketched below, after the run instructions).

Outside of actually inserting file contents, the rest of the test works
the same and uses only the OpenAI client for all of these providers.

How to run the tests:

Ollama (sometimes flakes with small model):

```
ollama run llama3.2:3b

INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" \
llama stack run ./llama_stack/templates/ollama/run.yaml \
  --image-type venv \
  --env OLLAMA_URL="http://0.0.0.0:11434"

pytest -sv \
  'tests/verifications/openai_api/test_responses.py::test_response_non_streaming_file_search' \
  --base-url=http://localhost:8321/v1/openai/v1 \
  --model meta-llama/Llama-3.2-3B-Instruct
```

Fireworks via Llama Stack:

```
llama stack run llama_stack/templates/fireworks/run.yaml

pytest -sv \
  'tests/verifications/openai_api/test_responses.py::test_response_non_streaming_file_search' \
  --base-url=http://localhost:8321/v1/openai/v1 \
  --model meta-llama/Llama-3.3-70B-Instruct
```

OpenAI directly:

```
pytest -sv \
  'tests/verifications/openai_api/test_responses.py::test_response_non_streaming_file_search' \
  --base-url=https://api.openai.com/v1 \
  --model gpt-4o
```
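The vector store hack and the provider-agnostic call boil down to roughly
this (a condensed sketch of the test changes in the diff below, not the
exact code; `base_url`, `vector_store`, `file_response`, `chunks`, and
`case` are set up earlier in the test):

```
# Condensed sketch of the test flow (full version in the diff below).
if "api.openai.com" in base_url:
    # OpenAI lets us attach the uploaded file directly to the vector store.
    openai_client.vector_stores.files.create(
        vector_store_id=vector_store.id,
        file_id=file_response.id,
    )
else:
    # Llama Stack: fall back to vector_io until we can insert content
    # into our vector stores through the OpenAI-compatible API.
    lls_client = LlamaStackClient(base_url=base_url.replace("/v1/openai/v1", ""))
    lls_client.vector_io.insert(vector_db_id=vector_store.id, chunks=chunks)

# Everything else goes through the OpenAI client for every provider.
response = openai_client.responses.create(
    model=model,
    input=case["input"],
    tools=[{"type": "file_search", "vector_store_ids": [vector_store.id]}],
    stream=False,
    include=["file_search_call.results"],
)
```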
Signed-off-by: Ben Browning
---
 .../self_hosted_distro/ollama.md              |  1 +
 llama_stack/templates/ollama/build.yaml       |  2 +
 llama_stack/templates/ollama/ollama.py        |  9 +++
 .../templates/ollama/run-with-safety.yaml     |  9 +++
 llama_stack/templates/ollama/run.yaml         |  9 +++
 .../fixtures/test_cases/responses.yaml        |  3 +-
 .../openai_api/test_responses.py              | 62 ++++++++++++++-----
 7 files changed, 79 insertions(+), 16 deletions(-)

diff --git a/docs/source/distributions/self_hosted_distro/ollama.md b/docs/source/distributions/self_hosted_distro/ollama.md
index 4d148feda..e09c79359 100644
--- a/docs/source/distributions/self_hosted_distro/ollama.md
+++ b/docs/source/distributions/self_hosted_distro/ollama.md
@@ -18,6 +18,7 @@ The `llamastack/distribution-ollama` distribution consists of the following prov
 | agents | `inline::meta-reference` |
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
+| files | `inline::localfs` |
 | inference | `remote::ollama` |
 | post_training | `inline::huggingface` |
 | safety | `inline::llama-guard` |
diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml
index 36a120897..ebe0849f3 100644
--- a/llama_stack/templates/ollama/build.yaml
+++ b/llama_stack/templates/ollama/build.yaml
@@ -23,6 +23,8 @@ distribution_spec:
     - inline::basic
     - inline::llm-as-judge
    - inline::braintrust
+    files:
+    - inline::localfs
     post_training:
     - inline::huggingface
     tool_runtime:
diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py
index 0b4f05128..46c4852a4 100644
--- a/llama_stack/templates/ollama/ollama.py
+++ b/llama_stack/templates/ollama/ollama.py
@@ -13,6 +13,7 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
+from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
 from llama_stack.providers.inline.post_training.huggingface import HuggingFacePostTrainingConfig
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
@@ -29,6 +30,7 @@ def get_distribution_template() -> DistributionTemplate:
         "eval": ["inline::meta-reference"],
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "files": ["inline::localfs"],
         "post_training": ["inline::huggingface"],
         "tool_runtime": [
             "remote::brave-search",
@@ -49,6 +51,11 @@ def get_distribution_template() -> DistributionTemplate:
         provider_type="inline::faiss",
         config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
     )
+    files_provider = Provider(
+        provider_id="meta-reference-files",
+        provider_type="inline::localfs",
+        config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+    )
     posttraining_provider = Provider(
         provider_id="huggingface",
         provider_type="inline::huggingface",
@@ -98,6 +105,7 @@ def get_distribution_template() -> DistributionTemplate:
             provider_overrides={
                 "inference": [inference_provider],
                 "vector_io": [vector_io_provider_faiss],
+                "files": [files_provider],
                 "post_training": [posttraining_provider],
             },
             default_models=[inference_model, embedding_model],
@@ -107,6 +115,7 @@ def get_distribution_template() -> DistributionTemplate:
             provider_overrides={
                 "inference": [inference_provider],
                 "vector_io": [vector_io_provider_faiss],
+                "files": [files_provider],
                 "post_training": [posttraining_provider],
                 "safety": [
                     Provider(
diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml
index 7bf9fc3bd..85d5c813b 100644
--- a/llama_stack/templates/ollama/run-with-safety.yaml
+++ b/llama_stack/templates/ollama/run-with-safety.yaml
@@ -4,6 +4,7 @@ apis:
 - agents
 - datasetio
 - eval
+- files
 - inference
 - post_training
 - safety
@@ -84,6 +85,14 @@ providers:
       provider_type: inline::braintrust
       config:
         openai_api_key: ${env.OPENAI_API_KEY:}
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/ollama/files}
+      metadata_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/files_metadata.db
   post_training:
   - provider_id: huggingface
     provider_type: inline::huggingface
diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml
index 0030bcd60..2d10a99a4 100644
--- a/llama_stack/templates/ollama/run.yaml
+++ b/llama_stack/templates/ollama/run.yaml
@@ -4,6 +4,7 @@ apis:
 - agents
 - datasetio
 - eval
+- files
 - inference
 - post_training
 - safety
@@ -82,6 +83,14 @@ providers:
       provider_type: inline::braintrust
       config:
         openai_api_key: ${env.OPENAI_API_KEY:}
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/ollama/files}
+      metadata_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/files_metadata.db
   post_training:
   - provider_id: huggingface
     provider_type: inline::huggingface
diff --git a/tests/verifications/openai_api/fixtures/test_cases/responses.yaml b/tests/verifications/openai_api/fixtures/test_cases/responses.yaml
index 7115e4b50..1ce25181e 100644
--- a/tests/verifications/openai_api/fixtures/test_cases/responses.yaml
+++ b/tests/verifications/openai_api/fixtures/test_cases/responses.yaml
@@ -39,8 +39,7 @@
 test_response_file_search:
   input: "How many experts does the Llama 4 Maverick model have?"
   tools:
   - type: file_search
-    vector_store_ids:
-    - test_vector_store
+    # vector_store_ids gets added by the test runner
   output: "128"
 test_response_mcp_tool:
diff --git a/tests/verifications/openai_api/test_responses.py b/tests/verifications/openai_api/test_responses.py
index 86b267fac..5a04b0449 100644
--- a/tests/verifications/openai_api/test_responses.py
+++ b/tests/verifications/openai_api/test_responses.py
@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 
 import json
+import time
 
 import httpx
 import openai
@@ -265,25 +266,27 @@ def test_response_non_streaming_web_search(request, openai_client, model, provid
     ids=case_id_generator,
 )
 def test_response_non_streaming_file_search(
-    base_url, request, openai_client, model, provider, verification_config, case
+    base_url, request, openai_client, model, provider, verification_config, tmp_path, case
 ):
+    if isinstance(openai_client, LlamaStackAsLibraryClient):
+        pytest.skip("Responses API file search is not yet supported in library client.")
+
     test_name_base = get_base_test_name(request)
     if should_skip_test(verification_config, provider, model, test_name_base):
         pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
 
-    lls_client = LlamaStackClient(base_url=base_url.replace("/v1/openai/v1", ""))
-    vector_db_id = "test_vector_store"
-
-    # Ensure the test starts from a clean vector store
-    try:
-        lls_client.vector_dbs.unregister(vector_db_id=vector_db_id)
-    except Exception:
-        pass
-
-    lls_client.vector_dbs.register(
-        vector_db_id=vector_db_id,
-        embedding_model="all-MiniLM-L6-v2",
+    vector_stores = openai_client.vector_stores.list()
+    for vector_store in vector_stores:
+        if vector_store.name == "test_vector_store":
+            openai_client.vector_stores.delete(vector_store_id=vector_store.id)
+    vector_store = openai_client.vector_stores.create(
+        name="test_vector_store",
+        # extra_body={
+        #     "embedding_model": "all-MiniLM-L6-v2",
+        #     "embedding_dimension": 384,
+        # },
     )
+
     doc_content = "Llama 4 Maverick has 128 experts"
     chunks = [
         {
@@ -294,18 +297,49 @@ def test_response_non_streaming_file_search(
             },
         },
     ]
-    lls_client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks)
+
+    file_name = "test_response_non_streaming_file_search.txt"
+    files = openai_client.files.list()
+    for file in files:
+        if file.filename == file_name:
+            openai_client.files.delete(file_id=file.id)
+    file_path = tmp_path / file_name
+    file_path.write_text(doc_content)
+    file_response = openai_client.files.create(file=open(file_path, "rb"), purpose="assistants")
+
+    if "api.openai.com" in base_url:
+        file_attach_response = openai_client.vector_stores.files.create(
+            vector_store_id=vector_store.id,
+            file_id=file_response.id,
+        )
+        while file_attach_response.status == "in_progress":
+            time.sleep(0.1)
+            file_attach_response = openai_client.vector_stores.files.retrieve(
+                vector_store_id=vector_store.id,
+                file_id=file_response.id,
+            )
+    else:
+        # TODO: only until we have a way to insert content into OpenAI vector stores
+        lls_client = LlamaStackClient(base_url=base_url.replace("/v1/openai/v1", ""))
+        lls_client.vector_io.insert(vector_db_id=vector_store.id, chunks=chunks)
+
+    tools = case["tools"]
+    for tool in tools:
+        if tool["type"] == "file_search":
+            tool["vector_store_ids"] = [vector_store.id]
 
     response = openai_client.responses.create(
         model=model,
         input=case["input"],
         tools=case["tools"],
         stream=False,
include=["file_search_call.results"], ) assert len(response.output) > 1 assert response.output[0].type == "file_search_call" assert response.output[0].status == "completed" assert response.output[0].queries # ensure it's some non-empty list + assert response.output[0].results assert response.output[0].results[0].text == doc_content assert response.output[0].results[0].score > 0 assert response.output[1].type == "message"