diff --git a/docs/source/distributions/self_hosted_distro/ollama.md b/docs/source/distributions/self_hosted_distro/ollama.md
index 4d148feda..e09c79359 100644
--- a/docs/source/distributions/self_hosted_distro/ollama.md
+++ b/docs/source/distributions/self_hosted_distro/ollama.md
@@ -18,6 +18,7 @@ The `llamastack/distribution-ollama` distribution consists of the following prov
 | agents | `inline::meta-reference` |
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
+| files | `inline::localfs` |
 | inference | `remote::ollama` |
 | post_training | `inline::huggingface` |
 | safety | `inline::llama-guard` |
diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml
index 36a120897..ebe0849f3 100644
--- a/llama_stack/templates/ollama/build.yaml
+++ b/llama_stack/templates/ollama/build.yaml
@@ -23,6 +23,8 @@ distribution_spec:
     - inline::basic
     - inline::llm-as-judge
     - inline::braintrust
+    files:
+    - inline::localfs
     post_training:
     - inline::huggingface
     tool_runtime:
diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py
index 0b4f05128..46c4852a4 100644
--- a/llama_stack/templates/ollama/ollama.py
+++ b/llama_stack/templates/ollama/ollama.py
@@ -13,6 +13,7 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
+from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
 from llama_stack.providers.inline.post_training.huggingface import HuggingFacePostTrainingConfig
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
@@ -29,6 +30,7 @@ def get_distribution_template() -> DistributionTemplate:
         "eval": ["inline::meta-reference"],
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "files": ["inline::localfs"],
         "post_training": ["inline::huggingface"],
         "tool_runtime": [
             "remote::brave-search",
@@ -49,6 +51,11 @@ def get_distribution_template() -> DistributionTemplate:
         provider_type="inline::faiss",
         config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
     )
+    files_provider = Provider(
+        provider_id="meta-reference-files",
+        provider_type="inline::localfs",
+        config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+    )
     posttraining_provider = Provider(
         provider_id="huggingface",
         provider_type="inline::huggingface",
@@ -98,6 +105,7 @@ def get_distribution_template() -> DistributionTemplate:
             provider_overrides={
                 "inference": [inference_provider],
                 "vector_io": [vector_io_provider_faiss],
+                "files": [files_provider],
                 "post_training": [posttraining_provider],
             },
             default_models=[inference_model, embedding_model],
@@ -107,6 +115,7 @@ def get_distribution_template() -> DistributionTemplate:
             provider_overrides={
                 "inference": [inference_provider],
                 "vector_io": [vector_io_provider_faiss],
+                "files": [files_provider],
                 "post_training": [posttraining_provider],
                 "safety": [
                     Provider(
diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml
index 7bf9fc3bd..85d5c813b 100644
--- a/llama_stack/templates/ollama/run-with-safety.yaml
+++ b/llama_stack/templates/ollama/run-with-safety.yaml
@@ -4,6 +4,7 @@ apis:
 - agents
 - datasetio
 - eval
+- files
 - inference
 - post_training
 - safety
@@ -84,6 +85,14 @@ providers:
     provider_type: inline::braintrust
     config:
       openai_api_key: ${env.OPENAI_API_KEY:}
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/ollama/files}
+      metadata_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/files_metadata.db
   post_training:
   - provider_id: huggingface
     provider_type: inline::huggingface
diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml
index 0030bcd60..2d10a99a4 100644
--- a/llama_stack/templates/ollama/run.yaml
+++ b/llama_stack/templates/ollama/run.yaml
@@ -4,6 +4,7 @@ apis:
 - agents
 - datasetio
 - eval
+- files
 - inference
 - post_training
 - safety
@@ -82,6 +83,14 @@ providers:
     provider_type: inline::braintrust
     config:
      openai_api_key: ${env.OPENAI_API_KEY:}
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/ollama/files}
+      metadata_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/files_metadata.db
   post_training:
   - provider_id: huggingface
     provider_type: inline::huggingface
diff --git a/tests/verifications/openai_api/fixtures/test_cases/responses.yaml b/tests/verifications/openai_api/fixtures/test_cases/responses.yaml
index 7115e4b50..1ce25181e 100644
--- a/tests/verifications/openai_api/fixtures/test_cases/responses.yaml
+++ b/tests/verifications/openai_api/fixtures/test_cases/responses.yaml
@@ -39,8 +39,7 @@ test_response_file_search:
       input: "How many experts does the Llama 4 Maverick model have?"
      tools:
       - type: file_search
-        vector_store_ids:
-        - test_vector_store
+        # vector_store_ids gets added by the test runner
       output: "128"
 
 test_response_mcp_tool:
diff --git a/tests/verifications/openai_api/test_responses.py b/tests/verifications/openai_api/test_responses.py
index 86b267fac..5a04b0449 100644
--- a/tests/verifications/openai_api/test_responses.py
+++ b/tests/verifications/openai_api/test_responses.py
@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 
 import json
+import time
 
 import httpx
 import openai
@@ -265,25 +266,27 @@ def test_response_non_streaming_web_search(request, openai_client, model, provid
     ids=case_id_generator,
 )
 def test_response_non_streaming_file_search(
-    base_url, request, openai_client, model, provider, verification_config, case
+    base_url, request, openai_client, model, provider, verification_config, tmp_path, case
 ):
+    if isinstance(openai_client, LlamaStackAsLibraryClient):
+        pytest.skip("Responses API file search is not yet supported in library client.")
+
     test_name_base = get_base_test_name(request)
     if should_skip_test(verification_config, provider, model, test_name_base):
         pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
 
-    lls_client = LlamaStackClient(base_url=base_url.replace("/v1/openai/v1", ""))
-    vector_db_id = "test_vector_store"
-
-    # Ensure the test starts from a clean vector store
-    try:
-        lls_client.vector_dbs.unregister(vector_db_id=vector_db_id)
-    except Exception:
-        pass
-
-    lls_client.vector_dbs.register(
-        vector_db_id=vector_db_id,
-        embedding_model="all-MiniLM-L6-v2",
+    vector_stores = openai_client.vector_stores.list()
+    for vector_store in vector_stores:
+        if vector_store.name == "test_vector_store":
+            openai_client.vector_stores.delete(vector_store_id=vector_store.id)
+    vector_store = openai_client.vector_stores.create(
+        name="test_vector_store",
+        # extra_body={
+        #     "embedding_model": "all-MiniLM-L6-v2",
+        #     "embedding_dimension": 384,
+        # },
     )
+
     doc_content = "Llama 4 Maverick has 128 experts"
     chunks = [
         {
@@ -294,18 +297,49 @@ def test_response_non_streaming_file_search(
             },
         },
     ]
-    lls_client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks)
+
+    file_name = "test_response_non_streaming_file_search.txt"
+    files = openai_client.files.list()
+    for file in files:
+        if file.filename == file_name:
+            openai_client.files.delete(file_id=file.id)
+    file_path = tmp_path / file_name
+    file_path.write_text(doc_content)
+    file_response = openai_client.files.create(file=open(file_path, "rb"), purpose="assistants")
+
+    if "api.openai.com" in base_url:
+        file_attach_response = openai_client.vector_stores.files.create(
+            vector_store_id=vector_store.id,
+            file_id=file_response.id,
+        )
+        while file_attach_response.status == "in_progress":
+            time.sleep(0.1)
+            file_attach_response = openai_client.vector_stores.files.retrieve(
+                vector_store_id=vector_store.id,
+                file_id=file_response.id,
+            )
+    else:
+        # TODO: only until we have a way to insert content into OpenAI vector stores
+        lls_client = LlamaStackClient(base_url=base_url.replace("/v1/openai/v1", ""))
+        lls_client.vector_io.insert(vector_db_id=vector_store.id, chunks=chunks)
+
+    tools = case["tools"]
+    for tool in tools:
+        if tool["type"] == "file_search":
+            tool["vector_store_ids"] = [vector_store.id]
 
     response = openai_client.responses.create(
         model=model,
         input=case["input"],
         tools=case["tools"],
         stream=False,
+        include=["file_search_call.results"],
     )
     assert len(response.output) > 1
     assert response.output[0].type == "file_search_call"
     assert response.output[0].status == "completed"
     assert response.output[0].queries  # ensure it's some non-empty list
+    assert response.output[0].results
     assert response.output[0].results[0].text == doc_content
     assert response.output[0].results[0].score > 0
     assert response.output[1].type == "message"
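For context on how the new `files: inline::localfs` provider is exercised once the ollama distribution is running, here is a minimal sketch using the same OpenAI-compatible client calls that the updated test relies on (`files.create`, `vector_stores.create`, `vector_stores.files.create`). The base URL, port 8321, placeholder API key, file name, and store name are assumptions for illustration, not part of this change.

```python
# Minimal sketch (assumed defaults): upload a file to the localfs files provider
# through the OpenAI-compatible endpoint of a locally running ollama distribution,
# then attach it to a vector store so it can be used with the file_search tool.
import io

from openai import OpenAI

# Assumption: the stack serves its OpenAI-compatible API under /v1/openai/v1 on port 8321.
client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

# The uploaded bytes are stored under FILES_STORAGE_DIR and the metadata row lands in
# files_metadata.db, per the run.yaml config added above.
uploaded = client.files.create(
    file=("example.txt", io.BytesIO(b"Llama 4 Maverick has 128 experts")),
    purpose="assistants",
)
print(uploaded.id, uploaded.filename)

# Mirror what the updated test does against api.openai.com: create a vector store
# and attach the uploaded file to it for file_search.
store = client.vector_stores.create(name="example_store")
client.vector_stores.files.create(vector_store_id=store.id, file_id=uploaded.id)
```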