Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-06-28 02:53:30 +00:00)
More work on file_search verification test
This gets the file_search verification test working against ollama, fireworks, and api.openai.com.

We don't have the entirety of the vector store API implemented in Llama Stack yet, so this still has a bit of a hack to swap between using only OpenAI-compatible APIs versus using the LlamaStackClient to insert content into our vector stores. Outside of actually inserting file contents, the rest of the test works the same and uses only the OpenAI client for all of these providers.

How to run the tests:

Ollama (sometimes flakes with small model):

```
ollama run llama3.2:3b

INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" \
llama stack run ./llama_stack/templates/ollama/run.yaml \
  --image-type venv \
  --env OLLAMA_URL="http://0.0.0.0:11434"

pytest -sv \
  'tests/verifications/openai_api/test_responses.py::test_response_non_streaming_file_search' \
  --base-url=http://localhost:8321/v1/openai/v1 \
  --model meta-llama/Llama-3.2-3B-Instruct
```

Fireworks via Llama Stack:

```
llama stack run llama_stack/templates/fireworks/run.yaml

pytest -sv \
  'tests/verifications/openai_api/test_responses.py::test_response_non_streaming_file_search' \
  --base-url=http://localhost:8321/v1/openai/v1 \
  --model meta-llama/Llama-3.3-70B-Instruct
```

OpenAI directly:

```
pytest -sv \
  'tests/verifications/openai_api/test_responses.py::test_response_non_streaming_file_search' \
  --base-url=https://api.openai.com/v1 \
  --model gpt-4o
```

Signed-off-by: Ben Browning <bbrownin@redhat.com>
parent fa34468308
commit 8ede67b809

7 changed files with 79 additions and 16 deletions
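The swap described in the commit message comes down to a single branch in the test: against api.openai.com the uploaded file is attached through the OpenAI vector stores API and polled until indexing finishes, while Llama Stack providers get pre-chunked content pushed in through the LlamaStackClient. Below is a minimal sketch of that branch, pulled out of the test diff further down; the `insert_file_content` helper is hypothetical, and `openai_client`, `lls_client`, `vector_store`, `file_id`, and `chunks` are assumed to be set up as in the test.

```
import time

def insert_file_content(openai_client, lls_client, base_url, vector_store, file_id, chunks):
    # Hypothetical helper mirroring the test change below; it is not part of
    # the commit itself, just a sketch of the provider swap it describes.
    if "api.openai.com" in base_url:
        # Real OpenAI: attach the uploaded file to the vector store and poll
        # until indexing finishes.
        attach = openai_client.vector_stores.files.create(
            vector_store_id=vector_store.id,
            file_id=file_id,
        )
        while attach.status == "in_progress":
            time.sleep(0.1)
            attach = openai_client.vector_stores.files.retrieve(
                vector_store_id=vector_store.id,
                file_id=file_id,
            )
    else:
        # Llama Stack providers: no file-to-vector-store attach yet, so push
        # pre-chunked content in through the LlamaStackClient directly.
        lls_client.vector_io.insert(vector_db_id=vector_store.id, chunks=chunks)
```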
@@ -18,6 +18,7 @@ The `llamastack/distribution-ollama` distribution consists of the following providers:
 | agents | `inline::meta-reference` |
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
+| files | `inline::localfs` |
 | inference | `remote::ollama` |
 | post_training | `inline::huggingface` |
 | safety | `inline::llama-guard` |
@@ -23,6 +23,8 @@ distribution_spec:
     - inline::basic
     - inline::llm-as-judge
     - inline::braintrust
+    files:
+    - inline::localfs
     post_training:
     - inline::huggingface
     tool_runtime:
@@ -13,6 +13,7 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
+from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
 from llama_stack.providers.inline.post_training.huggingface import HuggingFacePostTrainingConfig
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
@@ -29,6 +30,7 @@ def get_distribution_template() -> DistributionTemplate:
         "eval": ["inline::meta-reference"],
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "files": ["inline::localfs"],
         "post_training": ["inline::huggingface"],
         "tool_runtime": [
             "remote::brave-search",
@@ -49,6 +51,11 @@ def get_distribution_template() -> DistributionTemplate:
         provider_type="inline::faiss",
         config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
     )
+    files_provider = Provider(
+        provider_id="meta-reference-files",
+        provider_type="inline::localfs",
+        config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+    )
     posttraining_provider = Provider(
         provider_id="huggingface",
         provider_type="inline::huggingface",
@@ -98,6 +105,7 @@ def get_distribution_template() -> DistributionTemplate:
                 provider_overrides={
                     "inference": [inference_provider],
                     "vector_io": [vector_io_provider_faiss],
+                    "files": [files_provider],
                     "post_training": [posttraining_provider],
                 },
                 default_models=[inference_model, embedding_model],
@@ -107,6 +115,7 @@ def get_distribution_template() -> DistributionTemplate:
                 provider_overrides={
                     "inference": [inference_provider],
                     "vector_io": [vector_io_provider_faiss],
+                    "files": [files_provider],
                     "post_training": [posttraining_provider],
                     "safety": [
                         Provider(
@@ -4,6 +4,7 @@ apis:
 - agents
 - datasetio
 - eval
+- files
 - inference
 - post_training
 - safety
@@ -84,6 +85,14 @@ providers:
     provider_type: inline::braintrust
     config:
       openai_api_key: ${env.OPENAI_API_KEY:}
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/ollama/files}
+      metadata_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/files_metadata.db
   post_training:
   - provider_id: huggingface
     provider_type: inline::huggingface
@@ -4,6 +4,7 @@ apis:
 - agents
 - datasetio
 - eval
+- files
 - inference
 - post_training
 - safety
@@ -82,6 +83,14 @@ providers:
     provider_type: inline::braintrust
     config:
      openai_api_key: ${env.OPENAI_API_KEY:}
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/ollama/files}
+      metadata_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/files_metadata.db
   post_training:
   - provider_id: huggingface
     provider_type: inline::huggingface
@@ -39,8 +39,7 @@ test_response_file_search:
         input: "How many experts does the Llama 4 Maverick model have?"
         tools:
           - type: file_search
-            vector_store_ids:
-              - test_vector_store
+            # vector_store_ids gets added by the test runner
         output: "128"

 test_response_mcp_tool:
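With the hard-coded `vector_store_ids` gone from the fixture, the runner has to supply the store ID after it creates the vector store. A minimal sketch of that injection, assuming a fixture `case` shaped like the one above and a placeholder store ID; the `bind_vector_store` helper is hypothetical, and the real logic lives in the test diff below.

```
def bind_vector_store(case, vector_store_id):
    # Hypothetical helper: fill in the runtime vector store ID for any
    # file_search tool in a fixture case, as the test below now does.
    for tool in case["tools"]:
        if tool["type"] == "file_search":
            tool["vector_store_ids"] = [vector_store_id]
    return case

# Example with the fixture case from this hunk; "vs_123" is a placeholder ID.
case = {
    "input": "How many experts does the Llama 4 Maverick model have?",
    "tools": [{"type": "file_search"}],  # vector_store_ids added at runtime
    "output": "128",
}
case = bind_vector_store(case, "vs_123")
```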
@@ -5,6 +5,7 @@
 # the root directory of this source tree.

 import json
+import time

 import httpx
 import openai
@@ -265,25 +266,27 @@ def test_response_non_streaming_web_search(request, openai_client, model, provider,
     ids=case_id_generator,
 )
 def test_response_non_streaming_file_search(
-    base_url, request, openai_client, model, provider, verification_config, case
+    base_url, request, openai_client, model, provider, verification_config, tmp_path, case
 ):
+    if isinstance(openai_client, LlamaStackAsLibraryClient):
+        pytest.skip("Responses API file search is not yet supported in library client.")
+
     test_name_base = get_base_test_name(request)
     if should_skip_test(verification_config, provider, model, test_name_base):
         pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")

-    lls_client = LlamaStackClient(base_url=base_url.replace("/v1/openai/v1", ""))
-    vector_db_id = "test_vector_store"
-
-    # Ensure the test starts from a clean vector store
-    try:
-        lls_client.vector_dbs.unregister(vector_db_id=vector_db_id)
-    except Exception:
-        pass
-    lls_client.vector_dbs.register(
-        vector_db_id=vector_db_id,
-        embedding_model="all-MiniLM-L6-v2",
-    )
+    vector_stores = openai_client.vector_stores.list()
+    for vector_store in vector_stores:
+        if vector_store.name == "test_vector_store":
+            openai_client.vector_stores.delete(vector_store_id=vector_store.id)
+    vector_store = openai_client.vector_stores.create(
+        name="test_vector_store",
+        # extra_body={
+        #     "embedding_model": "all-MiniLM-L6-v2",
+        #     "embedding_dimension": 384,
+        # },
+    )

     doc_content = "Llama 4 Maverick has 128 experts"
     chunks = [
         {
@@ -294,18 +297,49 @@ def test_response_non_streaming_file_search(
             },
         },
     ]
-    lls_client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks)
+
+    file_name = "test_response_non_streaming_file_search.txt"
+    files = openai_client.files.list()
+    for file in files:
+        if file.filename == file_name:
+            openai_client.files.delete(file_id=file.id)
+    file_path = tmp_path / file_name
+    file_path.write_text(doc_content)
+    file_response = openai_client.files.create(file=open(file_path, "rb"), purpose="assistants")
+
+    if "api.openai.com" in base_url:
+        file_attach_response = openai_client.vector_stores.files.create(
+            vector_store_id=vector_store.id,
+            file_id=file_response.id,
+        )
+        while file_attach_response.status == "in_progress":
+            time.sleep(0.1)
+            file_attach_response = openai_client.vector_stores.files.retrieve(
+                vector_store_id=vector_store.id,
+                file_id=file_response.id,
+            )
+    else:
+        # TODO: only until we have a way to insert content into OpenAI vector stores
+        lls_client = LlamaStackClient(base_url=base_url.replace("/v1/openai/v1", ""))
+        lls_client.vector_io.insert(vector_db_id=vector_store.id, chunks=chunks)
+
+    tools = case["tools"]
+    for tool in tools:
+        if tool["type"] == "file_search":
+            tool["vector_store_ids"] = [vector_store.id]

     response = openai_client.responses.create(
         model=model,
         input=case["input"],
         tools=case["tools"],
         stream=False,
+        include=["file_search_call.results"],
     )
     assert len(response.output) > 1
     assert response.output[0].type == "file_search_call"
     assert response.output[0].status == "completed"
     assert response.output[0].queries  # ensure it's some non-empty list
+    assert response.output[0].results
     assert response.output[0].results[0].text == doc_content
     assert response.output[0].results[0].score > 0
     assert response.output[1].type == "message"