Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-14 17:16:09 +00:00)

Merge branch 'main' into watsonx_hc

This commit is contained in: commit f5388e252d
48 changed files with 2179 additions and 66 deletions
@@ -22,9 +22,6 @@ def provider_from_model(client_with_models, model_id):

def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id):
    if isinstance(client_with_models, LlamaStackAsLibraryClient):
        pytest.skip("OpenAI completions are not supported when testing with library client yet.")

    provider = provider_from_model(client_with_models, model_id)
    if provider.provider_type in (
        "inline::meta-reference",
@@ -44,6 +41,23 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
        pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.")


def skip_if_model_doesnt_support_suffix(client_with_models, model_id):
    # To test `fim` (fill-in-the-middle) completion, we need to use a model that supports suffix.
    # Use this to specifically test this API functionality.

    # pytest -sv --stack-config="inference=ollama" \
    #   tests/integration/inference/test_openai_completion.py \
    #   --text-model qwen2.5-coder:1.5b \
    #   -k test_openai_completion_non_streaming_suffix

    if model_id != "qwen2.5-coder:1.5b":
        pytest.skip(f"Suffix is not supported for the model: {model_id}.")

    provider = provider_from_model(client_with_models, model_id)
    if provider.provider_type != "remote::ollama":
        pytest.skip(f"Provider {provider.provider_type} doesn't support suffix.")


def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, model_id):
    if isinstance(client_with_models, LlamaStackAsLibraryClient):
        pytest.skip("OpenAI chat completions are not supported when testing with library client yet.")
@@ -102,6 +116,32 @@ def test_openai_completion_non_streaming(llama_stack_client, client_with_models,
    assert len(choice.text) > 10


@pytest.mark.parametrize(
    "test_case",
    [
        "inference:completion:suffix",
    ],
)
def test_openai_completion_non_streaming_suffix(llama_stack_client, client_with_models, text_model_id, test_case):
    skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)
    skip_if_model_doesnt_support_suffix(client_with_models, text_model_id)
    tc = TestCase(test_case)

    # ollama needs more verbose prompting for some reason here...
    response = llama_stack_client.completions.create(
        model=text_model_id,
        prompt=tc["content"],
        stream=False,
        suffix=tc["suffix"],
        max_tokens=10,
    )

    assert len(response.choices) > 0
    choice = response.choices[0]
    assert len(choice.text) > 5
    assert "france" in choice.text.lower()


@pytest.mark.parametrize(
    "test_case",
    [
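Note: `suffix` here follows the legacy OpenAI completions parameter, where the model generates only the text that fits between `prompt` and `suffix`. A minimal standalone sketch of the same fill-in-the-middle call, assuming an OpenAI-compatible endpoint (for example one served by Ollama at a hypothetical localhost URL) and a suffix-capable model:

    import openai

    # Assumed endpoint and model; adjust to your deployment.
    client = openai.OpenAI(base_url="http://localhost:11434/v1", api_key="none")

    response = client.completions.create(
        model="qwen2.5-coder:1.5b",
        prompt="The capital of ",   # text before the gap
        suffix="is Paris.",         # text after the gap
        max_tokens=10,
    )
    # The completion should fill the gap, e.g. "France ".
    print(response.choices[0].text)
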
@@ -51,7 +51,6 @@ def skip_if_model_doesnt_support_openai_embeddings(client, model_id):
        "remote::runpod",
        "remote::sambanova",
        "remote::tgi",
        "remote::ollama",
    ):
        pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI embeddings.")
@@ -4,6 +4,12 @@
      "content": "Complete the sentence using one word: Roses are red, violets are "
    }
  },
  "suffix": {
    "data": {
      "content": "The capital of ",
      "suffix": "is Paris."
    }
  },
  "non_streaming": {
    "data": {
      "content": "Micheael Jordan is born in ",
@@ -84,6 +84,28 @@ async def test_query_chunks_full_text_search(sqlite_vec_index, sample_chunks, sa
    assert len(response_no_results.chunks) == 0, f"Expected 0 results, but got {len(response_no_results.chunks)}"


@pytest.mark.asyncio
async def test_query_chunks_hybrid(sqlite_vec_index, sample_chunks, sample_embeddings):
    await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)

    # Create a query embedding that's similar to the first chunk
    query_embedding = sample_embeddings[0]
    query_string = "Sentence 5"

    response = await sqlite_vec_index.query_hybrid(
        embedding=query_embedding,
        query_string=query_string,
        k=3,
        score_threshold=0.0,
        reranker_type="rrf",
        reranker_params={"impact_factor": 60.0},
    )

    assert len(response.chunks) == 3, f"Expected 3 results, got {len(response.chunks)}"
    # Verify scores are in descending order (higher is better)
    assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1))


@pytest.mark.asyncio
async def test_query_chunks_full_text_search_k_greater_than_results(sqlite_vec_index, sample_chunks, sample_embeddings):
    # Re-initialize with a clean index
@@ -141,3 +163,355 @@ def test_generate_chunk_id():
        "bc744db3-1b25-0a9c-cdff-b6ba3df73c36",
        "f68df25d-d9aa-ab4d-5684-64a233add20d",
    ]


@pytest.mark.asyncio
async def test_query_chunks_hybrid_no_keyword_matches(sqlite_vec_index, sample_chunks, sample_embeddings):
    """Test hybrid search when keyword search returns no matches - should still return vector results."""
    await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)

    # Use a non-existent keyword but a valid vector query
    query_embedding = sample_embeddings[0]
    query_string = "Sentence 499"

    # First verify keyword search returns no results
    keyword_response = await sqlite_vec_index.query_keyword(query_string, k=5, score_threshold=0.0)
    assert len(keyword_response.chunks) == 0, "Keyword search should return no results"

    # Get hybrid results
    response = await sqlite_vec_index.query_hybrid(
        embedding=query_embedding,
        query_string=query_string,
        k=3,
        score_threshold=0.0,
        reranker_type="rrf",
        reranker_params={"impact_factor": 60.0},
    )

    # Should still get results from vector search
    assert len(response.chunks) > 0, "Should get results from vector search even with no keyword matches"
    # Verify scores are in descending order
    assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1))


@pytest.mark.asyncio
async def test_query_chunks_hybrid_score_threshold(sqlite_vec_index, sample_chunks, sample_embeddings):
    """Test hybrid search with a high score threshold."""
    await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)

    # Use a very high score threshold that no results will meet
    query_embedding = sample_embeddings[0]
    query_string = "Sentence 5"

    response = await sqlite_vec_index.query_hybrid(
        embedding=query_embedding,
        query_string=query_string,
        k=3,
        score_threshold=1000.0,  # Very high threshold
        reranker_type="rrf",
        reranker_params={"impact_factor": 60.0},
    )

    # Should return no results due to high threshold
    assert len(response.chunks) == 0


@pytest.mark.asyncio
async def test_query_chunks_hybrid_different_embedding(
    sqlite_vec_index, sample_chunks, sample_embeddings, embedding_dimension
):
    """Test hybrid search with a different embedding than the stored ones."""
    await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)

    # Create a random embedding that's different from stored ones
    query_embedding = np.random.rand(embedding_dimension).astype(np.float32)
    query_string = "Sentence 5"

    response = await sqlite_vec_index.query_hybrid(
        embedding=query_embedding,
        query_string=query_string,
        k=3,
        score_threshold=0.0,
        reranker_type="rrf",
        reranker_params={"impact_factor": 60.0},
    )

    # Should still get results if keyword matches exist
    assert len(response.chunks) > 0
    # Verify scores are in descending order
    assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1))


@pytest.mark.asyncio
async def test_query_chunks_hybrid_rrf_ranking(sqlite_vec_index, sample_chunks, sample_embeddings):
    """Test that RRF properly combines rankings when documents appear in both search methods."""
    await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)

    # Create a query embedding that's similar to the first chunk
    query_embedding = sample_embeddings[0]
    # Use a keyword that appears in multiple documents
    query_string = "Sentence 5"

    response = await sqlite_vec_index.query_hybrid(
        embedding=query_embedding,
        query_string=query_string,
        k=5,
        score_threshold=0.0,
        reranker_type="rrf",
        reranker_params={"impact_factor": 60.0},
    )

    # Verify we get results from both search methods
    assert len(response.chunks) > 0
    # Verify scores are in descending order (RRF should maintain this)
    assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1))

@pytest.mark.asyncio
async def test_query_chunks_hybrid_score_selection(sqlite_vec_index, sample_chunks, sample_embeddings):
    await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)

    # Create a query embedding that's similar to the first chunk
    query_embedding = sample_embeddings[0]
    # Use a keyword that appears in the first document
    query_string = "Sentence 0 from document 0"

    # Test weighted re-ranking
    response = await sqlite_vec_index.query_hybrid(
        embedding=query_embedding,
        query_string=query_string,
        k=1,
        score_threshold=0.0,
        reranker_type="weighted",
        reranker_params={"alpha": 0.5},
    )
    assert len(response.chunks) == 1
    # Score should be weighted average of normalized keyword score and vector score
    assert response.scores[0] > 0.5  # Both scores should be high

    # Test RRF re-ranking
    response = await sqlite_vec_index.query_hybrid(
        embedding=query_embedding,
        query_string=query_string,
        k=1,
        score_threshold=0.0,
        reranker_type="rrf",
        reranker_params={"impact_factor": 60.0},
    )
    assert len(response.chunks) == 1
    # RRF score should be sum of reciprocal ranks
    assert response.scores[0] == pytest.approx(2.0 / 61.0, rel=1e-6)  # 1/(60+1) + 1/(60+1)

    # Test default re-ranking (should be RRF)
    response = await sqlite_vec_index.query_hybrid(
        embedding=query_embedding,
        query_string=query_string,
        k=1,
        score_threshold=0.0,
        reranker_type="rrf",
        reranker_params={"impact_factor": 60.0},
    )
    assert len(response.chunks) == 1
    assert response.scores[0] == pytest.approx(2.0 / 61.0, rel=1e-6)  # Should behave like RRF
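Note: the 2.0 / 61.0 expectation above is just reciprocal-rank fusion applied to a chunk that ranks first in both the keyword and the vector result lists. A small illustrative helper, separate from the index implementation under test, that reproduces the arithmetic:

    def rrf_score(ranks, impact_factor=60.0):
        # Reciprocal-rank fusion: sum 1 / (impact_factor + rank) over every
        # result list in which the chunk appears (ranks are 1-based).
        return sum(1.0 / (impact_factor + rank) for rank in ranks)

    # First place in both the keyword and the vector list:
    assert abs(rrf_score([1, 1]) - 2.0 / 61.0) < 1e-9
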
@pytest.mark.asyncio
async def test_query_chunks_hybrid_mixed_results(sqlite_vec_index, sample_chunks, sample_embeddings):
    """Test hybrid search with documents that appear in only one search method."""
    await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)

    # Create a query embedding that's similar to the first chunk
    query_embedding = sample_embeddings[0]
    # Use a keyword that appears in a different document
    query_string = "Sentence 9 from document 2"

    response = await sqlite_vec_index.query_hybrid(
        embedding=query_embedding,
        query_string=query_string,
        k=3,
        score_threshold=0.0,
        reranker_type="rrf",
        reranker_params={"impact_factor": 60.0},
    )

    # Should get results from both search methods
    assert len(response.chunks) > 0
    # Verify scores are in descending order
    assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1))
    # Verify we get results from both the vector-similar document and keyword-matched document
    doc_ids = {chunk.metadata["document_id"] for chunk in response.chunks}
    assert "document-0" in doc_ids  # From vector search
    assert "document-2" in doc_ids  # From keyword search


@pytest.mark.asyncio
async def test_query_chunks_hybrid_weighted_reranker_parametrization(
    sqlite_vec_index, sample_chunks, sample_embeddings
):
    """Test WeightedReRanker with different alpha values."""
    # Re-add data before each search to ensure test isolation
    await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
    query_embedding = sample_embeddings[0]
    query_string = "Sentence 0 from document 0"

    # alpha=1.0 (should behave like pure keyword)
    response = await sqlite_vec_index.query_hybrid(
        embedding=query_embedding,
        query_string=query_string,
        k=1,
        score_threshold=0.0,
        reranker_type="weighted",
        reranker_params={"alpha": 1.0},
    )
    assert len(response.chunks) > 0  # Should get at least one result
    assert any("document-0" in chunk.metadata["document_id"] for chunk in response.chunks)

    # alpha=0.0 (should behave like pure vector)
    response = await sqlite_vec_index.query_hybrid(
        embedding=query_embedding,
        query_string=query_string,
        k=1,
        score_threshold=0.0,
        reranker_type="weighted",
        reranker_params={"alpha": 0.0},
    )
    assert len(response.chunks) > 0  # Should get at least one result
    assert any("document-0" in chunk.metadata["document_id"] for chunk in response.chunks)

    await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
    # alpha=0.7 (should be a mix)
    response = await sqlite_vec_index.query_hybrid(
        embedding=query_embedding,
        query_string=query_string,
        k=1,
        score_threshold=0.0,
        reranker_type="weighted",
        reranker_params={"alpha": 0.7},
    )
    assert len(response.chunks) > 0  # Should get at least one result
    assert any("document-0" in chunk.metadata["document_id"] for chunk in response.chunks)
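Note: as a reading aid for the alpha parametrization above, the weighted re-ranker blends the two per-chunk scores as a convex combination. The sketch below assumes the simplest form of that blend, with alpha weighting the keyword side as the comments in this test suggest; the provider's actual normalization may differ:

    def weighted_score(keyword_score, vector_score, alpha):
        # alpha=1.0 -> pure keyword score, alpha=0.0 -> pure vector score,
        # intermediate values mix the two (both scores assumed pre-normalized).
        return alpha * keyword_score + (1.0 - alpha) * vector_score

    assert weighted_score(0.9, 0.2, 1.0) == 0.9               # behaves like keyword-only
    assert weighted_score(0.9, 0.2, 0.0) == 0.2               # behaves like vector-only
    assert 0.2 < weighted_score(0.9, 0.2, 0.7) < 0.9          # a mix of the two
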
@pytest.mark.asyncio
async def test_query_chunks_hybrid_rrf_impact_factor(sqlite_vec_index, sample_chunks, sample_embeddings):
    """Test RRFReRanker with different impact factors."""
    await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
    query_embedding = sample_embeddings[0]
    query_string = "Sentence 0 from document 0"

    # impact_factor=10
    response = await sqlite_vec_index.query_hybrid(
        embedding=query_embedding,
        query_string=query_string,
        k=1,
        score_threshold=0.0,
        reranker_type="rrf",
        reranker_params={"impact_factor": 10.0},
    )
    assert len(response.chunks) == 1
    assert response.scores[0] == pytest.approx(2.0 / 11.0, rel=1e-6)

    # impact_factor=100
    response = await sqlite_vec_index.query_hybrid(
        embedding=query_embedding,
        query_string=query_string,
        k=1,
        score_threshold=0.0,
        reranker_type="rrf",
        reranker_params={"impact_factor": 100.0},
    )
    assert len(response.chunks) == 1
    assert response.scores[0] == pytest.approx(2.0 / 101.0, rel=1e-6)

@pytest.mark.asyncio
async def test_query_chunks_hybrid_edge_cases(sqlite_vec_index, sample_chunks, sample_embeddings):
    await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)

    # No results from either search - use a completely different embedding and a nonzero threshold
    query_embedding = np.ones_like(sample_embeddings[0]) * -1  # Very different from sample embeddings
    query_string = "no_such_keyword_that_will_never_match"
    response = await sqlite_vec_index.query_hybrid(
        embedding=query_embedding,
        query_string=query_string,
        k=3,
        score_threshold=0.1,  # Nonzero threshold to filter out low-similarity matches
        reranker_type="rrf",
        reranker_params={"impact_factor": 60.0},
    )
    assert len(response.chunks) == 0

    # All results below threshold
    query_embedding = sample_embeddings[0]
    query_string = "Sentence 0 from document 0"
    response = await sqlite_vec_index.query_hybrid(
        embedding=query_embedding,
        query_string=query_string,
        k=3,
        score_threshold=1000.0,
        reranker_type="rrf",
        reranker_params={"impact_factor": 60.0},
    )
    assert len(response.chunks) == 0

    # Large k value
    response = await sqlite_vec_index.query_hybrid(
        embedding=query_embedding,
        query_string=query_string,
        k=100,
        score_threshold=0.0,
        reranker_type="rrf",
        reranker_params={"impact_factor": 60.0},
    )
    # Should not error, should return all available results
    assert len(response.chunks) > 0
    assert len(response.chunks) <= 100

@pytest.mark.asyncio
async def test_query_chunks_hybrid_tie_breaking(
    sqlite_vec_index, sample_embeddings, embedding_dimension, tmp_path_factory
):
    """Test tie-breaking and determinism when scores are equal."""
    # Create two chunks with the same content and embedding
    chunk1 = Chunk(content="identical", metadata={"document_id": "docA"})
    chunk2 = Chunk(content="identical", metadata={"document_id": "docB"})
    chunks = [chunk1, chunk2]
    # Use the same embedding for both chunks to ensure equal scores
    same_embedding = sample_embeddings[0]
    embeddings = np.array([same_embedding, same_embedding])

    # Clear existing data and recreate index
    await sqlite_vec_index.delete()
    temp_dir = tmp_path_factory.getbasetemp()
    db_path = str(temp_dir / "test_sqlite.db")
    sqlite_vec_index = await SQLiteVecIndex.create(dimension=embedding_dimension, db_path=db_path, bank_id="test_bank")
    await sqlite_vec_index.add_chunks(chunks, embeddings)

    # Query with the same embedding and content to ensure equal scores
    query_embedding = same_embedding
    query_string = "identical"

    # Run multiple queries to verify determinism
    responses = []
    for _ in range(3):
        response = await sqlite_vec_index.query_hybrid(
            embedding=query_embedding,
            query_string=query_string,
            k=2,
            score_threshold=0.0,
            reranker_type="rrf",
            reranker_params={"impact_factor": 60.0},
        )
        responses.append(response)

    # Verify all responses are identical
    first_response = responses[0]
    for response in responses[1:]:
        assert response.chunks == first_response.chunks
        assert response.scores == first_response.scores

    # Verify both chunks are returned with equal scores
    assert len(first_response.chunks) == 2
    assert first_response.scores[0] == first_response.scores[1]
    assert {chunk.metadata["document_id"] for chunk in first_response.chunks} == {"docA", "docB"}
Binary file not shown.
@@ -31,6 +31,25 @@ test_response_web_search:
            search_context_size: "low"
        output: "128"

test_response_file_search:
  test_name: test_response_file_search
  test_params:
    case:
      - case_id: "llama_experts"
        input: "How many experts does the Llama 4 Maverick model have?"
        tools:
          - type: file_search
            # vector_store_ids param for file_search tool gets added by the test runner
        file_content: "Llama 4 Maverick has 128 experts"
        output: "128"
      - case_id: "llama_experts_pdf"
        input: "How many experts does the Llama 4 Maverick model have?"
        tools:
          - type: file_search
            # vector_store_ids param for file_search tool gets added by the test runner
        file_path: "pdfs/llama_stack_and_models.pdf"
        output: "128"

test_response_mcp_tool:
  test_name: test_response_mcp_tool
  test_params:
@@ -5,6 +5,8 @@
# the root directory of this source tree.

import json
import os
import time

import httpx
import openai
@@ -23,6 +25,31 @@ from tests.verifications.openai_api.fixtures.load import load_test_cases
responses_test_cases = load_test_cases("responses")


def _new_vector_store(openai_client, name):
    # Ensure we don't reuse an existing vector store
    vector_stores = openai_client.vector_stores.list()
    for vector_store in vector_stores:
        if vector_store.name == name:
            openai_client.vector_stores.delete(vector_store_id=vector_store.id)

    # Create a new vector store
    vector_store = openai_client.vector_stores.create(
        name=name,
    )
    return vector_store


def _upload_file(openai_client, name, file_path):
    # Ensure we don't reuse an existing file
    files = openai_client.files.list()
    for file in files:
        if file.filename == name:
            openai_client.files.delete(file_id=file.id)

    # Upload a text file with our document content
    return openai_client.files.create(file=open(file_path, "rb"), purpose="assistants")


@pytest.mark.parametrize(
    "case",
    responses_test_cases["test_response_basic"]["test_params"]["case"],
@@ -258,6 +285,111 @@ def test_response_non_streaming_web_search(request, openai_client, model, provid
    assert case["output"].lower() in response.output_text.lower().strip()


@pytest.mark.parametrize(
    "case",
    responses_test_cases["test_response_file_search"]["test_params"]["case"],
    ids=case_id_generator,
)
def test_response_non_streaming_file_search(
    request, openai_client, model, provider, verification_config, tmp_path, case
):
    if isinstance(openai_client, LlamaStackAsLibraryClient):
        pytest.skip("Responses API file search is not yet supported in library client.")

    test_name_base = get_base_test_name(request)
    if should_skip_test(verification_config, provider, model, test_name_base):
        pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")

    vector_store = _new_vector_store(openai_client, "test_vector_store")

    if "file_content" in case:
        file_name = "test_response_non_streaming_file_search.txt"
        file_path = tmp_path / file_name
        file_path.write_text(case["file_content"])
    elif "file_path" in case:
        file_path = os.path.join(os.path.dirname(__file__), "fixtures", case["file_path"])
        file_name = os.path.basename(file_path)
    else:
        raise ValueError(f"No file content or path provided for case {case['case_id']}")

    file_response = _upload_file(openai_client, file_name, file_path)

    # Attach our file to the vector store
    file_attach_response = openai_client.vector_stores.files.create(
        vector_store_id=vector_store.id,
        file_id=file_response.id,
    )

    # Wait for the file to be attached
    while file_attach_response.status == "in_progress":
        time.sleep(0.1)
        file_attach_response = openai_client.vector_stores.files.retrieve(
            vector_store_id=vector_store.id,
            file_id=file_response.id,
        )
    assert file_attach_response.status == "completed", f"Expected file to be attached, got {file_attach_response}"
    assert not file_attach_response.last_error

    # Update our tools with the right vector store id
    tools = case["tools"]
    for tool in tools:
        if tool["type"] == "file_search":
            tool["vector_store_ids"] = [vector_store.id]

    # Create the response request, which should query our vector store
    response = openai_client.responses.create(
        model=model,
        input=case["input"],
        tools=tools,
        stream=False,
        include=["file_search_call.results"],
    )

    # Verify the file_search_tool was called
    assert len(response.output) > 1
    assert response.output[0].type == "file_search_call"
    assert response.output[0].status == "completed"
    assert response.output[0].queries  # ensure it's some non-empty list
    assert response.output[0].results
    assert case["output"].lower() in response.output[0].results[0].text.lower()
    assert response.output[0].results[0].score > 0

    # Verify the output_text generated by the response
    assert case["output"].lower() in response.output_text.lower().strip()

def test_response_non_streaming_file_search_empty_vector_store(
    request, openai_client, model, provider, verification_config
):
    if isinstance(openai_client, LlamaStackAsLibraryClient):
        pytest.skip("Responses API file search is not yet supported in library client.")

    test_name_base = get_base_test_name(request)
    if should_skip_test(verification_config, provider, model, test_name_base):
        pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")

    vector_store = _new_vector_store(openai_client, "test_vector_store")

    # Create the response request, which should query our vector store
    response = openai_client.responses.create(
        model=model,
        input="How many experts does the Llama 4 Maverick model have?",
        tools=[{"type": "file_search", "vector_store_ids": [vector_store.id]}],
        stream=False,
        include=["file_search_call.results"],
    )

    # Verify the file_search_tool was called
    assert len(response.output) > 1
    assert response.output[0].type == "file_search_call"
    assert response.output[0].status == "completed"
    assert response.output[0].queries  # ensure it's some non-empty list
    assert not response.output[0].results  # ensure we don't get any results

    # Verify some output_text was generated by the response
    assert response.output_text


@pytest.mark.parametrize(
    "case",
    responses_test_cases["test_response_mcp_tool"]["test_params"]["case"],