mirror of
				https://github.com/meta-llama/llama-stack.git
				synced 2025-10-25 09:05:37 +00:00 
			
		
		
		
	
		
			Some checks failed
		
		
	
	Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 8s
				
			Integration Tests / test-matrix (http, 3.11, datasets) (push) Failing after 8s
				
			Integration Tests / test-matrix (http, 3.11, providers) (push) Failing after 6s
				
			Integration Tests / test-matrix (http, 3.11, agents) (push) Failing after 9s
				
			Integration Tests / test-matrix (http, 3.11, post_training) (push) Failing after 9s
				
			Integration Tests / test-matrix (http, 3.11, vector_io) (push) Failing after 7s
				
			Integration Tests / test-matrix (http, 3.12, inspect) (push) Failing after 6s
				
			Integration Tests / test-matrix (http, 3.12, tool_runtime) (push) Failing after 6s
				
			Integration Tests / test-matrix (http, 3.11, inspect) (push) Failing after 13s
				
			Integration Tests / test-matrix (library, 3.11, datasets) (push) Failing after 5s
				
			Integration Tests / test-matrix (http, 3.11, tool_runtime) (push) Failing after 13s
				
			Integration Tests / test-matrix (http, 3.12, inference) (push) Failing after 11s
				
			Integration Tests / test-matrix (http, 3.12, scoring) (push) Failing after 12s
				
			Integration Tests / test-matrix (library, 3.11, agents) (push) Failing after 10s
				
			Integration Tests / test-matrix (library, 3.11, scoring) (push) Failing after 5s
				
			Integration Tests / test-matrix (library, 3.11, vector_io) (push) Failing after 5s
				
			Integration Tests / test-matrix (library, 3.12, datasets) (push) Failing after 5s
				
			Integration Tests / test-matrix (library, 3.11, inspect) (push) Failing after 13s
				
			Integration Tests / test-matrix (library, 3.11, providers) (push) Failing after 11s
				
			Integration Tests / test-matrix (library, 3.11, tool_runtime) (push) Failing after 10s
				
			Integration Tests / test-matrix (library, 3.12, inference) (push) Failing after 6s
				
			Integration Tests / test-matrix (library, 3.12, agents) (push) Failing after 8s
				
			Integration Tests / test-matrix (library, 3.12, inspect) (push) Failing after 6s
				
			Integration Tests / test-matrix (library, 3.12, post_training) (push) Failing after 5s
				
			Integration Tests / test-matrix (http, 3.11, scoring) (push) Failing after 28s
				
			Integration Tests / test-matrix (http, 3.12, providers) (push) Failing after 24s
				
			Integration Tests / test-matrix (http, 3.12, datasets) (push) Failing after 26s
				
			Integration Tests / test-matrix (http, 3.11, inference) (push) Failing after 30s
				
			Integration Tests / test-matrix (http, 3.12, agents) (push) Failing after 28s
				
			Integration Tests / test-matrix (http, 3.12, post_training) (push) Failing after 26s
				
			Integration Tests / test-matrix (http, 3.12, vector_io) (push) Failing after 23s
				
			Test Llama Stack Build / generate-matrix (push) Successful in 5s
				
			Integration Tests / test-matrix (library, 3.12, scoring) (push) Failing after 5s
				
			Test Llama Stack Build / build-custom-container-distribution (push) Failing after 5s
				
			Test External Providers / test-external-providers (venv) (push) Failing after 5s
				
			Integration Tests / test-matrix (library, 3.11, post_training) (push) Failing after 20s
				
			Integration Tests / test-matrix (library, 3.12, providers) (push) Failing after 7s
				
			Unit Tests / unit-tests (3.11) (push) Failing after 7s
				
			Update ReadTheDocs / update-readthedocs (push) Failing after 6s
				
			Integration Tests / test-matrix (library, 3.12, vector_io) (push) Failing after 8s
				
			Integration Tests / test-matrix (library, 3.11, inference) (push) Failing after 22s
				
			Test Llama Stack Build / build (push) Failing after 17s
				
			Unit Tests / unit-tests (3.13) (push) Failing after 37s
				
			Integration Tests / test-matrix (library, 3.12, tool_runtime) (push) Failing after 1m7s
				
			Test Llama Stack Build / build-single-provider (push) Failing after 1m15s
				
			Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 1m17s
				
			Unit Tests / unit-tests (3.12) (push) Failing after 1m32s
				
			Pre-commit / pre-commit (push) Failing after 2m14s
				
			# What does this PR do? Do not force 384 for the embedding dimension, use the one provided by the test run. ## Test Plan ``` pytest -s -vvv tests/integration/vector_io/test_vector_io.py --stack-config=http://localhost:8321 \ -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \ --text-model="meta-llama/Llama-3.2-3B-Instruct" \ --embedding-model=granite-embedding-125m --embedding-dimension=768 Uninstalled 1 package in 16ms Installed 1 package in 11ms INFO 2025-06-18 10:52:03,314 tests.integration.conftest:59 tests: Setting DISABLE_CODE_SANDBOX=1 for macOS /Users/leseb/Documents/AI/llama-stack/.venv/lib/python3.10/site-packages/pytest_asyncio/plugin.py:207: PytestDeprecationWarning: The configuration option "asyncio_default_fixture_loop_scope" is unset. The event loop scope for asynchronous fixtures will default to the fixture caching scope. Future versions of pytest-asyncio will default the loop scope for asynchronous fixtures to function scope. Set the default fixture loop scope explicitly in order to avoid unexpected behavior in the future. 
Valid fixture loop scopes are: "function", "class", "module", "package", "session" warnings.warn(PytestDeprecationWarning(_DEFAULT_FIXTURE_LOOP_SCOPE_UNSET)) ================================================= test session starts ================================================= platform darwin -- Python 3.10.16, pytest-8.3.4, pluggy-1.5.0 -- /Users/leseb/Documents/AI/llama-stack/.venv/bin/python cachedir: .pytest_cache metadata: {'Python': '3.10.16', 'Platform': 'macOS-15.5-arm64-arm-64bit', 'Packages': {'pytest': '8.3.4', 'pluggy': '1.5.0'}, 'Plugins': {'cov': '6.0.0', 'html': '4.1.1', 'json-report': '1.5.0', 'timeout': '2.4.0', 'metadata': '3.1.1', 'asyncio': '0.25.3', 'anyio': '4.8.0', 'nbval': '0.11.0'}} rootdir: /Users/leseb/Documents/AI/llama-stack configfile: pyproject.toml plugins: cov-6.0.0, html-4.1.1, json-report-1.5.0, timeout-2.4.0, metadata-3.1.1, asyncio-0.25.3, anyio-4.8.0, nbval-0.11.0 asyncio: mode=strict, asyncio_default_fixture_loop_scope=None collected 8 items tests/integration/vector_io/test_vector_io.py::test_vector_db_retrieve[emb=granite-embedding-125m:dim=768] PASSED tests/integration/vector_io/test_vector_io.py::test_vector_db_register[emb=granite-embedding-125m:dim=768] PASSED tests/integration/vector_io/test_vector_io.py::test_insert_chunks[emb=granite-embedding-125m:dim=768-test_case0] PASSED tests/integration/vector_io/test_vector_io.py::test_insert_chunks[emb=granite-embedding-125m:dim=768-test_case1] PASSED tests/integration/vector_io/test_vector_io.py::test_insert_chunks[emb=granite-embedding-125m:dim=768-test_case2] PASSED tests/integration/vector_io/test_vector_io.py::test_insert_chunks[emb=granite-embedding-125m:dim=768-test_case3] PASSED tests/integration/vector_io/test_vector_io.py::test_insert_chunks[emb=granite-embedding-125m:dim=768-test_case4] PASSED tests/integration/vector_io/test_vector_io.py::test_insert_chunks_with_precomputed_embeddings[emb=granite-embedding-125m:dim=768] PASSED 
================================================== 8 passed in 5.50s ================================================== ``` Signed-off-by: Sébastien Han <seb@redhat.com>
		
			
				
	
	
		
			238 lines
		
	
	
	
		
			8.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			238 lines
		
	
	
	
		
			8.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # Copyright (c) Meta Platforms, Inc. and affiliates.
 | |
| # All rights reserved.
 | |
| #
 | |
| # This source code is licensed under the terms described in the LICENSE file in
 | |
| # the root directory of this source tree.
 | |
| 
 | |
| import pytest
 | |
| from llama_stack_client import BadRequestError
 | |
| from llama_stack_client.types import Document
 | |
| 
 | |
| 
 | |
@pytest.fixture(scope="function")
def client_with_empty_registry(client_with_models):
    """Yield ``client_with_models`` with all vector DBs unregistered.

    Every vector DB listed by the client is unregistered before the test
    body runs and again once it has finished.
    """

    def _unregister_all_vector_dbs():
        # Collect the identifiers first, then unregister them one by one.
        identifiers = [entry.identifier for entry in client_with_models.vector_dbs.list()]
        for identifier in identifiers:
            client_with_models.vector_dbs.unregister(vector_db_id=identifier)

    _unregister_all_vector_dbs()
    yield client_with_models

    # When the tests target a stateful server instance, registrations would
    # otherwise outlive the last test, so clean up on the way out as well.
    _unregister_all_vector_dbs()
 | |
| 
 | |
| 
 | |
@pytest.fixture(scope="session")
def sample_documents():
    """Return four small inline ``Document`` objects used as the RAG test corpus."""
    # (document_id, content, metadata) triples, in the order they are returned.
    document_specs = (
        (
            "test-doc-1",
            "Python is a high-level programming language.",
            {"category": "programming", "difficulty": "beginner"},
        ),
        (
            "test-doc-2",
            "Machine learning is a subset of artificial intelligence.",
            {"category": "AI", "difficulty": "advanced"},
        ),
        (
            "test-doc-3",
            "Data structures are fundamental to computer science.",
            {"category": "computer science", "difficulty": "intermediate"},
        ),
        (
            "test-doc-4",
            "Neural networks are inspired by biological neural networks.",
            {"category": "AI", "difficulty": "advanced"},
        ),
    )
    return [
        Document(document_id=doc_id, content=text, metadata=meta)
        for doc_id, text, meta in document_specs
    ]
 | |
| 
 | |
| 
 | |
def assert_valid_chunk_response(response):
    """Assert that a chunk query response is non-empty and well-formed.

    The response must expose ``chunks`` and ``scores`` of equal, non-zero
    length, and every chunk's ``content`` must be a ``str``.
    """
    chunk_count = len(response.chunks)
    assert chunk_count > 0
    score_count = len(response.scores)
    assert score_count > 0
    assert chunk_count == score_count
    assert all(isinstance(chunk.content, str) for chunk in response.chunks)
 | |
| 
 | |
| 
 | |
def assert_valid_text_response(response):
    """Assert that ``response.content`` is non-empty and every item's ``text`` is a ``str``."""
    items = response.content
    assert len(items) > 0
    for item in items:
        assert isinstance(item.text, str)
 | |
| 
 | |
| 
 | |
def test_vector_db_insert_inline_and_query(
    client_with_empty_registry, sample_documents, embedding_model_id, embedding_dimension
):
    """Insert the inline sample documents via the RAG tool and query them through vector_io."""
    client = client_with_empty_registry
    db_id = "test_vector_db"

    client.vector_dbs.register(
        vector_db_id=db_id,
        embedding_model=embedding_model_id,
        embedding_dimension=embedding_dimension,
    )
    client.tool_runtime.rag_tool.insert(
        documents=sample_documents,
        chunk_size_in_tokens=512,
        vector_db_id=db_id,
    )

    # 1) A query whose wording overlaps directly with one of the documents.
    direct_match = client.vector_io.query(vector_db_id=db_id, query="programming language")
    assert_valid_chunk_response(direct_match)
    assert any("Python" in chunk.content for chunk in direct_match.chunks)

    # 2) A query that is only semantically related to the expected document.
    semantic_match = client.vector_io.query(
        vector_db_id=db_id,
        query="AI and brain-inspired computing",
    )
    assert_valid_chunk_response(semantic_match)
    assert any("neural networks" in chunk.content.lower() for chunk in semantic_match.chunks)

    # 3) Cap the number of returned chunks (max_chunks=2).
    capped = client.vector_io.query(
        vector_db_id=db_id,
        query="computer",
        params={"max_chunks": 2},
    )
    assert_valid_chunk_response(capped)
    assert len(capped.chunks) <= 2

    # 4) Require a minimum similarity score on every returned chunk.
    thresholded = client.vector_io.query(
        vector_db_id=db_id,
        query="computer",
        params={"score_threshold": 0.01},
    )
    assert_valid_chunk_response(thresholded)
    assert all(score >= 0.01 for score in thresholded.scores)
 | |
| 
 | |
| 
 | |
def test_vector_db_insert_from_url_and_query(
    client_with_empty_registry, sample_documents, embedding_model_id, embedding_dimension
):
    """Insert documents referenced by URL via the RAG tool and query them through vector_io."""
    client = client_with_empty_registry

    # At least one vector_io provider has to be available.
    vector_io_providers = [provider for provider in client.providers.list() if provider.api == "vector_io"]
    assert len(vector_io_providers) > 0

    db_id = "test_vector_db"
    client.vector_dbs.register(
        vector_db_id=db_id,
        embedding_model=embedding_model_id,
        embedding_dimension=embedding_dimension,
    )

    # Listing the vector DBs confirms that the registration took effect.
    registered_ids = [entry.identifier for entry in client.vector_dbs.list()]
    assert db_id in registered_ids

    # Each document's content is a URL pointing at a torchtune tutorial page.
    tutorial_pages = ("memory_optimizations.rst", "chat.rst", "llama3.rst")
    documents = [
        Document(
            document_id=f"num-{i}",
            content=f"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/{url}",
            mime_type="text/plain",
            metadata={},
        )
        for i, url in enumerate(tutorial_pages)
    ]
    client.tool_runtime.rag_tool.insert(
        documents=documents,
        vector_db_id=db_id,
        chunk_size_in_tokens=512,
    )

    # Ask about the fine-tuning method; some returned chunk should mention LoRA.
    method_response = client.vector_io.query(
        vector_db_id=db_id,
        query="What's the name of the fine-tunning method used?",
    )
    assert_valid_chunk_response(method_response)
    assert any("lora" in chunk.content.lower() for chunk in method_response.chunks)

    # Ask about the model; some returned chunk should mention llama2.
    model_response = client.vector_io.query(
        vector_db_id=db_id,
        query="Which Llama model is mentioned?",
    )
    assert_valid_chunk_response(model_response)
    assert any("llama2" in chunk.content.lower() for chunk in model_response.chunks)
 | |
| 
 | |
| 
 | |
def test_rag_tool_insert_and_query(client_with_empty_registry, embedding_model_id, embedding_dimension):
    """Insert URL documents via the RAG tool and check how query results are templated."""
    client = client_with_empty_registry
    rag_tool = client.tool_runtime.rag_tool

    # At least one vector_io provider has to be available.
    vector_io_providers = [provider for provider in client.providers.list() if provider.api == "vector_io"]
    assert len(vector_io_providers) > 0

    db_id = "test_vector_db"
    client.vector_dbs.register(
        vector_db_id=db_id,
        embedding_model=embedding_model_id,
        embedding_dimension=embedding_dimension,
    )

    registered_ids = [entry.identifier for entry in client.vector_dbs.list()]
    assert db_id in registered_ids

    # Each document's content is a URL; the file name is also stored as metadata.
    tutorial_pages = ("memory_optimizations.rst", "chat.rst", "llama3.rst")
    documents = [
        Document(
            document_id=f"num-{i}",
            content=f"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/{url}",
            mime_type="text/plain",
            metadata={"author": "llama", "source": url},
        )
        for i, url in enumerate(tutorial_pages)
    ]
    rag_tool.insert(
        documents=documents,
        vector_db_id=db_id,
        chunk_size_in_tokens=512,
    )

    question = "What is the name of the method used for fine-tuning?"

    # No query_config: the returned text is expected to carry a "metadata:" marker.
    response_with_metadata = rag_tool.query(
        vector_db_ids=[db_id],
        content=question,
    )
    assert_valid_text_response(response_with_metadata)
    assert any("metadata:" in chunk.text.lower() for chunk in response_with_metadata.content)

    # The custom template below has no metadata placeholder, so no "metadata:"
    # marker is expected in any returned text item.
    response_without_metadata = rag_tool.query(
        vector_db_ids=[db_id],
        content=question,
        query_config={
            "include_metadata_in_content": True,
            "chunk_template": "Result {index}\nContent: {chunk.content}\n",
        },
    )
    assert_valid_text_response(response_without_metadata)
    assert not any("metadata:" in chunk.text.lower() for chunk in response_without_metadata.content)

    # A template without any template variables must be rejected.
    with pytest.raises((ValueError, BadRequestError)):
        rag_tool.query(
            vector_db_ids=[db_id],
            content=question,
            query_config={
                "chunk_template": "This should raise a ValueError because it is missing the proper template variables",
            },
        )
 |