Mirror of https://github.com/meta-llama/llama-stack.git
Synced 2025-07-18 10:52:28 +00:00 · 7 commits
31b088978a
fix: Fix /vector-stores/create API when vector store with duplicate name (#2617)
# What does this PR do?

Resolves https://github.com/meta-llama/llama-stack/issues/2735

Currently, if you test against OpenAI's Vector Stores API, the `client.vector_stores.search` call fails with an invalid vector_db during routing (see the script referenced in the clickable item under the Test Plan section). This PR ensures that `client.vector_stores.search()` is compatible with OpenAI's Vector Stores API.

The two biggest changes:
1. The `name`, which was previously used as the `vector_db_id`, has been changed to be consistent with OpenAI's `vs_{uuid}` format.
2. The vector store has to be referenced by its ID; the name is not reliable, as every `client.vector_stores.create` call results in a new vector store.

NOTE: I believe this is a breaking change for end users, as they'll need to update their VectorDB identifiers.

## Test Plan

Unit tests:
```bash
./scripts/unit-tests.sh tests/unit/providers/vector_io/ -v
```

Integration tests:
```bash
ENABLE_MILVUS=milvus llama stack run /Users/farceo/dev/llama-stack/llama_stack/templates/starter/run.yaml --image-type venv
LLAMA_STACK_CONFIG=http://localhost:8321 pytest -sv tests/integration/vector_io/test_openai_vector_stores.py --embedding-model=all-MiniLM-L6-v2 -vv
```

Unit tests and test script below 👇

<details>
<summary>Click here for script used to test OpenAI and Llama Stack Vector Store implementation</summary>

```python
import json
import argparse
from openai import OpenAI, pagination
import logging
from colorama import Fore, Style, init
import traceback
import os

# Initialize colorama for color support in terminal
init(autoreset=True)

# Setup basic logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

DEMO_VECTOR_STORE_NAME = "Support FAQ FJA"

global DEMO_VECTOR_STORE_ID
global DEMO_VECTOR_STORE_ID2


def colored_print(color, text):
    """Prints text to the console with the specified color."""
    print(f"{color}{text}{Style.RESET_ALL}")


def log_and_print(color, message, level=logging.INFO):
    """Logs a message and prints it to the console with the specified color."""
    logging.log(level, message)
    colored_print(color, message)


def run_tests(client, prefix="openai"):
    """
    Runs all tests using the provided OpenAI client and saves the output
    to JSON files with the given prefix.
    """
    # Create the directory if it doesn't exist
    os.makedirs('openai_testing', exist_ok=True)

    # Default values in case tests fail
    global DEMO_VECTOR_STORE_ID, DEMO_VECTOR_STORE_ID2
    DEMO_VECTOR_STORE_ID = None
    DEMO_VECTOR_STORE_ID2 = None

    def test_idempotent_vector_store_creation():
        """
        Test that creating a vector store with the same name is idempotent.
        """
        log_and_print(Fore.BLUE, "Starting vector store creation test...")
        try:
            vector_store = client.vector_stores.create(
                name=DEMO_VECTOR_STORE_NAME,
            )

            # Attempt to create the same vector store again
            vector_store2 = client.vector_stores.create(
                name=DEMO_VECTOR_STORE_NAME,
            )

            # Check instead of assert
            if vector_store2.id != vector_store.id:
                log_and_print(Fore.YELLOW, f"FAILED IDEMPOTENCY: the same VectorStore name for {prefix.upper()} does not return the same ID", level=logging.WARNING)
            else:
                log_and_print(Fore.GREEN, f"PASSED IDEMPOTENCY: {vector_store2.id} == {vector_store.id} the same VectorStore name for {prefix.upper()} returns the same ID")

            vector_store_data = vector_store.to_dict()
            log_and_print(Fore.WHITE, f"vector_stores.create = {json.dumps(vector_store_data, indent=2)}")
            with open(f'openai_testing/{prefix}_vector_store_create.json', 'w') as f:
                json.dump(vector_store_data, f, indent=2)

            global DEMO_VECTOR_STORE_ID, DEMO_VECTOR_STORE_ID2
            DEMO_VECTOR_STORE_ID = vector_store.id
            DEMO_VECTOR_STORE_ID2 = vector_store2.id
            return DEMO_VECTOR_STORE_ID, DEMO_VECTOR_STORE_ID2
        except Exception as e:
            log_and_print(Fore.RED, f"Idempotent vector store creation test failed: {e}", level=logging.ERROR)
            logging.error(traceback.format_exc())
            # Create a fallback vector store ID if needed
            if 'vector_store' in locals() and vector_store:
                DEMO_VECTOR_STORE_ID = vector_store.id
            return DEMO_VECTOR_STORE_ID, DEMO_VECTOR_STORE_ID2

    def test_vector_store_list():
        """
        Test listing vector stores.
        """
        log_and_print(Fore.BLUE, "Starting vector store list test...")
        try:
            vector_stores = client.vector_stores.list()

            # Check instead of assert
            if not isinstance(vector_stores, pagination.SyncCursorPage):
                log_and_print(Fore.YELLOW, f"FAILED: Expected a list of vector stores, got {type(vector_stores)}", level=logging.WARNING)
            else:
                log_and_print(Fore.GREEN, "Vector store list test passed!")

            vector_stores_data = vector_stores.to_dict()
            log_and_print(Fore.WHITE, f"vector_stores.list = {json.dumps(vector_stores_data, indent=2)}")
            with open(f'openai_testing/{prefix}_vector_store_list.json', 'w') as f:
                json.dump(vector_stores_data, f, indent=2)
        except Exception as e:
            log_and_print(Fore.RED, f"Vector store list test failed: {e}", level=logging.ERROR)
            logging.error(traceback.format_exc())

    def test_retrieve_vector_store():
        """
        Test retrieving a specific vector store.
        """
        log_and_print(Fore.BLUE, "Starting retrieve vector store test...")
        if not DEMO_VECTOR_STORE_ID:
            log_and_print(Fore.YELLOW, "Skipping retrieve vector store test - no vector store ID available", level=logging.WARNING)
            return
        try:
            vector_store = client.vector_stores.retrieve(
                vector_store_id=DEMO_VECTOR_STORE_ID,
            )

            # Check instead of assert
            if vector_store.id != DEMO_VECTOR_STORE_ID:
                log_and_print(Fore.YELLOW, "FAILED: Retrieved vector store ID does not match", level=logging.WARNING)
            else:
                log_and_print(Fore.GREEN, "Retrieve vector store test passed!")

            vector_store_data = vector_store.to_dict()
            log_and_print(Fore.WHITE, f"vector_stores.retrieve = {json.dumps(vector_store_data, indent=2)}")
            with open(f'openai_testing/{prefix}_vector_store_retrieve.json', 'w') as f:
                json.dump(vector_store_data, f, indent=2)
        except Exception as e:
            log_and_print(Fore.RED, f"Retrieve vector store test failed: {e}", level=logging.ERROR)
            logging.error(traceback.format_exc())

    def test_modify_vector_store():
        """
        Test modifying a vector store.
        """
        log_and_print(Fore.BLUE, "Starting modify vector store test...")
        if not DEMO_VECTOR_STORE_ID:
            log_and_print(Fore.YELLOW, "Skipping modify vector store test - no vector store ID available", level=logging.WARNING)
            return
        try:
            updated_vector_store = client.vector_stores.update(
                vector_store_id=DEMO_VECTOR_STORE_ID,
                name="Updated Support FAQ FJA",
            )

            # Check instead of assert
            if updated_vector_store.name != "Updated Support FAQ FJA":
                log_and_print(Fore.YELLOW, "FAILED: Vector store name was not updated correctly", level=logging.WARNING)
            else:
                log_and_print(Fore.GREEN, "Modify vector store test passed!")

            updated_vector_store_data = updated_vector_store.to_dict()
            log_and_print(Fore.WHITE, f"vector_stores.modify = {json.dumps(updated_vector_store_data, indent=2)}")
            with open(f'openai_testing/{prefix}_vector_store_modify.json', 'w') as f:
                json.dump(updated_vector_store_data, f, indent=2)
        except Exception as e:
            log_and_print(Fore.RED, f"Modify vector store test failed: {e}", level=logging.ERROR)
            logging.error(traceback.format_exc())

    def test_delete_vector_store():
        """
        Test deleting a vector store.
        """
        log_and_print(Fore.BLUE, "Starting delete vector store test...")
        if not DEMO_VECTOR_STORE_ID2:
            log_and_print(Fore.YELLOW, "Skipping delete vector store test - no second vector store ID available", level=logging.WARNING)
            return
        try:
            response = client.vector_stores.delete(
                vector_store_id=DEMO_VECTOR_STORE_ID2,
            )
            log_and_print(Fore.GREEN, "Delete vector store test passed!")

            response_data = response.to_dict()
            log_and_print(Fore.WHITE, f"Vector store delete response = {json.dumps(response_data, indent=2)}")
            with open(f'openai_testing/{prefix}_vector_store_delete.json', 'w') as f:
                json.dump(response_data, f, indent=2)
        except Exception as e:
            log_and_print(Fore.RED, f"Delete vector store test failed: {e}", level=logging.ERROR)
            logging.error(traceback.format_exc())

    def test_create_vector_store_file():
        log_and_print(Fore.BLUE, "Starting create vector store file test...")
        if not DEMO_VECTOR_STORE_ID:
            log_and_print(Fore.YELLOW, "Skipping create vector store file test - no vector store ID available", level=logging.WARNING)
            return
        try:
            # create jsonl of files as an example
            with open("mydata.jsonl", "w") as f:
                f.write('{"text": "What is the return policy?", "metadata": {"category": "support"}}\n')
                f.write('{"text": "How do I reset my password?", "metadata": {"category": "support"}}\n')
                f.write('{"text": "Where can I find my order history?", "metadata": {"category": "support"}}\n')
                f.write('{"text": "What are the shipping options?", "metadata": {"category": "support"}}\n')
                f.write('{"text": "What is your favorite banana?", "metadata": {"category": "support"}}\n')

            # Create a simple text file if my_data_small.txt doesn't exist
            if not os.path.exists("my_data_small.txt"):
                with open("my_data_small.txt", "w") as f:
                    f.write("This is a test file for vector store testing.\n")

            created_file = client.files.create(
                file=open("my_data_small.txt", "rb"),
                purpose="assistants",
            )
            created_file_data = created_file.to_dict()
            log_and_print(Fore.WHITE, f"Created file {json.dumps(created_file_data, indent=2)}")
            with open(f'openai_testing/{prefix}_file_create.json', 'w') as f:
                json.dump(created_file_data, f, indent=2)

            retrieved_files = client.files.retrieve(created_file.id)
            retrieved_files_data = retrieved_files.to_dict()
            log_and_print(Fore.WHITE, f"Retrieved file {json.dumps(retrieved_files_data, indent=2)}")
            with open(f'openai_testing/{prefix}_file_retrieve.json', 'w') as f:
                json.dump(retrieved_files_data, f, indent=2)

            vector_store_file = client.vector_stores.files.create(
                vector_store_id=DEMO_VECTOR_STORE_ID,
                file_id=created_file.id,
            )
            log_and_print(Fore.GREEN, "Create vector store file test passed!")
        except Exception as e:
            log_and_print(Fore.RED, f"Create vector store file test failed: {e}", level=logging.ERROR)
            logging.error(traceback.format_exc())

    def test_search_vector_store():
        """
        Test searching a vector store.
        """
        log_and_print(Fore.BLUE, "Starting search vector store test...")
        if not DEMO_VECTOR_STORE_ID:
            log_and_print(Fore.YELLOW, "Skipping search vector store test - no vector store ID available", level=logging.WARNING)
            return
        try:
            query = "What is the banana policy?"
            search_results = client.vector_stores.search(
                vector_store_id=DEMO_VECTOR_STORE_ID,
                query=query,
                max_num_results=10,
                ranking_options={
                    'ranker': 'default-2024-11-15',
                    'score_threshold': 0.0,
                },
                rewrite_query=False,
            )

            # Check instead of assert
            if not isinstance(search_results, pagination.SyncPage):
                log_and_print(Fore.YELLOW, f"FAILED: Expected a list of search results, got {type(search_results)}", level=logging.WARNING)
            else:
                log_and_print(Fore.GREEN, "Search vector store test passed!")

            search_results_dict = search_results.to_dict()
            log_and_print(Fore.WHITE, f"Search results = {search_results_dict}")
            with open(f'openai_testing/{prefix}_vector_store_search.json', 'w') as f:
                json.dump(search_results_dict, f, indent=2)

            log_and_print(Fore.WHITE, f"vector_stores.search = {search_results.to_json()}")
        except Exception as e:
            log_and_print(Fore.RED, f"Search vector store test failed: {e}", level=logging.ERROR)
            logging.error(traceback.format_exc())

    # Run all tests in sequence, even if some fail
    test_results = []

    try:
        result = test_idempotent_vector_store_creation()
        if result and len(result) == 2:
            DEMO_VECTOR_STORE_ID, DEMO_VECTOR_STORE_ID2 = result
        test_results.append(True)
    except Exception as e:
        log_and_print(Fore.RED, f"Vector store creation test failed: {e}", level=logging.ERROR)
        logging.error(traceback.format_exc())
        test_results.append(False)

    for test_func in [
        test_vector_store_list,
        test_retrieve_vector_store,
        test_modify_vector_store,
        test_delete_vector_store,
        test_create_vector_store_file,
        test_search_vector_store,
    ]:
        try:
            test_func()
            test_results.append(True)
        except Exception as e:
            log_and_print(Fore.RED, f"{test_func.__name__} failed: {e}", level=logging.ERROR)
            logging.error(traceback.format_exc())
            test_results.append(False)

    if all(test_results):
        log_and_print(Fore.GREEN, f"All {prefix} tests completed successfully!")
    else:
        failed_count = test_results.count(False)
        log_and_print(Fore.YELLOW, f"{failed_count} {prefix} test(s) failed, but script completed.")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run OpenAI and/or LlamaStack tests.")
    parser.add_argument(
        "--provider",
        type=str,
        default="llama",
        choices=["openai", "llama", "both"],
        help="Specify which environment to test: openai, llama, or both. Default is llama.",
    )
    args = parser.parse_args()

    try:
        if args.provider in ("openai", "both"):
            openai_client = OpenAI()
            run_tests(openai_client, prefix="openai")

        if args.provider in ("llama", "both"):
            llama_client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")
            run_tests(llama_client, prefix="llama")

        log_and_print(Fore.GREEN, "All tests completed!")
    except Exception as e:
        log_and_print(Fore.RED, f"Tests failed to complete: {e}", level=logging.ERROR)
        logging.error(traceback.format_exc())
```

</details>

---------

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
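To make the breaking change above concrete, here is a minimal sketch, assuming an OpenAI-compatible client pointed at a local Llama Stack server (the `base_url` mirrors the test script; the snippet is illustrative, not part of the PR):

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

# After this change, create() follows OpenAI semantics: a duplicate name
# yields a brand-new store with a fresh vs_{uuid} ID, rather than reusing
# the name as the vector_db_id.
store_a = client.vector_stores.create(name="Support FAQ")
store_b = client.vector_stores.create(name="Support FAQ")
assert store_a.id != store_b.id  # names are not reliable handles

# Downstream calls must therefore reference the store by ID, not by name.
results = client.vector_stores.search(
    vector_store_id=store_a.id,
    query="What is the return policy?",
)
```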

---

f4950f4ef0
fix: AccessDeniedError leads to HTTP 500 instead of error 403 (#2595)
Resolves access control error visibility issues where 500 errors were returned instead of proper 403 responses with actionable error messages.

Enhance AccessDeniedError with detailed context and improve exception handling:

- Enhanced the `AccessDeniedError` class to include user, action, and resource context
  - Added constructor parameters for action, resource, and user
  - Generate detailed error messages showing the user principal, attributes, and attempted resource
  - Backward compatible with existing usage (falls back to a generic message)
- Updated exception handling in server.py
  - Import `AccessDeniedError` from the access_control module
  - Return proper 403 status codes with detailed error messages
  - Separate handling for `PermissionError` (generic) vs `AccessDeniedError` (detailed)
- Enhanced error context at raise sites
  - Updated routing_tables/common.py to pass action, resource, and user context
  - Updated agents persistence to include context in access-denied errors
  - Provides better debugging information for access control issues
- Added comprehensive unit tests
  - Created tests/unit/server/test_server.py with 13 test cases
  - Covers `AccessDeniedError` with and without context
  - Tests all exception types (ValidationError, BadRequestError, AuthenticationRequiredError, etc.)
  - Validates proper HTTP status codes and error message formats

## Test Plan

```
server:
  port: 8321
  access_policy:
  - permit:
      principal: admin
      actions: [create, read, delete]
    when: user with admin in groups
  - permit:
      actions: [read]
    when: user with system:authenticated in roles
```

then:

```
curl --request POST --url http://localhost:8321/v1/vector-dbs \
  --header "Authorization: Bearer your-bearer" \
  --data '{
    "vector_db_id": "my_demo_vector_db",
    "embedding_model": "ibm-granite/granite-embedding-125m-english",
    "embedding_dimension": 768,
    "provider_id": "milvus"
  }'
```

Depending on whether the user is in the admin group or not, you should get the `AccessDeniedError`. Before this PR, this led to a 500 error and a `Traceback` displayed in the logs. After the PR, logs display a simpler error (unless DEBUG logging is set) and a 403 Forbidden error is returned on the HTTP side.

---------

Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com>
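A minimal sketch of the enhanced exception described in this commit (the class shape and message format are illustrative; the actual implementation lives in llama-stack's access_control module):

```python
class AccessDeniedError(Exception):
    """Carries enough context to produce an actionable 403 message."""

    def __init__(self, action=None, resource=None, user=None):
        self.action, self.resource, self.user = action, resource, user
        if action and resource and user:
            # Detailed message showing principal, attributes, and target.
            message = (
                f"User '{user.principal}' with attributes {user.attributes} "
                f"cannot perform action '{action}' on resource '{resource}'"
            )
        else:
            # Backward compatible: generic message when no context is given.
            message = "Access denied"
        super().__init__(message)


# Server-side translation (sketch): map the exception to a 403 response
# instead of letting it bubble up as an unhandled 500 with a traceback.
#
# except AccessDeniedError as e:
#     return error_response(status_code=403, detail=str(e))
```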

---

e7eb9f9adc
fix: dataset metadata without provider_id (#2527)
# What does this PR do?

Fixes an error when inferring dataset provider_id with metadata.

Closes [#2506](https://github.com/meta-llama/llama-stack/issues/2506)

Signed-off-by: Juanma Barea <juanmabareamartinez@gmail.com>

---

7c1998db25
feat: fine grained access control policy (#2264)
This allows a set of rules to be defined for determining access to resources. The rules are (loosely) based on the cedar policy format.

A rule defines a list of actions either to permit or to forbid. It may specify a principal or a resource that must match for the rule to take effect. It may also specify a condition, either a 'when' or an 'unless', with additional constraints as to where the rule applies.

A list of rules is held for each type to be protected and tried in order to find a match. If a match is found, the request is permitted or forbidden depending on the type of rule. If no match is found, the request is denied. If no rules are specified for a given type, a rule that allows any action as long as the resource attributes match the user attributes is added (i.e. the previous behaviour is the default).

Some examples in yaml (a sketch of the rule-matching logic follows the example):

```
model:
- permit:
    principal: user-1
    actions: [create, read, delete]
  comment: user-1 has full access to all models
- permit:
    principal: user-2
    actions: [read]
    resource: model-1
  comment: user-2 has read access to model-1 only
- permit:
    actions: [read]
  when:
    user_in: resource.namespaces
  comment: any user has read access to models with matching attributes
vector_db:
- forbid:
    actions: [create, read, delete]
  unless:
    user_in: role::admin
  comment: only user with admin role can use vector_db resources
```

---------

Signed-off-by: Gordon Sim <gsim@redhat.com>
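A minimal sketch of the first-match evaluation described above, with the 'when'/'unless' conditions omitted (the types and names are illustrative, not the actual llama-stack implementation):

```python
from dataclasses import dataclass


@dataclass
class Rule:
    effect: str                   # "permit" or "forbid"
    actions: list[str]
    principal: str | None = None  # None matches any principal
    resource: str | None = None   # None matches any resource
    # 'when'/'unless' conditions are omitted for brevity


def is_allowed(rules: list[Rule], user: str, action: str, resource: str) -> bool:
    for rule in rules:                      # rules are tried in order
        if action not in rule.actions:
            continue
        if rule.principal is not None and rule.principal != user:
            continue
        if rule.resource is not None and rule.resource != resource:
            continue
        return rule.effect == "permit"      # first matching rule decides
    return False                            # no match: request is denied


# e.g. user-2 may read model-1 but not delete it:
rules = [
    Rule("permit", ["create", "read", "delete"], principal="user-1"),
    Rule("permit", ["read"], principal="user-2", resource="model-1"),
]
assert is_allowed(rules, "user-2", "read", "model-1")
assert not is_allowed(rules, "user-2", "delete", "model-1")
```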

---

51e6f529f3
fix: index non-MCP toolgroups at registration time (#2272)
Two somewhat annoying fixes:

- we are going to index tools for non-MCP toolgroups always (like we used to do), because there are just random assumptions in our tests, etc. and I don't want to fix them right now
- we need to handle the funny case of toolgroups like `builtin::rag/knowledge_search`, where we added the tool name to use in the toolgroup itself (see the sketch below)
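To illustrate the second item, a minimal sketch of splitting such an identifier (the helper name is hypothetical, not the actual llama-stack code):

```python
def split_toolgroup_id(toolgroup_id: str) -> tuple[str, str | None]:
    """Split a toolgroup identifier that may embed a tool name.

    'builtin::rag/knowledge_search' -> ('builtin::rag', 'knowledge_search')
    'builtin::websearch'            -> ('builtin::websearch', None)
    """
    group_id, sep, tool_name = toolgroup_id.partition("/")
    return group_id, (tool_name if sep else None)


assert split_toolgroup_id("builtin::rag/knowledge_search") == ("builtin::rag", "knowledge_search")
assert split_toolgroup_id("builtin::websearch") == ("builtin::websearch", None)
```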

---

ce33d02443
fix(tools): do not index tools, only index toolgroups (#2261)
When registering an MCP endpoint, we cannot list tools (like we used to) since the MCP endpoint may be behind an auth wall. Registration can happen much sooner (via run.yaml). Instead, we do listing only when the _user_ actually calls listing. Furthermore, we cache the list in-memory in the server. Currently, the cache is not invalidated -- we may want to periodically re-list for MCP servers.

Note that they must call `list_tools` before calling `invoke_tool` -- we use this critically (a sketch of this behavior follows below).

This will enable us to list MCP servers in run.yaml.

## Test Plan

Existing tests, updated tests accordingly.
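A hypothetical sketch of the lazy-list-and-cache behavior described above (`mcp_client` and the method names are illustrative, not the actual llama-stack routing-table code):

```python
class ToolGroupRouter:
    """Defers tool listing for MCP endpoints until a user asks for it."""

    def __init__(self, mcp_client):
        self.mcp_client = mcp_client
        self._tool_cache: dict[str, list] = {}  # toolgroup_id -> cached tools

    async def list_tools(self, toolgroup_id: str) -> list:
        # Registration never contacts the endpoint (it may be behind auth);
        # the first user-initiated listing does, and the result is cached.
        # The cache is never invalidated, matching the caveat above.
        if toolgroup_id not in self._tool_cache:
            self._tool_cache[toolgroup_id] = await self.mcp_client.list_tools(toolgroup_id)
        return self._tool_cache[toolgroup_id]

    async def invoke_tool(self, toolgroup_id: str, tool_name: str, arguments: dict):
        tools = self._tool_cache.get(toolgroup_id)
        if tools is None:
            # list_tools must be called before invoke_tool -- the router
            # relies on the cached listing to resolve the tool.
            raise RuntimeError("call list_tools() before invoke_tool()")
        # ... resolve `tool_name` against `tools` and dispatch the call ...
```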

---

298721c238
chore: split routing_tables into individual files (#2259)