# What does this PR do?
Resolves https://github.com/meta-llama/llama-stack/issues/2735
Currently, if you test against OpenAI's Vector Stores API, the
`client.vector_stores.search` call fails with an invalid `vector_db`
error during routing (see the script in the collapsible item under the
Test Plan section).
This PR ensures that `client.vector_stores.search()` is compatible with
OpenAI's Vector Stores API.
The two biggest changes:
1. The `name`, which was previously used as the `vector_db_id`, has been
replaced by an identifier in OpenAI's `vs_{uuid}` format.
2. Vector stores must now be referenced by their ID; the name is not a
reliable handle, since every `client.vector_stores.create` call results
in a new vector store.
NOTE: I believe this is a breaking change for end users, as they'll need
to update their VectorDB identifiers.
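For example, the new flow looks roughly like this (a minimal sketch; the store name, query, and local endpoint are illustrative and assume a running Llama Stack server):
```python
from openai import OpenAI

# Assumes a Llama Stack server exposing the OpenAI-compatible API locally.
client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

# The returned ID now follows OpenAI's `vs_{uuid}` format.
vector_store = client.vector_stores.create(name="Support FAQ")
print(vector_store.id)  # e.g. "vs_..." rather than the store name

# Reference the store by its ID; re-creating with the same name yields a new store.
results = client.vector_stores.search(
    vector_store_id=vector_store.id,
    query="What is the return policy?",
    max_num_results=5,
)
```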
## Test Plan
Unit tests:
```bash
./scripts/unit-tests.sh tests/unit/providers/vector_io/ -v
```
Integration tests:
```bash
ENABLE_MILVUS=milvus llama stack run /Users/farceo/dev/llama-stack/llama_stack/templates/starter/run.yaml --image-type venv
LLAMA_STACK_CONFIG=http://localhost:8321 pytest -sv tests/integration/vector_io/test_openai_vector_stores.py --embedding-model=all-MiniLM-L6-v2 -vv
```
Unit tests and test script below 👇
<details>
<summary>Click here for the script used to test the OpenAI and Llama Stack Vector Store implementations</summary>

```python
import json
import argparse
from openai import OpenAI, pagination
import logging
from colorama import Fore, Style, init
import traceback
import os
# Initialize colorama for color support in terminal
init(autoreset=True)
# Setup basic logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
DEMO_VECTOR_STORE_NAME = "Support FAQ FJA"
global DEMO_VECTOR_STORE_ID
global DEMO_VECTOR_STORE_ID2
def colored_print(color, text):
"""Prints text to the console with the specified color."""
print(f"{color}{text}{Style.RESET_ALL}")
def log_and_print(color, message, level=logging.INFO):
"""Logs a message and prints it to the console with the specified color."""
logging.log(level, message)
colored_print(color, message)
def run_tests(client, prefix="openai"):
"""
Runs all tests using the provided OpenAI client and saves the output
to JSON files with the given prefix.
"""
# Create the directory if it doesn't exist
os.makedirs('openai_testing', exist_ok=True)
# Default values in case tests fail
global DEMO_VECTOR_STORE_ID, DEMO_VECTOR_STORE_ID2
DEMO_VECTOR_STORE_ID = None
DEMO_VECTOR_STORE_ID2 = None
    def test_idempotent_vector_store_creation():
        """
        Test that creating a vector store with the same name is idempotent.
        """
        log_and_print(Fore.BLUE, "Starting vector store creation test...")
        try:
            vector_store = client.vector_stores.create(
                name=DEMO_VECTOR_STORE_NAME,
            )
            # Attempt to create the same vector store again
            vector_store2 = client.vector_stores.create(
                name=DEMO_VECTOR_STORE_NAME,
            )
            # Check instead of assert
            if vector_store2.id != vector_store.id:
                log_and_print(Fore.YELLOW, f"FAILED IDEMPOTENCY: the same VectorStore name for {prefix.upper()} does not return the same ID",
                              level=logging.WARNING)
            else:
                log_and_print(Fore.GREEN, f"PASSED IDEMPOTENCY: {vector_store2.id} == {vector_store.id} the same VectorStore name for {prefix.upper()} returns the same ID")
            vector_store_data = vector_store.to_dict()
            log_and_print(Fore.WHITE, f"vector_stores.create = {json.dumps(vector_store_data, indent=2)}")
            with open(f'openai_testing/{prefix}_vector_store_create.json', 'w') as f:
                json.dump(vector_store_data, f, indent=2)
            global DEMO_VECTOR_STORE_ID, DEMO_VECTOR_STORE_ID2
            DEMO_VECTOR_STORE_ID = vector_store.id
            DEMO_VECTOR_STORE_ID2 = vector_store2.id
            return DEMO_VECTOR_STORE_ID, DEMO_VECTOR_STORE_ID2
        except Exception as e:
            log_and_print(Fore.RED, f"Idempotent vector store creation test failed: {e}", level=logging.ERROR)
            logging.error(traceback.format_exc())
            # Create a fallback vector store ID if needed
            if 'vector_store' in locals() and vector_store:
                DEMO_VECTOR_STORE_ID = vector_store.id
            return DEMO_VECTOR_STORE_ID, DEMO_VECTOR_STORE_ID2
    def test_vector_store_list():
        """
        Test listing vector stores.
        """
        log_and_print(Fore.BLUE, "Starting vector store list test...")
        try:
            vector_stores = client.vector_stores.list()
            # Check instead of assert
            if not isinstance(vector_stores, pagination.SyncCursorPage):
                log_and_print(Fore.YELLOW, f"FAILED: Expected a list of vector stores, got {type(vector_stores)}",
                              level=logging.WARNING)
            else:
                log_and_print(Fore.GREEN, "Vector store list test passed!")
            vector_stores_data = vector_stores.to_dict()
            log_and_print(Fore.WHITE, f"vector_stores.list = {json.dumps(vector_stores_data, indent=2)}")
            with open(f'openai_testing/{prefix}_vector_store_list.json', 'w') as f:
                json.dump(vector_stores_data, f, indent=2)
        except Exception as e:
            log_and_print(Fore.RED, f"Vector store list test failed: {e}", level=logging.ERROR)
            logging.error(traceback.format_exc())
    def test_retrieve_vector_store():
        """
        Test retrieving a specific vector store.
        """
        log_and_print(Fore.BLUE, "Starting retrieve vector store test...")
        if not DEMO_VECTOR_STORE_ID:
            log_and_print(Fore.YELLOW, "Skipping retrieve vector store test - no vector store ID available",
                          level=logging.WARNING)
            return
        try:
            vector_store = client.vector_stores.retrieve(
                vector_store_id=DEMO_VECTOR_STORE_ID,
            )
            # Check instead of assert
            if vector_store.id != DEMO_VECTOR_STORE_ID:
                log_and_print(Fore.YELLOW, "FAILED: Retrieved vector store ID does not match", level=logging.WARNING)
            else:
                log_and_print(Fore.GREEN, "Retrieve vector store test passed!")
            vector_store_data = vector_store.to_dict()
            log_and_print(Fore.WHITE, f"vector_stores.retrieve = {json.dumps(vector_store_data, indent=2)}")
            with open(f'openai_testing/{prefix}_vector_store_retrieve.json', 'w') as f:
                json.dump(vector_store_data, f, indent=2)
        except Exception as e:
            log_and_print(Fore.RED, f"Retrieve vector store test failed: {e}", level=logging.ERROR)
            logging.error(traceback.format_exc())
    def test_modify_vector_store():
        """
        Test modifying a vector store.
        """
        log_and_print(Fore.BLUE, "Starting modify vector store test...")
        if not DEMO_VECTOR_STORE_ID:
            log_and_print(Fore.YELLOW, "Skipping modify vector store test - no vector store ID available",
                          level=logging.WARNING)
            return
        try:
            updated_vector_store = client.vector_stores.update(
                vector_store_id=DEMO_VECTOR_STORE_ID,
                name="Updated Support FAQ FJA",
            )
            # Check instead of assert
            if updated_vector_store.name != "Updated Support FAQ FJA":
                log_and_print(Fore.YELLOW, "FAILED: Vector store name was not updated correctly", level=logging.WARNING)
            else:
                log_and_print(Fore.GREEN, "Modify vector store test passed!")
            updated_vector_store_data = updated_vector_store.to_dict()
            log_and_print(Fore.WHITE, f"vector_stores.modify = {json.dumps(updated_vector_store_data, indent=2)}")
            with open(f'openai_testing/{prefix}_vector_store_modify.json', 'w') as f:
                json.dump(updated_vector_store_data, f, indent=2)
        except Exception as e:
            log_and_print(Fore.RED, f"Modify vector store test failed: {e}", level=logging.ERROR)
            logging.error(traceback.format_exc())
    def test_delete_vector_store():
        """
        Test deleting a vector store.
        """
        log_and_print(Fore.BLUE, "Starting delete vector store test...")
        if not DEMO_VECTOR_STORE_ID2:
            log_and_print(Fore.YELLOW, "Skipping delete vector store test - no second vector store ID available",
                          level=logging.WARNING)
            return
        try:
            response = client.vector_stores.delete(
                vector_store_id=DEMO_VECTOR_STORE_ID2,
            )
            log_and_print(Fore.GREEN, "Delete vector store test passed!")
            response_data = response.to_dict()
            log_and_print(Fore.WHITE, f"Vector store delete response = {json.dumps(response_data, indent=2)}")
            with open(f'openai_testing/{prefix}_vector_store_delete.json', 'w') as f:
                json.dump(response_data, f, indent=2)
        except Exception as e:
            log_and_print(Fore.RED, f"Delete vector store test failed: {e}", level=logging.ERROR)
            logging.error(traceback.format_exc())
    def test_create_vector_store_file():
        log_and_print(Fore.BLUE, "Starting create vector store file test...")
        if not DEMO_VECTOR_STORE_ID:
            log_and_print(Fore.YELLOW, "Skipping create vector store file test - no vector store ID available",
                          level=logging.WARNING)
            return
        try:
            # create jsonl of files as an example
            with open("mydata.jsonl", "w") as f:
                f.write('{"text": "What is the return policy?", "metadata": {"category": "support"}}\n')
                f.write('{"text": "How do I reset my password?", "metadata": {"category": "support"}}\n')
                f.write('{"text": "Where can I find my order history?", "metadata": {"category": "support"}}\n')
                f.write('{"text": "What are the shipping options?", "metadata": {"category": "support"}}\n')
                f.write('{"text": "What is your favorite banana?", "metadata": {"category": "support"}}\n')
            # Create a simple text file if my_data_small.txt doesn't exist
            if not os.path.exists("my_data_small.txt"):
                with open("my_data_small.txt", "w") as f:
                    f.write("This is a test file for vector store testing.\n")
            created_file = client.files.create(
                file=open("my_data_small.txt", "rb"),
                purpose="assistants",
            )
            created_file_data = created_file.to_dict()
            log_and_print(Fore.WHITE, f"Created file {json.dumps(created_file_data, indent=2)}")
            with open(f'openai_testing/{prefix}_file_create.json', 'w') as f:
                json.dump(created_file_data, f, indent=2)
            retrieved_files = client.files.retrieve(created_file.id)
            retrieved_files_data = retrieved_files.to_dict()
            log_and_print(Fore.WHITE, f"Retrieved file {json.dumps(retrieved_files_data, indent=2)}")
            with open(f'openai_testing/{prefix}_file_retrieve.json', 'w') as f:
                json.dump(retrieved_files_data, f, indent=2)
            vector_store_file = client.vector_stores.files.create(
                vector_store_id=DEMO_VECTOR_STORE_ID,
                file_id=created_file.id,
            )
            log_and_print(Fore.GREEN, "Create vector store file test passed!")
        except Exception as e:
            log_and_print(Fore.RED, f"Create vector store file test failed: {e}", level=logging.ERROR)
            logging.error(traceback.format_exc())
    def test_search_vector_store():
        """
        Test searching a vector store.
        """
        log_and_print(Fore.BLUE, "Starting search vector store test...")
        if not DEMO_VECTOR_STORE_ID:
            log_and_print(Fore.YELLOW, "Skipping search vector store test - no vector store ID available",
                          level=logging.WARNING)
            return
        try:
            query = "What is the banana policy?"
            search_results = client.vector_stores.search(
                vector_store_id=DEMO_VECTOR_STORE_ID,
                query=query,
                max_num_results=10,
                ranking_options={
                    'ranker': 'default-2024-11-15',
                    'score_threshold': 0.0,
                },
                rewrite_query=False,
            )
            # Check instead of assert
            if not isinstance(search_results, pagination.SyncPage):
                log_and_print(Fore.YELLOW, f"FAILED: Expected a list of search results, got {type(search_results)}",
                              level=logging.WARNING)
            else:
                log_and_print(Fore.GREEN, "Search vector store test passed!")
            search_results_dict = search_results.to_dict()
            log_and_print(Fore.WHITE, f"Search results = {search_results_dict}")
            with open(f'openai_testing/{prefix}_vector_store_search.json', 'w') as f:
                json.dump(search_results_dict, f, indent=2)
            log_and_print(Fore.WHITE, f"vector_stores.search = {search_results.to_json()}")
        except Exception as e:
            log_and_print(Fore.RED, f"Search vector store test failed: {e}", level=logging.ERROR)
            logging.error(traceback.format_exc())
    # Run all tests in sequence, even if some fail
    test_results = []
    try:
        result = test_idempotent_vector_store_creation()
        if result and len(result) == 2:
            DEMO_VECTOR_STORE_ID, DEMO_VECTOR_STORE_ID2 = result
        test_results.append(True)
    except Exception as e:
        log_and_print(Fore.RED, f"Vector store creation test failed: {e}", level=logging.ERROR)
        logging.error(traceback.format_exc())
        test_results.append(False)
    for test_func in [
        test_vector_store_list,
        test_retrieve_vector_store,
        test_modify_vector_store,
        test_delete_vector_store,
        test_create_vector_store_file,
        test_search_vector_store
    ]:
        try:
            test_func()
            test_results.append(True)
        except Exception as e:
            log_and_print(Fore.RED, f"{test_func.__name__} failed: {e}", level=logging.ERROR)
            logging.error(traceback.format_exc())
            test_results.append(False)
    if all(test_results):
        log_and_print(Fore.GREEN, f"All {prefix} tests completed successfully!")
    else:
        failed_count = test_results.count(False)
        log_and_print(Fore.YELLOW, f"{failed_count} {prefix} test(s) failed, but script completed.")
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run OpenAI and/or LlamaStack tests.")
    parser.add_argument(
        "--provider",
        type=str,
        default="llama",
        choices=["openai", "llama", "both"],
        help="Specify which environment to test: openai, llama, or both. Default is llama.",
    )
    args = parser.parse_args()
    try:
        if args.provider in ("openai", "both"):
            openai_client = OpenAI()
            run_tests(openai_client, prefix="openai")
        if args.provider in ("llama", "both"):
            llama_client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")
            run_tests(llama_client, prefix="llama")
        log_and_print(Fore.GREEN, "All tests completed!")
    except Exception as e:
        log_and_print(Fore.RED, f"Tests failed to complete: {e}", level=logging.ERROR)
        logging.error(traceback.format_exc())
```
</details>
---------
Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
# What does this PR do?
* Given that our API packages use `import *` in `__init__.py`, we don't
need to do `from llama_stack.apis.models.models` but can simply use
`from llama_stack.apis.models` (see the example below). The decision to
use `import *` is debatable and should probably be revisited at some point.
* Remove the unneeded Ruff F401 rule
* Consolidate the Ruff F403 rule in the pyproject
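For example, using `Model` as a representative symbol (illustrative only):
```python
# Before: importing from the inner module explicitly
#   from llama_stack.apis.models.models import Model

# After: the package `__init__.py` re-exports everything via `import *`,
# so the shorter package-level import is enough
from llama_stack.apis.models import Model
```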
Signed-off-by: Sébastien Han <seb@redhat.com>
# What does this PR do?
We added:
* a check that docstrings are present with 'params' and 'returns'
* a check that fails if someone sets 'returns: None'
* fixes for the failing APIs
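For illustration, a hypothetical method in the shape the docstring check expects (the names are made up, not from the codebase):
```python
class JobsAPI:
    async def get_status(self, job_id: str) -> dict[str, str]:
        """Get the status of a job.

        :param job_id: The identifier of the job to inspect.
        :returns: A mapping with the job's current status fields.
        """
        ...
        # A docstring that declared ':returns: None' would now fail the check.
```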
Signed-off-by: Sébastien Han <seb@redhat.com>
# What does this PR do?
This mainly tries to cover the entire llama_stack/apis directory; only
one is left. Some excludes were just no-ops.
Signed-off-by: Sébastien Han <seb@redhat.com>
# What does this PR do?
The goal of this PR is code base modernization.
Schema reflection code needed a minor adjustment to handle UnionTypes
and collections.abc.AsyncIterator. (Both are preferred for latest Python
releases.)
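For context, this is roughly the kind of rewrite pyupgrade performs (the functions are illustrative, not code from the repo):
```python
from collections.abc import AsyncIterator  # preferred over typing.AsyncIterator

# Before: `Optional[Union[int, str]]` and `typing.AsyncIterator[str]`
# After: PEP 604 unions and collections.abc generics
def lookup(key: str) -> int | str | None:
    ...

async def stream_tokens() -> AsyncIterator[str]:
    yield "token"
```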
Note to reviewers: almost all changes here are automatically generated
by pyupgrade. Some additional unused imports were cleaned up. The only
change worthy of note can be found under `docs/openapi_generator` and
`llama_stack/strong_typing/schema.py`, where reflection code was updated
to deal with "newer" types.
Signed-off-by: Ihar Hrachyshka <ihar.hrachyshka@gmail.com>
# What does this PR do?
- Removed Optional return types for GET methods
- Raised ValueError when a requested resource is not found (see the sketch after the generator output below)
- Ensured a proper 4xx response for missing resources
- Updated the API generator to check for wrong signatures
```
$ uv run --with ".[dev]" ./docs/openapi_generator/run_openapi_generator.sh
Validating API method return types...
API Method Return Type Validation Errors:
Method ScoringFunctions.get_scoring_function returns Optional type
```
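For illustration, a sketch of the GET-handler pattern described above (the error message matches the traceback in the Test Plan; the registry lookup is a hypothetical stand-in for the routing table):
```python
async def get_model(registry, model_id: str):
    model = await registry.get(model_id)
    if model is None:
        # GET methods no longer return Optional; missing resources raise,
        # and the server converts the ValueError into a 4xx response.
        raise ValueError(f"Model '{model_id}' not found")
    return model
```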
Closes: https://github.com/meta-llama/llama-stack/issues/1630
## Test Plan
Run the server then:
```
curl http://127.0.0.1:8321/v1/models/foo
{"detail":"Invalid value: Model 'foo' not found"}%
```
Server log:
```
INFO: 127.0.0.1:52307 - "GET /v1/models/foo HTTP/1.1" 400 Bad Request
09:51:42.654 [END] /v1/models/foo [StatusCode.OK] (134.65ms)
09:51:42.651 [ERROR] Error executing endpoint route='/v1/models/{model_id:path}' method='get'
Traceback (most recent call last):
File "/Users/leseb/Documents/AI/llama-stack/llama_stack/distribution/server/server.py", line 193, in endpoint
return await maybe_await(value)
File "/Users/leseb/Documents/AI/llama-stack/llama_stack/distribution/server/server.py", line 156, in maybe_await
return await value
File "/Users/leseb/Documents/AI/llama-stack/llama_stack/providers/utils/telemetry/trace_protocol.py", line 102, in async_wrapper
result = await method(self, *args, **kwargs)
File "/Users/leseb/Documents/AI/llama-stack/llama_stack/distribution/routers/routing_tables.py", line 217, in get_model
raise ValueError(f"Model '{model_id}' not found")
ValueError: Model 'foo' not found
```
Signed-off-by: Sébastien Han <seb@redhat.com>
llama-models should have extremely minimal cruft. Its sole purpose
should be didactic -- show the simplest implementation of the llama
models and document the prompt formats, etc.
This PR is the complement to
https://github.com/meta-llama/llama-models/pull/279
## Test Plan
Ensure all `llama` CLI `model` sub-commands work:
```bash
llama model list
llama model download --model-id ...
llama model prompt-format -m ...
```
Ran tests:
```bash
cd tests/client-sdk
LLAMA_STACK_CONFIG=fireworks pytest -s -v inference/
LLAMA_STACK_CONFIG=fireworks pytest -s -v vector_io/
LLAMA_STACK_CONFIG=fireworks pytest -s -v agents/
```
Created a fresh venv with `uv venv && source .venv/bin/activate` and ran
`llama stack build --template fireworks --image-type venv` followed by
`llama stack run together --image-type venv` <-- the server runs.
Also checked that the OpenAPI generator can run and there is no change
in the generated files as a result.
```bash
cd docs/openapi_generator
sh run_openapi_generator.sh
```
See https://github.com/meta-llama/llama-stack/issues/827 for the broader
design.
This is the first part:
- delete other kinds of memory banks (keyvalue, keyword, graph) for now;
we will introduce a keyvalue store API as part of this design but not
use it in the RAG tool yet.
- rename the APIs