mirror of https://github.com/meta-llama/llama-stack.git
synced 2025-12-23 04:59:40 +00:00

commit a2cffb7d0e: Merge branch 'main' into kvstore-refactor

10 changed files with 231 additions and 11 deletions
@@ -145,6 +145,10 @@ $ llama stack build --template starter
 ...
 You can now edit ~/.llama/distributions/llamastack-starter/starter-run.yaml and run `llama stack run ~/.llama/distributions/llamastack-starter/starter-run.yaml`
 ```
+
+```{tip}
+The generated `run.yaml` file is a starting point for your configuration. For comprehensive guidance on customizing it for your specific needs, infrastructure, and deployment scenarios, see [Customizing Your run.yaml Configuration](customizing_run_yaml.md).
+```
 :::
 :::{tab-item} Building from Scratch
@@ -2,6 +2,10 @@

 The Llama Stack runtime configuration is specified as a YAML file. Here is a simplified version of an example configuration file for the Ollama distribution:

+```{note}
+The default `run.yaml` files generated by templates are starting points for your configuration. For guidance on customizing these files for your specific needs, see [Customizing Your run.yaml Configuration](customizing_run_yaml.md).
+```
+
 ```{dropdown} 👋 Click here for a Sample Configuration File

 ```yaml
docs/source/distributions/customizing_run_yaml.md (new file, 40 lines)
@@ -0,0 +1,40 @@
# Customizing run.yaml Files

The `run.yaml` files generated by Llama Stack templates are **starting points** designed to be customized for your specific needs. They are not meant to be used as-is in production environments.

## Key Points

- **Templates are starting points**: Generated `run.yaml` files contain defaults for development/testing
- **Customization expected**: Update URLs, credentials, models, and settings for your environment
- **Version control separately**: Keep customized configs in your own repository
- **Environment-specific**: Create different configurations for dev, staging, production

## What You Can Customize

You can customize:

- **Provider endpoints**: Change `http://localhost:8000` to your actual servers
- **Swap providers**: Replace default providers (e.g., swap Tavily with Brave for search)
- **Storage paths**: Move from `/tmp/` to production directories
- **Authentication**: Add API keys, SSL, timeouts
- **Models**: Different model sizes for dev vs prod
- **Database settings**: Switch from SQLite to PostgreSQL
- **Tool configurations**: Add custom tools and integrations
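
For example, a minimal sketch of a few of these customizations. The provider ids, field names, and paths below are illustrative assumptions and may not match your generated template exactly:

```yaml
# Illustrative excerpt of a customized run.yaml; field names and provider
# ids here are assumptions, not a verbatim generated template.
providers:
  inference:
    - provider_id: ollama
      provider_type: remote::ollama
      config:
        url: http://inference.internal:11434  # replaces a localhost default
metadata_store:
  type: sqlite
  db_path: /var/lib/llama-stack/registry.db   # moved out of /tmp/
```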

## Best Practices

- Use environment variables for secrets and environment-specific values
- Create separate `run.yaml` files for different environments (dev, staging, prod)
- Document your changes with comments
- Test configurations before deployment
- Keep your customized configs in version control
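
As a sketch of the first point, assuming the `${env.VAR}` substitution syntax that Llama Stack configs support (verify the exact syntax against your version):

```yaml
# Keep secrets out of version control via environment substitution.
providers:
  inference:
    - provider_id: together
      provider_type: remote::together
      config:
        api_key: ${env.TOGETHER_API_KEY}  # resolved from the environment at startup
```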

Example structure:

```
your-project/
├── configs/
│   ├── dev-run.yaml
│   └── prod-run.yaml
└── README.md
```

The goal is to take the generated template and adapt it to your specific infrastructure and operational needs.
@@ -9,6 +9,7 @@ This section provides an overview of the distributions available in Llama Stack.

 importing_as_library
 configuration
+customizing_run_yaml
 list_of_distributions
 kubernetes_deployment
 building_distro
@@ -54,7 +54,7 @@ Llama Stack is a server that exposes multiple APIs, you connect with it using th…
 You can use Python to build and run the Llama Stack server, which is useful for testing and development.

 Llama Stack uses a [YAML configuration file](../distributions/configuration.md) to specify the stack setup,
-which defines the providers and their settings.
+which defines the providers and their settings. The generated configuration serves as a starting point that you can [customize for your specific needs](../distributions/customizing_run_yaml.md).
 Now let's build and run the Llama Stack config for Ollama.
 We use the `starter` template. By default, all providers are disabled, so we enable Ollama by passing environment variables.
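As a sketch of that flow: the CLI invocation is taken from the build output earlier in this diff, while the environment variable names are illustrative assumptions, so check the starter template for the exact toggles:

```python
# Minimal sketch: run the generated starter config with Ollama enabled via
# environment variables. ENABLE_OLLAMA and OLLAMA_URL are assumed names,
# not verified against the starter template.
import os
import subprocess

env = os.environ.copy()
env["ENABLE_OLLAMA"] = "ollama"               # hypothetical provider toggle
env["OLLAMA_URL"] = "http://localhost:11434"  # hypothetical endpoint override

subprocess.run(
    [
        "llama", "stack", "run",
        os.path.expanduser("~/.llama/distributions/llamastack-starter/starter-run.yaml"),
    ],
    env=env,
    check=True,
)
```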
@@ -264,7 +264,7 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
     async def _save_openai_vector_store_file(
         self, store_id: str, file_id: str, file_info: dict[str, Any], file_contents: list[dict[str, Any]]
     ) -> None:
-        """Save vector store file metadata to kvstore."""
+        """Save vector store file data to kvstore."""
         assert self.kvstore is not None
         key = f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}"
         await self.kvstore.set(key=key, value=json.dumps(file_info))
@@ -292,7 +292,16 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
         await self.kvstore.set(key=key, value=json.dumps(file_info))

     async def _delete_openai_vector_store_file_from_storage(self, store_id: str, file_id: str) -> None:
-        """Delete vector store file metadata from kvstore."""
+        """Delete vector store data from kvstore."""
         assert self.kvstore is not None
-        key = f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}"
-        await self.kvstore.delete(key)
+
+        keys_to_delete = [
+            f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}",
+            f"{OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX}{store_id}:{file_id}",
+        ]
+        for key in keys_to_delete:
+            try:
+                await self.kvstore.delete(key)
+            except Exception as e:
+                logger.warning(f"Failed to delete key {key}: {e}")
+                continue
@@ -83,9 +83,37 @@ class ModelRegistryHelper(ModelsProtocolPrivate):
     def get_llama_model(self, provider_model_id: str) -> str | None:
         return self.provider_id_to_llama_model_map.get(provider_model_id, None)

+    async def check_model_availability(self, model: str) -> bool:
+        """
+        Check if a specific model is available from the provider (non-static check).
+
+        This is for subclassing purposes, so providers can check if a specific
+        model is currently available for use through dynamic means (e.g., API calls).
+
+        This method should NOT check statically configured model entries in
+        `self.alias_to_provider_id_map` - that is handled separately in register_model.
+
+        Default implementation returns False (no dynamic models available).
+
+        :param model: The model identifier to check.
+        :return: True if the model is available dynamically, False otherwise.
+        """
+        return False
+
     async def register_model(self, model: Model) -> Model:
-        if not (supported_model_id := self.get_provider_model_id(model.provider_resource_id)):
-            raise UnsupportedModelError(model.provider_resource_id, self.alias_to_provider_id_map.keys())
+        # Check if model is supported in static configuration
+        supported_model_id = self.get_provider_model_id(model.provider_resource_id)
+
+        # If not found in static config, check if it's available dynamically from provider
+        if not supported_model_id:
+            if await self.check_model_availability(model.provider_resource_id):
+                supported_model_id = model.provider_resource_id
+            else:
+                # note: we cannot provide a complete list of supported models without
+                # getting a complete list from the provider, so we return "..."
+                all_supported_models = [*self.alias_to_provider_id_map.keys(), "..."]
+                raise UnsupportedModelError(model.provider_resource_id, all_supported_models)
+
         provider_resource_id = self.get_provider_model_id(model.model_id)
         if model.model_type == ModelType.embedding:
             # embedding models are always registered by their provider model id and does not need to be mapped to a llama model
@@ -114,6 +142,7 @@ class ModelRegistryHelper(ModelsProtocolPrivate):
                 ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR[llama_model]
             )

+        # Register the model alias, ensuring it maps to the correct provider model id
         self.alias_to_provider_id_map[model.model_id] = supported_model_id

         return model
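For orientation, here is a sketch of how a remote provider might override the new `check_model_availability` hook. The `client.list_models()` call and the import path are assumptions; the override contract itself (return True when the model is dynamically available) comes from the diff above.

```python
# Sketch only: `client.list_models()` is a hypothetical provider SDK call,
# and the import path is assumed from the file being diffed above.
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper


class MyProviderAdapter(ModelRegistryHelper):
    def __init__(self, model_entries, client):
        super().__init__(model_entries)
        self._client = client

    async def check_model_availability(self, model: str) -> bool:
        try:
            # Ask the provider which models it can serve right now.
            available = await self._client.list_models()
        except Exception:
            # Treat provider errors as "not available"; register_model will then
            # raise UnsupportedModelError listing the static entries plus "...".
            return False
        return model in available
```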
@@ -87,6 +87,37 @@ def helper(known_provider_model: ProviderModelEntry, known_provider_model2: ProviderModelEntry) -> ModelRegistryHelper:
     return ModelRegistryHelper([known_provider_model, known_provider_model2])


+class MockModelRegistryHelperWithDynamicModels(ModelRegistryHelper):
+    """Test helper that simulates a provider with dynamically available models."""
+
+    def __init__(self, model_entries: list[ProviderModelEntry], available_models: list[str]):
+        super().__init__(model_entries)
+        self._available_models = available_models
+
+    async def check_model_availability(self, model: str) -> bool:
+        return model in self._available_models
+
+
+@pytest.fixture
+def dynamic_model() -> Model:
+    """A model that's not in static config but available dynamically."""
+    return Model(
+        provider_id="provider",
+        identifier="dynamic-model",
+        provider_resource_id="dynamic-provider-id",
+    )
+
+
+@pytest.fixture
+def helper_with_dynamic_models(
+    known_provider_model: ProviderModelEntry, known_provider_model2: ProviderModelEntry, dynamic_model: Model
+) -> MockModelRegistryHelperWithDynamicModels:
+    """Helper that includes dynamically available models."""
+    return MockModelRegistryHelperWithDynamicModels(
+        [known_provider_model, known_provider_model2], [dynamic_model.provider_resource_id]
+    )
+
+
 async def test_lookup_unknown_model(helper: ModelRegistryHelper, unknown_model: Model) -> None:
     assert helper.get_provider_model_id(unknown_model.model_id) is None
@@ -151,3 +182,63 @@ async def test_unregister_model_during_init(helper: ModelRegistryHelper, known_model: Model) -> None:
     assert helper.get_provider_model_id(known_model.provider_resource_id) == known_model.provider_model_id
     await helper.unregister_model(known_model.provider_resource_id)
     assert helper.get_provider_model_id(known_model.provider_resource_id) is None
+
+
+async def test_register_model_from_check_model_availability(
+    helper_with_dynamic_models: MockModelRegistryHelperWithDynamicModels, dynamic_model: Model
+) -> None:
+    """Test that models returned by check_model_availability can be registered."""
+    # Verify the model is not in static config
+    assert helper_with_dynamic_models.get_provider_model_id(dynamic_model.provider_resource_id) is None
+
+    # But it should be available via check_model_availability
+    is_available = await helper_with_dynamic_models.check_model_availability(dynamic_model.provider_resource_id)
+    assert is_available
+
+    # Registration should succeed
+    registered_model = await helper_with_dynamic_models.register_model(dynamic_model)
+    assert registered_model == dynamic_model
+
+    # Model should now be registered and accessible
+    assert (
+        helper_with_dynamic_models.get_provider_model_id(dynamic_model.model_id) == dynamic_model.provider_resource_id
+    )
+
+
+async def test_register_model_not_in_static_or_dynamic(
+    helper_with_dynamic_models: MockModelRegistryHelperWithDynamicModels, unknown_model: Model
+) -> None:
+    """Test that models not in static config or dynamic models are rejected."""
+    # Verify the model is not in static config
+    assert helper_with_dynamic_models.get_provider_model_id(unknown_model.provider_resource_id) is None
+
+    # And not available via check_model_availability
+    is_available = await helper_with_dynamic_models.check_model_availability(unknown_model.provider_resource_id)
+    assert not is_available
+
+    # Registration should fail with comprehensive error message
+    with pytest.raises(Exception) as exc_info:  # UnsupportedModelError
+        await helper_with_dynamic_models.register_model(unknown_model)
+
+    # Error should include static models and "..." for dynamic models
+    error_str = str(exc_info.value)
+    assert "..." in error_str
+
+
+async def test_register_alias_for_dynamic_model(
+    helper_with_dynamic_models: MockModelRegistryHelperWithDynamicModels, dynamic_model: Model
+) -> None:
+    """Test that we can register an alias that maps to a dynamically available model."""
+    # Create a model with a different identifier but same provider_resource_id
+    alias_model = Model(
+        provider_id=dynamic_model.provider_id,
+        identifier="dynamic-model-alias",
+        provider_resource_id=dynamic_model.provider_resource_id,
+    )
+
+    # Registration should succeed since the provider_resource_id is available dynamically
+    registered_model = await helper_with_dynamic_models.register_model(alias_model)
+    assert registered_model == alias_model
+
+    # Both the original provider_resource_id and the new alias should work
+    assert helper_with_dynamic_models.get_provider_model_id(alias_model.model_id) == dynamic_model.provider_resource_id
@@ -12,6 +12,8 @@ from pymilvus import MilvusClient, connections

 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import Chunk, ChunkMetadata
+from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
+from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, FaissVectorIOAdapter
 from llama_stack.providers.inline.vector_io.milvus.config import MilvusVectorIOConfig, SqliteKVStoreConfig
 from llama_stack.providers.inline.vector_io.sqlite_vec import SQLiteVectorIOConfig
 from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import SQLiteVecIndex, SQLiteVecVectorIOAdapter
@@ -90,7 +92,7 @@ def sample_embeddings_with_metadata(sample_chunks_with_metadata):
     return np.array([np.random.rand(EMBEDDING_DIMENSION).astype(np.float32) for _ in sample_chunks_with_metadata])


-@pytest.fixture(params=["milvus", "sqlite_vec"])
+@pytest.fixture(params=["milvus", "sqlite_vec", "faiss"])
 def vector_provider(request):
     return request.param
@@ -116,7 +118,7 @@ async def unique_kvstore_config(tmp_path_factory):

 @pytest.fixture(scope="session")
 def sqlite_vec_db_path(tmp_path_factory):
-    db_path = str(tmp_path_factory.getbasetemp() / "test.db")
+    db_path = str(tmp_path_factory.getbasetemp() / "test_sqlite_vec.db")
     return db_path

@@ -198,11 +200,49 @@ async def milvus_vec_adapter(milvus_vec_db_path, mock_inference_api):
     await adapter.shutdown()


+@pytest.fixture
+def faiss_vec_db_path(tmp_path_factory):
+    db_path = str(tmp_path_factory.getbasetemp() / "test_faiss.db")
+    return db_path
+
+
+@pytest.fixture
+async def faiss_vec_index(embedding_dimension):
+    index = FaissIndex(embedding_dimension)
+    yield index
+    await index.delete()
+
+
+@pytest.fixture
+async def faiss_vec_adapter(unique_kvstore_config, mock_inference_api, embedding_dimension):
+    config = FaissVectorIOConfig(
+        kvstore=unique_kvstore_config,
+    )
+    adapter = FaissVectorIOAdapter(
+        config=config,
+        inference_api=mock_inference_api,
+        files_api=None,
+    )
+    await adapter.initialize()
+    await adapter.register_vector_db(
+        VectorDB(
+            identifier=f"faiss_test_collection_{np.random.randint(1e6)}",
+            provider_id="test_provider",
+            embedding_model="test_model",
+            embedding_dimension=embedding_dimension,
+        )
+    )
+    yield adapter
+    await adapter.shutdown()
+
+
 @pytest.fixture
 def vector_io_adapter(vector_provider, request):
     """Returns the appropriate vector IO adapter based on the provider parameter."""
     if vector_provider == "milvus":
         return request.getfixturevalue("milvus_vec_adapter")
+    elif vector_provider == "faiss":
+        return request.getfixturevalue("faiss_vec_adapter")
     else:
         return request.getfixturevalue("sqlite_vec_adapter")
@@ -94,7 +94,7 @@ async def test_query_unregistered_raises(vector_io_adapter):

 async def test_insert_chunks_calls_underlying_index(vector_io_adapter):
     fake_index = AsyncMock()
-    vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=fake_index)
+    vector_io_adapter.cache["db1"] = fake_index

     chunks = ["chunk1", "chunk2"]
     await vector_io_adapter.insert_chunks("db1", chunks)
@@ -112,7 +112,7 @@ async def test_insert_chunks_missing_db_raises(vector_io_adapter):
 async def test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter):
     expected = QueryChunksResponse(chunks=[Chunk(content="c1")], scores=[0.1])
     fake_index = AsyncMock(query_chunks=AsyncMock(return_value=expected))
-    vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=fake_index)
+    vector_io_adapter.cache["db1"] = fake_index

     response = await vector_io_adapter.query_chunks("db1", "my_query", {"param": 1})
@@ -286,5 +286,7 @@ async def test_delete_openai_vector_store_file_from_storage(vector_io_adapter, t…
     await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, file_contents)
     await vector_io_adapter._delete_openai_vector_store_file_from_storage(store_id, file_id)

     loaded_file_info = await vector_io_adapter._load_openai_vector_store_file(store_id, file_id)
     assert loaded_file_info == {}
+    loaded_contents = await vector_io_adapter._load_openai_vector_store_file_contents(store_id, file_id)
+    assert loaded_contents == []