diff --git a/README.md b/README.md
index 03aa3dd50..8db4580a2 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,8 @@
# Llama Stack
+
+
+-----
[](https://pypi.org/project/llama_stack/)
[](https://pypi.org/project/llama-stack/)
[](https://github.com/meta-llama/llama-stack/blob/main/LICENSE)
@@ -9,6 +12,7 @@
[**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
+
### ✨🎉 Llama 4 Support 🎉✨
We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta.
@@ -179,3 +183,17 @@ Please checkout our [Documentation](https://llama-stack.readthedocs.io/en/latest
Check out our client SDKs for connecting to a Llama Stack server in your preferred language, you can choose from [python](https://github.com/meta-llama/llama-stack-client-python), [typescript](https://github.com/meta-llama/llama-stack-client-typescript), [swift](https://github.com/meta-llama/llama-stack-client-swift), and [kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) programming languages to quickly build your applications.
You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repo.
+
+
+## 🌟 GitHub Star History
+
+[](https://www.star-history.com/#meta-llama/llama-stack&Date)
+
+## ✨ Contributors
+
+Thanks to all of our amazing contributors!
+
+
+
+
\ No newline at end of file
diff --git a/docs/_static/js/keyboard_shortcuts.js b/docs/_static/js/keyboard_shortcuts.js
new file mode 100644
index 000000000..81d0b7c65
--- /dev/null
+++ b/docs/_static/js/keyboard_shortcuts.js
@@ -0,0 +1,14 @@
+document.addEventListener('keydown', function(event) {
+ // command+K or ctrl+K
+ if ((event.metaKey || event.ctrlKey) && event.key === 'k') {
+ event.preventDefault();
+ document.querySelector('.search-input, .search-field, input[name="q"]').focus();
+ }
+
+ // forward slash
+ if (event.key === '/' &&
+ !event.target.matches('input, textarea, select')) {
+ event.preventDefault();
+ document.querySelector('.search-input, .search-field, input[name="q"]').focus();
+ }
+});
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 20f1abf00..3f84d1310 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -131,6 +131,7 @@ html_static_path = ["../_static"]
def setup(app):
app.add_css_file("css/my_theme.css")
app.add_js_file("js/detect_theme.js")
+ app.add_js_file("js/keyboard_shortcuts.js")
def dockerhub_role(name, rawtext, text, lineno, inliner, options={}, content=[]):
url = f"https://hub.docker.com/r/llamastack/{text}"
diff --git a/docs/source/contributing/index.md b/docs/source/contributing/index.md
index 1e067ea6c..79c3861ea 100644
--- a/docs/source/contributing/index.md
+++ b/docs/source/contributing/index.md
@@ -2,14 +2,28 @@
```{include} ../../../CONTRIBUTING.md
```
-See the [Adding a New API Provider](new_api_provider.md) which describes how to add new API providers to the Stack.
+## Testing
+See the [Test Page](testing.md) which describes how to test your changes.
+```{toctree}
+:maxdepth: 1
+:hidden:
+:caption: Testing
+testing
+```
+## Adding a New Provider
+
+See the [Adding a New API Provider Page](new_api_provider.md) which describes how to add new API providers to the Stack.
+
+See the [Vector Database Page](new_vector_database.md), which describes how to add a new vector database to Llama Stack.
+
+See the [External Provider Page](../providers/external/index.md) which describes how to add external providers to the Stack.
```{toctree}
:maxdepth: 1
:hidden:
new_api_provider
-testing
+new_vector_database
```
diff --git a/docs/source/contributing/new_vector_database.md b/docs/source/contributing/new_vector_database.md
new file mode 100644
index 000000000..83c0f55bc
--- /dev/null
+++ b/docs/source/contributing/new_vector_database.md
@@ -0,0 +1,75 @@
+# Adding a New Vector Database
+
+This guide will walk you through the process of adding a new vector database to Llama Stack.
+
+> **_NOTE:_** Here's an example Pull Request of the [Milvus Vector Database Provider](https://github.com/meta-llama/llama-stack/pull/1467).
+
+Vector Database providers are used to store and retrieve vector embeddings. Vector databases are not limited to vector
+search but can support keyword and hybrid search. Additionally, vector databases can also support operations like
+filtering, sorting, and aggregating vectors.
+
+## Steps to Add a New Vector Database Provider
+1. **Choose the Database Type**: Determine if your vector database is a remote service, inline, or both.
+ - Remote databases make requests to external services, while inline databases execute locally. Some providers support both.
+2. **Implement the Provider**: Create a new provider class that inherits from `VectorDatabaseProvider` and implements the required methods.
+ - Implement methods for vector storage, retrieval, search, and any additional features your database supports.
+ - You will need to implement the following methods for `YourVectorIndex`:
+ - `YourVectorIndex.create()`
+ - `YourVectorIndex.initialize()`
+ - `YourVectorIndex.add_chunks()`
+ - `YourVectorIndex.delete_chunk()`
+ - `YourVectorIndex.query_vector()`
+ - `YourVectorIndex.query_keyword()`
+ - `YourVectorIndex.query_hybrid()`
+ - You will need to implement the following methods for `YourVectorIOAdapter`:
+ - `YourVectorIOAdapter.initialize()`
+ - `YourVectorIOAdapter.shutdown()`
+ - `YourVectorIOAdapter.list_vector_dbs()`
+ - `YourVectorIOAdapter.register_vector_db()`
+ - `YourVectorIOAdapter.unregister_vector_db()`
+ - `YourVectorIOAdapter.insert_chunks()`
+ - `YourVectorIOAdapter.query_chunks()`
+ - `YourVectorIOAdapter.delete_chunks()`
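+   - A minimal sketch of these two classes is shown below. `YourVectorIndex` and `YourVectorIOAdapter` are placeholders; the exact base classes (for example, FAISS uses `EmbeddingIndex` for the index and `OpenAIVectorStoreMixin`, `VectorIO`, `VectorDBsProtocolPrivate` for the adapter) and the real method signatures should be copied from an existing provider such as FAISS or Milvus.
+```python
+# Placeholder sketch only - copy the real signatures and base classes from an existing provider.
+class YourVectorIndex:
+    @classmethod
+    async def create(cls, *args, **kwargs) -> "YourVectorIndex": ...
+
+    async def initialize(self) -> None: ...  # open connections / create the collection
+    async def add_chunks(self, chunks, embeddings) -> None: ...  # store chunks with their embeddings
+    async def delete_chunk(self, chunk_id) -> None: ...
+    async def query_vector(self, embedding, k, score_threshold): ...  # similarity search
+    async def query_keyword(self, query_string, k, score_threshold): ...
+    async def query_hybrid(self, embedding, query_string, k, score_threshold, reranker_type, reranker_params=None): ...
+
+
+class YourVectorIOAdapter:
+    async def initialize(self) -> None: ...
+    async def shutdown(self) -> None: ...
+    async def list_vector_dbs(self): ...
+    async def register_vector_db(self, vector_db) -> None: ...
+    async def unregister_vector_db(self, vector_db_id) -> None: ...
+    async def insert_chunks(self, vector_db_id, chunks, ttl_seconds=None) -> None: ...
+    async def query_chunks(self, vector_db_id, query, params=None): ...
+    async def delete_chunks(self, vector_db_id, chunk_ids) -> None: ...
+```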
+3. **Add to Registry**: Register your provider in the appropriate registry file.
+ - Update {repopath}`llama_stack/providers/registry/vector_io.py` to include your new provider.
+```python
+from llama_stack.providers.datatypes import Api, InlineProviderSpec
+
+InlineProviderSpec(
+ api=Api.vector_io,
+ provider_type="inline::milvus",
+ pip_packages=["pymilvus>=2.4.10"],
+ module="llama_stack.providers.inline.vector_io.milvus",
+ config_class="llama_stack.providers.inline.vector_io.milvus.MilvusVectorIOConfig",
+ api_dependencies=[Api.inference],
+ optional_api_dependencies=[Api.files],
+ description="",
+),
+```
+4. **Add Tests**: Create unit tests and integration tests for your provider in the `tests/` directory.
+ - Unit Tests
+ - By following the structure of the class methods, you will be able to easily run unit and integration tests for your database.
+       1. You have to configure the tests for your provider in `/tests/unit/providers/vector_io/conftest.py`.
+       2. Update the `vector_provider` fixture to include your provider if it is an inline provider.
+ 3. Create a `your_vectorprovider_index` fixture that initializes your vector index.
+ 4. Create a `your_vectorprovider_adapter` fixture that initializes your vector adapter.
+ 5. Add your provider to the `vector_io_providers` fixture dictionary.
+ - Please follow the naming convention of `your_vectorprovider_index` and `your_vectorprovider_adapter` as the tests require this to execute properly.
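+     - A sketch of what those fixtures might look like is shown below; the fixture bodies and any config classes are placeholders, so copy the actual setup from an existing provider's fixtures in `/tests/unit/providers/vector_io/conftest.py`.
+```python
+import pytest
+
+
+@pytest.fixture
+def your_vectorprovider_index():
+    # Build and return YourVectorIndex here (connection and config details are provider-specific).
+    ...
+
+
+@pytest.fixture
+def your_vectorprovider_adapter(your_vectorprovider_index):
+    # Build and return YourVectorIOAdapter wired to the index above.
+    ...
+
+
+# Finally, add your provider to the existing `vector_io_providers` fixture dictionary
+# (and to the `vector_provider` fixture if it is an inline provider).
+```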
+ - Integration Tests
+     - Integration tests are located in {repopath}`tests/integration`. These tests use the Python client SDK APIs (from the `llama_stack_client` package) to test functionality.
+     - The two sets of integration tests are:
+ - `tests/integration/vector_io/test_vector_io.py`: This file tests registration, insertion, and retrieval.
+ - `tests/integration/vector_io/test_openai_vector_stores.py`: These tests are for OpenAI-compatible vector stores and test the OpenAI API compatibility.
+ - You will need to update `skip_if_provider_doesnt_support_openai_vector_stores` to include your provider as well as `skip_if_provider_doesnt_support_openai_vector_stores_search` to test the appropriate search functionality.
+ - Running the tests in the GitHub CI
+ - You will need to update the `.github/workflows/integration-vector-io-tests.yml` file to include your provider.
+ - If your provider is a remote provider, you will also have to add a container to spin up and run it in the action.
+     - Updating the pyproject.toml
+ - If you are adding tests for the `inline` provider you will have to update the `unit` group.
+ - `uv add new_pip_package --group unit`
+ - If you are adding tests for the `remote` provider you will have to update the `test` group, which is used in the GitHub CI for integration tests.
+ - `uv add new_pip_package --group test`
+5. **Update Documentation**: Please update the documentation for end users
+   - Add a description of your provider to its registry entry in {repopath}`llama_stack/providers/registry/vector_io.py`. Please see other providers for examples.
+   - Regenerate the provider documentation by running {repopath}`./scripts/provider_codegen.py`.
\ No newline at end of file
diff --git a/docs/source/contributing/testing.md b/docs/source/contributing/testing.md
index 47bf9dea7..454ded266 100644
--- a/docs/source/contributing/testing.md
+++ b/docs/source/contributing/testing.md
@@ -1,6 +1,8 @@
-# Testing Llama Stack
+```{include} ../../../tests/README.md
+```
-Tests are of three different kinds:
-- Unit tests
-- Provider focused integration tests
-- Client SDK tests
+```{include} ../../../tests/unit/README.md
+```
+
+```{include} ../../../tests/integration/README.md
+```
diff --git a/docs/source/providers/inference/index.md b/docs/source/providers/inference/index.md
index 1c7bc86b9..38781e5eb 100644
--- a/docs/source/providers/inference/index.md
+++ b/docs/source/providers/inference/index.md
@@ -29,6 +29,7 @@ remote_runpod
remote_sambanova
remote_tgi
remote_together
+remote_vertexai
remote_vllm
remote_watsonx
```
diff --git a/docs/source/providers/inference/remote_vertexai.md b/docs/source/providers/inference/remote_vertexai.md
new file mode 100644
index 000000000..962bbd76f
--- /dev/null
+++ b/docs/source/providers/inference/remote_vertexai.md
@@ -0,0 +1,40 @@
+# remote::vertexai
+
+## Description
+
+Google Vertex AI inference provider enables you to use Google's Gemini models through Google Cloud's Vertex AI platform, providing several advantages:
+
+• Enterprise-grade security: Uses Google Cloud's security controls and IAM
+• Better integration: Seamless integration with other Google Cloud services
+• Advanced features: Access to additional Vertex AI features like model tuning and monitoring
+• Authentication: Uses Google Cloud Application Default Credentials (ADC) instead of API keys
+
+Configuration:
+- Set VERTEX_AI_PROJECT environment variable (required)
+- Set VERTEX_AI_LOCATION environment variable (optional, defaults to us-central1)
+- Use Google Cloud Application Default Credentials or service account key
+
+Authentication Setup:
+Option 1 (Recommended): gcloud auth application-default login
+Option 2: Set GOOGLE_APPLICATION_CREDENTIALS to service account key path
+
+Available Models:
+- vertex_ai/gemini-2.0-flash
+- vertex_ai/gemini-2.5-flash
+- vertex_ai/gemini-2.5-pro
+
+## Configuration
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `project` | `<class 'str'>` | Yes | | Google Cloud project ID for Vertex AI |
+| `location` | `<class 'str'>` | No | us-central1 | Google Cloud location for Vertex AI |
+
+## Sample Configuration
+
+```yaml
+project: ${env.VERTEX_AI_PROJECT:=}
+location: ${env.VERTEX_AI_LOCATION:=us-central1}
+
+```
+
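+## Example Usage
+
+A minimal sketch of calling one of these models through the Python client, assuming a Llama Stack server is running locally on the default port and `gcloud auth application-default login` has been completed. The exact client method and response field names are assumptions and may differ across `llama-stack-client` versions.
+
+```python
+from llama_stack_client import LlamaStackClient
+
+# Assumes a locally running Llama Stack server with the vertexai provider enabled
+client = LlamaStackClient(base_url="http://localhost:8321")
+
+response = client.inference.chat_completion(
+    model_id="vertex_ai/gemini-2.0-flash",
+    messages=[{"role": "user", "content": "Say hello from Vertex AI"}],
+)
+print(response.completion_message.content)
+```
+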
diff --git a/docs/source/providers/vector_io/inline_faiss.md b/docs/source/providers/vector_io/inline_faiss.md
index bcff66f3f..cfa18a839 100644
--- a/docs/source/providers/vector_io/inline_faiss.md
+++ b/docs/source/providers/vector_io/inline_faiss.md
@@ -12,6 +12,18 @@ That means you'll get fast and efficient vector retrieval.
- Lightweight and easy to use
- Fully integrated with Llama Stack
- GPU support
+- **Vector search** - FAISS supports pure vector similarity search using embeddings
+
+## Search Modes
+
+**Supported:**
+- **Vector Search** (`mode="vector"`): Performs vector similarity search using embeddings
+
+**Not Supported:**
+- **Keyword Search** (`mode="keyword"`): Not supported by FAISS
+- **Hybrid Search** (`mode="hybrid"`): Not supported by FAISS
+
+> **Note**: FAISS is designed as a pure vector similarity search library. See the [FAISS GitHub repository](https://github.com/facebookresearch/faiss) for more details about FAISS's core functionality.
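+
+For example, a vector-mode query against a FAISS-backed vector store uses the same client call as the other providers (a sketch, assuming an existing `client` and a registered `vector_store`):
+
+```python
+# Vector search example (the only search mode FAISS supports)
+search_response = client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="What is machine learning?",
+    search_mode="vector",
+    max_num_results=5,
+)
+```
+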
## Usage
diff --git a/docs/source/providers/vector_io/remote_milvus.md b/docs/source/providers/vector_io/remote_milvus.md
index 3646f4acc..2af64b8bb 100644
--- a/docs/source/providers/vector_io/remote_milvus.md
+++ b/docs/source/providers/vector_io/remote_milvus.md
@@ -11,6 +11,7 @@ That means you're not limited to storing vectors in memory or in a separate serv
- Easy to use
- Fully integrated with Llama Stack
+- Supports all search modes: vector, keyword, and hybrid search (both inline and remote configurations)
## Usage
@@ -101,6 +102,92 @@ vector_io:
- **`client_pem_path`**: Path to the **client certificate** file (required for mTLS).
- **`client_key_path`**: Path to the **client private key** file (required for mTLS).
+## Search Modes
+
+Milvus supports three different search modes for both inline and remote configurations:
+
+### Vector Search
+Vector search uses semantic similarity to find the most relevant chunks based on embedding vectors. This is the default search mode and works well for finding conceptually similar content.
+
+```python
+# Vector search example
+search_response = client.vector_stores.search(
+ vector_store_id=vector_store.id,
+ query="What is machine learning?",
+ search_mode="vector",
+ max_num_results=5,
+)
+```
+
+### Keyword Search
+Keyword search uses traditional text-based matching to find chunks containing specific terms or phrases. This is useful when you need exact term matches.
+
+```python
+# Keyword search example
+search_response = client.vector_stores.search(
+ vector_store_id=vector_store.id,
+ query="Python programming language",
+ search_mode="keyword",
+ max_num_results=5,
+)
+```
+
+### Hybrid Search
+Hybrid search combines both vector and keyword search methods to provide more comprehensive results. It leverages the strengths of both semantic similarity and exact term matching.
+
+#### Basic Hybrid Search
+```python
+# Basic hybrid search example (uses RRF ranker with default impact_factor=60.0)
+search_response = client.vector_stores.search(
+ vector_store_id=vector_store.id,
+ query="neural networks in Python",
+ search_mode="hybrid",
+ max_num_results=5,
+)
+```
+
+**Note**: The default `impact_factor` value of 60.0 was empirically determined to be optimal in the original RRF research paper: ["Reciprocal Rank Fusion outperforms Condorcet and individual Rank Learning Methods"](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) (Cormack et al., 2009).
+
+#### Hybrid Search with RRF (Reciprocal Rank Fusion) Ranker
+RRF combines rankings from vector and keyword search by using reciprocal ranks. The impact factor controls how much weight is given to higher-ranked results.
+
+```python
+# Hybrid search with custom RRF parameters
+search_response = client.vector_stores.search(
+ vector_store_id=vector_store.id,
+ query="neural networks in Python",
+ search_mode="hybrid",
+ max_num_results=5,
+ ranking_options={
+ "ranker": {
+ "type": "rrf",
+            "impact_factor": 100.0,  # The RRF k parameter; larger values reduce the dominance of top-ranked results
+ }
+ },
+)
+```
+
+#### Hybrid Search with Weighted Ranker
+Weighted ranker linearly combines normalized scores from vector and keyword search. The alpha parameter controls the balance between the two search methods.
+
+```python
+# Hybrid search with weighted ranker
+search_response = client.vector_stores.search(
+ vector_store_id=vector_store.id,
+ query="neural networks in Python",
+ search_mode="hybrid",
+ max_num_results=5,
+ ranking_options={
+ "ranker": {
+ "type": "weighted",
+ "alpha": 0.7, # 70% vector search, 30% keyword search
+ }
+ },
+)
+```
+
+For detailed documentation on RRF and Weighted rankers, please refer to the [Milvus Reranking Guide](https://milvus.io/docs/reranking.md).
+
## Documentation
See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general.
diff --git a/llama_stack/core/routing_tables/toolgroups.py b/llama_stack/core/routing_tables/toolgroups.py
index e172af991..6910b3906 100644
--- a/llama_stack/core/routing_tables/toolgroups.py
+++ b/llama_stack/core/routing_tables/toolgroups.py
@@ -124,10 +124,7 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
return toolgroup
async def unregister_toolgroup(self, toolgroup_id: str) -> None:
- tool_group = await self.get_tool_group(toolgroup_id)
- if tool_group is None:
- raise ToolGroupNotFoundError(toolgroup_id)
- await self.unregister_object(tool_group)
+ await self.unregister_object(await self.get_tool_group(toolgroup_id))
async def shutdown(self) -> None:
pass
diff --git a/llama_stack/distributions/ci-tests/build.yaml b/llama_stack/distributions/ci-tests/build.yaml
index 2f9ae8682..e6e699b62 100644
--- a/llama_stack/distributions/ci-tests/build.yaml
+++ b/llama_stack/distributions/ci-tests/build.yaml
@@ -14,6 +14,7 @@ distribution_spec:
- provider_type: remote::openai
- provider_type: remote::anthropic
- provider_type: remote::gemini
+ - provider_type: remote::vertexai
- provider_type: remote::groq
- provider_type: remote::sambanova
- provider_type: inline::sentence-transformers
diff --git a/llama_stack/distributions/ci-tests/run.yaml b/llama_stack/distributions/ci-tests/run.yaml
index 188c66275..05e1b4576 100644
--- a/llama_stack/distributions/ci-tests/run.yaml
+++ b/llama_stack/distributions/ci-tests/run.yaml
@@ -65,6 +65,11 @@ providers:
provider_type: remote::gemini
config:
api_key: ${env.GEMINI_API_KEY:=}
+ - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
+ provider_type: remote::vertexai
+ config:
+ project: ${env.VERTEX_AI_PROJECT:=}
+ location: ${env.VERTEX_AI_LOCATION:=us-central1}
- provider_id: groq
provider_type: remote::groq
config:
diff --git a/llama_stack/distributions/starter/build.yaml b/llama_stack/distributions/starter/build.yaml
index f95a03a9e..1a4f81d49 100644
--- a/llama_stack/distributions/starter/build.yaml
+++ b/llama_stack/distributions/starter/build.yaml
@@ -14,6 +14,7 @@ distribution_spec:
- provider_type: remote::openai
- provider_type: remote::anthropic
- provider_type: remote::gemini
+ - provider_type: remote::vertexai
- provider_type: remote::groq
- provider_type: remote::sambanova
- provider_type: inline::sentence-transformers
diff --git a/llama_stack/distributions/starter/run.yaml b/llama_stack/distributions/starter/run.yaml
index 8bd737686..46bd12956 100644
--- a/llama_stack/distributions/starter/run.yaml
+++ b/llama_stack/distributions/starter/run.yaml
@@ -65,6 +65,11 @@ providers:
provider_type: remote::gemini
config:
api_key: ${env.GEMINI_API_KEY:=}
+ - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
+ provider_type: remote::vertexai
+ config:
+ project: ${env.VERTEX_AI_PROJECT:=}
+ location: ${env.VERTEX_AI_LOCATION:=us-central1}
- provider_id: groq
provider_type: remote::groq
config:
diff --git a/llama_stack/distributions/starter/starter.py b/llama_stack/distributions/starter/starter.py
index a970f2d1c..0270b68ad 100644
--- a/llama_stack/distributions/starter/starter.py
+++ b/llama_stack/distributions/starter/starter.py
@@ -56,6 +56,7 @@ ENABLED_INFERENCE_PROVIDERS = [
"fireworks",
"together",
"gemini",
+ "vertexai",
"groq",
"sambanova",
"anthropic",
@@ -71,6 +72,7 @@ INFERENCE_PROVIDER_IDS = {
"tgi": "${env.TGI_URL:+tgi}",
"cerebras": "${env.CEREBRAS_API_KEY:+cerebras}",
"nvidia": "${env.NVIDIA_API_KEY:+nvidia}",
+ "vertexai": "${env.VERTEX_AI_PROJECT:+vertexai}",
}
@@ -246,6 +248,14 @@ def get_distribution_template() -> DistributionTemplate:
"",
"Gemini API Key",
),
+ "VERTEX_AI_PROJECT": (
+ "",
+ "Google Cloud Project ID for Vertex AI",
+ ),
+ "VERTEX_AI_LOCATION": (
+ "us-central1",
+ "Google Cloud Location for Vertex AI",
+ ),
"SAMBANOVA_API_KEY": (
"",
"SambaNova API Key",
diff --git a/llama_stack/log.py b/llama_stack/log.py
index ab53e08c0..0a2d63ef6 100644
--- a/llama_stack/log.py
+++ b/llama_stack/log.py
@@ -99,7 +99,8 @@ def parse_environment_config(env_config: str) -> dict[str, int]:
Dict[str, int]: A dictionary mapping categories to their log levels.
"""
category_levels = {}
- for pair in env_config.split(";"):
+ delimiter = ","
+ for pair in env_config.split(delimiter):
if not pair.strip():
continue
diff --git a/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py b/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
index 796771ee1..e11ec5cf5 100644
--- a/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
+++ b/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
@@ -15,6 +15,7 @@ from llama_stack.apis.safety import (
RunShieldResponse,
Safety,
SafetyViolation,
+ ShieldStore,
ViolationLevel,
)
from llama_stack.apis.shields import Shield
@@ -32,6 +33,8 @@ PROMPT_GUARD_MODEL = "Prompt-Guard-86M"
class PromptGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
+ shield_store: ShieldStore
+
def __init__(self, config: PromptGuardConfig, _deps) -> None:
self.config = config
@@ -53,7 +56,7 @@ class PromptGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
self,
shield_id: str,
messages: list[Message],
- params: dict[str, Any] = None,
+ params: dict[str, Any],
) -> RunShieldResponse:
shield = await self.shield_store.get_shield(shield_id)
if not shield:
@@ -117,8 +120,10 @@ class PromptGuardShield:
elif self.config.guard_type == PromptGuardType.jailbreak.value and score_malicious > self.threshold:
violation = SafetyViolation(
violation_level=ViolationLevel.ERROR,
- violation_type=f"prompt_injection:malicious={score_malicious}",
- violation_return_message="Sorry, I cannot do this.",
+ user_message="Sorry, I cannot do this.",
+ metadata={
+ "violation_type": f"prompt_injection:malicious={score_malicious}",
+ },
)
return RunShieldResponse(violation=violation)
diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py
index 7a5373726..5a063592c 100644
--- a/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -174,7 +174,9 @@ class FaissIndex(EmbeddingIndex):
k: int,
score_threshold: float,
) -> QueryChunksResponse:
- raise NotImplementedError("Keyword search is not supported in FAISS")
+ raise NotImplementedError(
+ "Keyword search is not supported - underlying DB FAISS does not support this search mode"
+ )
async def query_hybrid(
self,
@@ -185,7 +187,9 @@ class FaissIndex(EmbeddingIndex):
reranker_type: str,
reranker_params: dict[str, Any] | None = None,
) -> QueryChunksResponse:
- raise NotImplementedError("Hybrid search is not supported in FAISS")
+ raise NotImplementedError(
+ "Hybrid search is not supported - underlying DB FAISS does not support this search mode"
+ )
class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py
index a8bc96a77..1801cdcad 100644
--- a/llama_stack/providers/registry/inference.py
+++ b/llama_stack/providers/registry/inference.py
@@ -213,6 +213,36 @@ def available_providers() -> list[ProviderSpec]:
description="Google Gemini inference provider for accessing Gemini models and Google's AI services.",
),
),
+ remote_provider_spec(
+ api=Api.inference,
+ adapter=AdapterSpec(
+ adapter_type="vertexai",
+ pip_packages=["litellm", "google-cloud-aiplatform"],
+ module="llama_stack.providers.remote.inference.vertexai",
+ config_class="llama_stack.providers.remote.inference.vertexai.VertexAIConfig",
+ provider_data_validator="llama_stack.providers.remote.inference.vertexai.config.VertexAIProviderDataValidator",
+ description="""Google Vertex AI inference provider enables you to use Google's Gemini models through Google Cloud's Vertex AI platform, providing several advantages:
+
+• Enterprise-grade security: Uses Google Cloud's security controls and IAM
+• Better integration: Seamless integration with other Google Cloud services
+• Advanced features: Access to additional Vertex AI features like model tuning and monitoring
+• Authentication: Uses Google Cloud Application Default Credentials (ADC) instead of API keys
+
+Configuration:
+- Set VERTEX_AI_PROJECT environment variable (required)
+- Set VERTEX_AI_LOCATION environment variable (optional, defaults to us-central1)
+- Use Google Cloud Application Default Credentials or service account key
+
+Authentication Setup:
+Option 1 (Recommended): gcloud auth application-default login
+Option 2: Set GOOGLE_APPLICATION_CREDENTIALS to service account key path
+
+Available Models:
+- vertex_ai/gemini-2.0-flash
+- vertex_ai/gemini-2.5-flash
+- vertex_ai/gemini-2.5-pro""",
+ ),
+ ),
remote_provider_spec(
api=Api.inference,
adapter=AdapterSpec(
diff --git a/llama_stack/providers/registry/vector_io.py b/llama_stack/providers/registry/vector_io.py
index 846f7b88e..ed170b508 100644
--- a/llama_stack/providers/registry/vector_io.py
+++ b/llama_stack/providers/registry/vector_io.py
@@ -45,6 +45,18 @@ That means you'll get fast and efficient vector retrieval.
- Lightweight and easy to use
- Fully integrated with Llama Stack
- GPU support
+- **Vector search** - FAISS supports pure vector similarity search using embeddings
+
+## Search Modes
+
+**Supported:**
+- **Vector Search** (`mode="vector"`): Performs vector similarity search using embeddings
+
+**Not Supported:**
+- **Keyword Search** (`mode="keyword"`): Not supported by FAISS
+- **Hybrid Search** (`mode="hybrid"`): Not supported by FAISS
+
+> **Note**: FAISS is designed as a pure vector similarity search library. See the [FAISS GitHub repository](https://github.com/facebookresearch/faiss) for more details about FAISS's core functionality.
## Usage
@@ -535,6 +547,7 @@ That means you're not limited to storing vectors in memory or in a separate serv
- Easy to use
- Fully integrated with Llama Stack
+- Supports all search modes: vector, keyword, and hybrid search (both inline and remote configurations)
## Usage
@@ -625,6 +638,92 @@ vector_io:
- **`client_pem_path`**: Path to the **client certificate** file (required for mTLS).
- **`client_key_path`**: Path to the **client private key** file (required for mTLS).
+## Search Modes
+
+Milvus supports three different search modes for both inline and remote configurations:
+
+### Vector Search
+Vector search uses semantic similarity to find the most relevant chunks based on embedding vectors. This is the default search mode and works well for finding conceptually similar content.
+
+```python
+# Vector search example
+search_response = client.vector_stores.search(
+ vector_store_id=vector_store.id,
+ query="What is machine learning?",
+ search_mode="vector",
+ max_num_results=5,
+)
+```
+
+### Keyword Search
+Keyword search uses traditional text-based matching to find chunks containing specific terms or phrases. This is useful when you need exact term matches.
+
+```python
+# Keyword search example
+search_response = client.vector_stores.search(
+ vector_store_id=vector_store.id,
+ query="Python programming language",
+ search_mode="keyword",
+ max_num_results=5,
+)
+```
+
+### Hybrid Search
+Hybrid search combines both vector and keyword search methods to provide more comprehensive results. It leverages the strengths of both semantic similarity and exact term matching.
+
+#### Basic Hybrid Search
+```python
+# Basic hybrid search example (uses RRF ranker with default impact_factor=60.0)
+search_response = client.vector_stores.search(
+ vector_store_id=vector_store.id,
+ query="neural networks in Python",
+ search_mode="hybrid",
+ max_num_results=5,
+)
+```
+
+**Note**: The default `impact_factor` value of 60.0 was empirically determined to be optimal in the original RRF research paper: ["Reciprocal Rank Fusion outperforms Condorcet and individual Rank Learning Methods"](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) (Cormack et al., 2009).
+
+#### Hybrid Search with RRF (Reciprocal Rank Fusion) Ranker
+RRF combines rankings from vector and keyword search by using reciprocal ranks. The impact factor controls how much weight is given to higher-ranked results.
+
+```python
+# Hybrid search with custom RRF parameters
+search_response = client.vector_stores.search(
+ vector_store_id=vector_store.id,
+ query="neural networks in Python",
+ search_mode="hybrid",
+ max_num_results=5,
+ ranking_options={
+ "ranker": {
+ "type": "rrf",
+            "impact_factor": 100.0,  # The RRF k parameter; larger values reduce the dominance of top-ranked results
+ }
+ },
+)
+```
+
+#### Hybrid Search with Weighted Ranker
+Weighted ranker linearly combines normalized scores from vector and keyword search. The alpha parameter controls the balance between the two search methods.
+
+```python
+# Hybrid search with weighted ranker
+search_response = client.vector_stores.search(
+ vector_store_id=vector_store.id,
+ query="neural networks in Python",
+ search_mode="hybrid",
+ max_num_results=5,
+ ranking_options={
+ "ranker": {
+ "type": "weighted",
+ "alpha": 0.7, # 70% vector search, 30% keyword search
+ }
+ },
+)
+```
+
+For detailed documentation on RRF and Weighted rankers, please refer to the [Milvus Reranking Guide](https://milvus.io/docs/reranking.md).
+
## Documentation
See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general.
diff --git a/llama_stack/providers/remote/inference/vertexai/__init__.py b/llama_stack/providers/remote/inference/vertexai/__init__.py
new file mode 100644
index 000000000..d9e9419be
--- /dev/null
+++ b/llama_stack/providers/remote/inference/vertexai/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .config import VertexAIConfig
+
+
+async def get_adapter_impl(config: VertexAIConfig, _deps):
+ from .vertexai import VertexAIInferenceAdapter
+
+ impl = VertexAIInferenceAdapter(config)
+ await impl.initialize()
+ return impl
diff --git a/llama_stack/providers/remote/inference/vertexai/config.py b/llama_stack/providers/remote/inference/vertexai/config.py
new file mode 100644
index 000000000..659de653e
--- /dev/null
+++ b/llama_stack/providers/remote/inference/vertexai/config.py
@@ -0,0 +1,45 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+from llama_stack.schema_utils import json_schema_type
+
+
+class VertexAIProviderDataValidator(BaseModel):
+ vertex_project: str | None = Field(
+ default=None,
+ description="Google Cloud project ID for Vertex AI",
+ )
+ vertex_location: str | None = Field(
+ default=None,
+ description="Google Cloud location for Vertex AI (e.g., us-central1)",
+ )
+
+
+@json_schema_type
+class VertexAIConfig(BaseModel):
+ project: str = Field(
+ description="Google Cloud project ID for Vertex AI",
+ )
+ location: str = Field(
+ default="us-central1",
+ description="Google Cloud location for Vertex AI",
+ )
+
+ @classmethod
+ def sample_run_config(
+ cls,
+ project: str = "${env.VERTEX_AI_PROJECT:=}",
+ location: str = "${env.VERTEX_AI_LOCATION:=us-central1}",
+ **kwargs,
+ ) -> dict[str, Any]:
+ return {
+ "project": project,
+ "location": location,
+ }
diff --git a/llama_stack/providers/remote/inference/vertexai/models.py b/llama_stack/providers/remote/inference/vertexai/models.py
new file mode 100644
index 000000000..e72db533d
--- /dev/null
+++ b/llama_stack/providers/remote/inference/vertexai/models.py
@@ -0,0 +1,20 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.providers.utils.inference.model_registry import (
+ ProviderModelEntry,
+)
+
+# Vertex AI model IDs with vertex_ai/ prefix as required by litellm
+LLM_MODEL_IDS = [
+ "vertex_ai/gemini-2.0-flash",
+ "vertex_ai/gemini-2.5-flash",
+ "vertex_ai/gemini-2.5-pro",
+]
+
+SAFETY_MODELS_ENTRIES = list[ProviderModelEntry]()
+
+MODEL_ENTRIES = [ProviderModelEntry(provider_model_id=m) for m in LLM_MODEL_IDS] + SAFETY_MODELS_ENTRIES
diff --git a/llama_stack/providers/remote/inference/vertexai/vertexai.py b/llama_stack/providers/remote/inference/vertexai/vertexai.py
new file mode 100644
index 000000000..8807fd0e6
--- /dev/null
+++ b/llama_stack/providers/remote/inference/vertexai/vertexai.py
@@ -0,0 +1,52 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any
+
+from llama_stack.apis.inference import ChatCompletionRequest
+from llama_stack.providers.utils.inference.litellm_openai_mixin import (
+ LiteLLMOpenAIMixin,
+)
+
+from .config import VertexAIConfig
+from .models import MODEL_ENTRIES
+
+
+class VertexAIInferenceAdapter(LiteLLMOpenAIMixin):
+ def __init__(self, config: VertexAIConfig) -> None:
+ LiteLLMOpenAIMixin.__init__(
+ self,
+ MODEL_ENTRIES,
+ litellm_provider_name="vertex_ai",
+ api_key_from_config=None, # Vertex AI uses ADC, not API keys
+ provider_data_api_key_field="vertex_project", # Use project for validation
+ )
+ self.config = config
+
+ def get_api_key(self) -> str:
+ # Vertex AI doesn't use API keys, it uses Application Default Credentials
+ # Return empty string to let litellm handle authentication via ADC
+ return ""
+
+ async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
+ # Get base parameters from parent
+ params = await super()._get_params(request)
+
+ # Add Vertex AI specific parameters
+ provider_data = self.get_request_provider_data()
+ if provider_data:
+ if getattr(provider_data, "vertex_project", None):
+ params["vertex_project"] = provider_data.vertex_project
+ if getattr(provider_data, "vertex_location", None):
+ params["vertex_location"] = provider_data.vertex_location
+ else:
+ params["vertex_project"] = self.config.project
+ params["vertex_location"] = self.config.location
+
+ # Remove api_key since Vertex AI uses ADC
+ params.pop("api_key", None)
+
+ return params
diff --git a/llama_stack/ui/app/chat-playground/page.tsx b/llama_stack/ui/app/chat-playground/page.tsx
index c31248b78..d8094af85 100644
--- a/llama_stack/ui/app/chat-playground/page.tsx
+++ b/llama_stack/ui/app/chat-playground/page.tsx
@@ -175,7 +175,7 @@ const handleSubmitWithContent = async (content: string) => {
return (