diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 8021e0e55..6794d1fbb 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -14796,7 +14796,8 @@
"description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\""
},
"mode": {
- "type": "string",
+ "$ref": "#/components/schemas/RAGSearchMode",
+ "default": "vector",
"description": "Search mode for retrieval—either \"vector\", \"keyword\", or \"hybrid\". Default \"vector\"."
},
"ranker": {
@@ -14831,6 +14832,16 @@
                    }
                }
            },
+            "RAGSearchMode": {
+                "type": "string",
+                "enum": [
+                    "vector",
+                    "keyword",
+                    "hybrid"
+                ],
+                "title": "RAGSearchMode",
+                "description": "Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search for semantic matching - KEYWORD: Uses keyword-based search for exact matching - HYBRID: Combines both vector and keyword search for better results"
+            },
            "RRFRanker": {
                "type": "object",
                "properties": {
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index a18474646..548c5a988 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -10346,7 +10346,8 @@ components:
            content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent:
            {chunk.content}\nMetadata: {metadata}\n"
        mode:
-          type: string
+          $ref: '#/components/schemas/RAGSearchMode'
+          default: vector
          description: >-
            Search mode for retrieval—either "vector", "keyword", or "hybrid". Default
            "vector".
@@ -10373,6 +10374,17 @@ components:
        mapping:
          default: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
          llm: '#/components/schemas/LLMRAGQueryGeneratorConfig'
+    RAGSearchMode:
+      type: string
+      enum:
+        - vector
+        - keyword
+        - hybrid
+      title: RAGSearchMode
+      description: >-
+        Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search
+        for semantic matching - KEYWORD: Uses keyword-based search for exact matching
+        - HYBRID: Combines both vector and keyword search for better results
    RRFRanker:
      type: object
      properties:
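
The two hunks above are the same change in the spec's JSON and YAML renderings: `mode` now references a shared `RAGSearchMode` schema with a `vector` default instead of a free-form string. As a quick illustration of what that enum constraint enforces, here is a sketch using the third-party `jsonschema` package (not part of this PR) against an abridged copy of the new schema:

```python
# Sketch only: validate candidate "mode" values against the RAGSearchMode
# schema fragment introduced above, inlined here to keep the example
# self-contained.
import jsonschema

rag_search_mode = {"type": "string", "enum": ["vector", "keyword", "hybrid"]}

jsonschema.validate("hybrid", rag_search_mode)        # passes silently
try:
    jsonschema.validate("semantic", rag_search_mode)  # not an allowed value
except jsonschema.ValidationError as exc:
    print(f"rejected: {exc.message}")
```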
diff --git a/docs/source/distributions/building_distro.md b/docs/source/distributions/building_distro.md
index f24974dd3..cd2c6b6a8 100644
--- a/docs/source/distributions/building_distro.md
+++ b/docs/source/distributions/building_distro.md
@@ -145,6 +145,10 @@ $ llama stack build --template starter
...
You can now edit ~/.llama/distributions/llamastack-starter/starter-run.yaml and run `llama stack run ~/.llama/distributions/llamastack-starter/starter-run.yaml`
```
+
+```{tip}
+The generated `run.yaml` file is a starting point for your configuration. For comprehensive guidance on customizing it for your specific needs, infrastructure, and deployment scenarios, see [Customizing Your run.yaml Configuration](customizing_run_yaml.md).
+```
:::
:::{tab-item} Building from Scratch
diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md
index 4709cb8c6..9548780c6 100644
--- a/docs/source/distributions/configuration.md
+++ b/docs/source/distributions/configuration.md
@@ -2,6 +2,10 @@
The Llama Stack runtime configuration is specified as a YAML file. Here is a simplified version of an example configuration file for the Ollama distribution:
+```{note}
+The default `run.yaml` files generated by templates are starting points for your configuration. For guidance on customizing these files for your specific needs, see [Customizing Your run.yaml Configuration](customizing_run_yaml.md).
+```
+
```{dropdown} 👋 Click here for a Sample Configuration File
```yaml
diff --git a/docs/source/distributions/customizing_run_yaml.md b/docs/source/distributions/customizing_run_yaml.md
new file mode 100644
index 000000000..10067bab7
--- /dev/null
+++ b/docs/source/distributions/customizing_run_yaml.md
@@ -0,0 +1,40 @@
+# Customizing run.yaml Files
+
+The `run.yaml` files generated by Llama Stack templates are **starting points** designed to be customized for your specific needs. They are not meant to be used as-is in production environments.
+
+## Key Points
+
+- **Templates are starting points**: Generated `run.yaml` files contain defaults for development/testing
+- **Customization expected**: Update URLs, credentials, models, and settings for your environment
+- **Version control separately**: Keep customized configs in your own repository
+- **Environment-specific**: Create different configurations for dev, staging, production
+
+## What You Can Customize
+
+Common customizations include:
+- **Provider endpoints**: Change `http://localhost:8000` to your actual servers
+- **Providers**: Swap defaults (e.g., replace Tavily with Brave for search)
+- **Storage paths**: Move from `/tmp/` to production directories
+- **Authentication**: Add API keys, SSL, and timeouts
+- **Models**: Use different model sizes for dev vs. prod
+- **Database settings**: Switch from SQLite to PostgreSQL
+- **Tool configurations**: Add custom tools and integrations
+
+## Best Practices
+
+- Use environment variables for secrets and environment-specific values
+- Create separate `run.yaml` files for different environments (dev, staging, prod)
+- Document your changes with comments
+- Test configurations before deployment
+- Keep your customized configs in version control
+
+Example structure:
+```
+your-project/
+├── configs/
+│   ├── dev-run.yaml
+│   └── prod-run.yaml
+└── README.md
+```
+
+The goal is to take the generated template and adapt it to your specific infrastructure and operational needs.
\ No newline at end of file
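
One way to act on the layout above is a thin launcher that picks the per-environment file at startup. The `DEPLOY_ENV` variable and `configs/` paths are illustrative, following the example structure in the new doc rather than any Llama Stack convention; only the `llama stack run <config>` invocation comes from the docs themselves.

```python
# Hypothetical launcher: choose dev-run.yaml or prod-run.yaml via DEPLOY_ENV.
import os
import subprocess

env = os.environ.get("DEPLOY_ENV", "dev")  # e.g. dev, staging, prod
config = f"configs/{env}-run.yaml"
subprocess.run(["llama", "stack", "run", config], check=True)
```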
diff --git a/docs/source/distributions/index.md b/docs/source/distributions/index.md
index 103a6131f..600eec3a1 100644
--- a/docs/source/distributions/index.md
+++ b/docs/source/distributions/index.md
@@ -9,6 +9,7 @@ This section provides an overview of the distributions available in Llama Stack.
importing_as_library
configuration
+customizing_run_yaml
list_of_distributions
kubernetes_deployment
building_distro
diff --git a/docs/source/getting_started/detailed_tutorial.md b/docs/source/getting_started/detailed_tutorial.md
index 35cb7f02e..97e7df774 100644
--- a/docs/source/getting_started/detailed_tutorial.md
+++ b/docs/source/getting_started/detailed_tutorial.md
@@ -54,7 +54,7 @@ Llama Stack is a server that exposes multiple APIs, you connect with it using th
You can use Python to build and run the Llama Stack server, which is useful for testing and development.
Llama Stack uses a [YAML configuration file](../distributions/configuration.md) to specify the stack setup,
-which defines the providers and their settings.
+which defines the providers and their settings. The generated configuration serves as a starting point that you can [customize for your specific needs](../distributions/customizing_run_yaml.md).
Now let's build and run the Llama Stack config for Ollama.
We use `starter` as the template. By default all providers are disabled, so we need to enable Ollama by passing environment variables.
diff --git a/llama_stack/apis/tools/rag_tool.py b/llama_stack/apis/tools/rag_tool.py
index d497fe1a7..cfaa49488 100644
--- a/llama_stack/apis/tools/rag_tool.py
+++ b/llama_stack/apis/tools/rag_tool.py
@@ -87,6 +87,20 @@ class RAGQueryGenerator(Enum):
custom = "custom"
+@json_schema_type
+class RAGSearchMode(Enum):
+ """
+ Search modes for RAG query retrieval:
+ - VECTOR: Uses vector similarity search for semantic matching
+ - KEYWORD: Uses keyword-based search for exact matching
+ - HYBRID: Combines both vector and keyword search for better results
+ """
+
+ VECTOR = "vector"
+ KEYWORD = "keyword"
+ HYBRID = "hybrid"
+
+
@json_schema_type
class DefaultRAGQueryGeneratorConfig(BaseModel):
type: Literal["default"] = "default"
@@ -128,7 +142,7 @@ class RAGQueryConfig(BaseModel):
    max_tokens_in_context: int = 4096
    max_chunks: int = 5
    chunk_template: str = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n"
-    mode: str | None = None
+    mode: RAGSearchMode | None = RAGSearchMode.VECTOR
    ranker: Ranker | None = Field(default=None)  # Only used for hybrid mode

    @field_validator("chunk_template")
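
The net effect of this model change: `mode` now defaults to vector search instead of `None`, and arbitrary strings are rejected at construction time. A minimal sketch of the new behavior, assuming pydantic's standard enum coercion:

```python
from llama_stack.apis.tools.rag_tool import RAGQueryConfig, RAGSearchMode

config = RAGQueryConfig()
assert config.mode == RAGSearchMode.VECTOR   # previously defaulted to None

config = RAGQueryConfig(mode="keyword")      # plain strings coerce to the enum
assert config.mode == RAGSearchMode.KEYWORD

RAGQueryConfig(mode="fuzzy")                 # raises pydantic's ValidationError
```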
diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py
index b2baa744a..ad155c205 100644
--- a/tests/unit/rag/test_rag_query.py
+++ b/tests/unit/rag/test_rag_query.py
@@ -8,6 +8,7 @@ from unittest.mock import AsyncMock, MagicMock
import pytest

+from llama_stack.apis.tools.rag_tool import RAGQueryConfig
from llama_stack.apis.vector_io import (
    Chunk,
    ChunkMetadata,
@@ -58,3 +59,13 @@ class TestRagQuery:
        )
        assert expected_metadata_string in result.content[1].text
        assert result.content is not None
+
+    def test_query_raises_incorrect_mode(self):
+        with pytest.raises(ValueError):
+            RAGQueryConfig(mode="invalid_mode")
+
+    def test_query_accepts_valid_modes(self):
+        RAGQueryConfig()  # default mode (vector)
+        RAGQueryConfig(mode="vector")
+        RAGQueryConfig(mode="keyword")
+        RAGQueryConfig(mode="hybrid")
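
A note on the `ValueError` expectation above: pydantic raises `ValidationError` for a bad `mode`, and `ValidationError` subclasses `ValueError` in both pydantic v1 and v2, so `pytest.raises(ValueError)` catches it without any extra import. The two new tests are also plain synchronous functions, since neither awaits anything.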