Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-23 18:34:01 +00:00)

Merge branch 'main' into rm-faiss-file

Commit c30e343fbb

9 changed files with 102 additions and 4 deletions
docs/_static/llama-stack-spec.html (vendored, 13 changes)

@@ -14796,7 +14796,8 @@
       "description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\""
     },
     "mode": {
-      "type": "string",
+      "$ref": "#/components/schemas/RAGSearchMode",
+      "default": "vector",
       "description": "Search mode for retrieval—either \"vector\", \"keyword\", or \"hybrid\". Default \"vector\"."
     },
     "ranker": {
@@ -14831,6 +14832,16 @@
       }
     }
   },
+  "RAGSearchMode": {
+    "type": "string",
+    "enum": [
+      "vector",
+      "keyword",
+      "hybrid"
+    ],
+    "title": "RAGSearchMode",
+    "description": "Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search for semantic matching - KEYWORD: Uses keyword-based search for exact matching - HYBRID: Combines both vector and keyword search for better results"
+  },
   "RRFRanker": {
     "type": "object",
     "properties": {
docs/_static/llama-stack-spec.yaml (vendored, 14 changes)

@@ -10346,7 +10346,8 @@ components:
         content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent:
         {chunk.content}\nMetadata: {metadata}\n"
       mode:
-        type: string
+        $ref: '#/components/schemas/RAGSearchMode'
+        default: vector
         description: >-
           Search mode for retrieval—either "vector", "keyword", or "hybrid". Default
           "vector".
@@ -10373,6 +10374,17 @@ components:
       mapping:
         default: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
         llm: '#/components/schemas/LLMRAGQueryGeneratorConfig'
+    RAGSearchMode:
+      type: string
+      enum:
+        - vector
+        - keyword
+        - hybrid
+      title: RAGSearchMode
+      description: >-
+        Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search
+        for semantic matching - KEYWORD: Uses keyword-based search for exact matching
+        - HYBRID: Combines both vector and keyword search for better results
    RRFRanker:
      type: object
      properties:
@@ -145,6 +145,10 @@ $ llama stack build --template starter
 ...
 You can now edit ~/.llama/distributions/llamastack-starter/starter-run.yaml and run `llama stack run ~/.llama/distributions/llamastack-starter/starter-run.yaml`
 ```
 
+```{tip}
+The generated `run.yaml` file is a starting point for your configuration. For comprehensive guidance on customizing it for your specific needs, infrastructure, and deployment scenarios, see [Customizing Your run.yaml Configuration](customizing_run_yaml.md).
+```
+
 :::
 :::{tab-item} Building from Scratch
@@ -2,6 +2,10 @@
 
 The Llama Stack runtime configuration is specified as a YAML file. Here is a simplified version of an example configuration file for the Ollama distribution:
 
+```{note}
+The default `run.yaml` files generated by templates are starting points for your configuration. For guidance on customizing these files for your specific needs, see [Customizing Your run.yaml Configuration](customizing_run_yaml.md).
+```
+
 ```{dropdown} 👋 Click here for a Sample Configuration File
 
 ```yaml
docs/source/distributions/customizing_run_yaml.md (new file, 40 lines)

@@ -0,0 +1,40 @@
+# Customizing run.yaml Files
+
+The `run.yaml` files generated by Llama Stack templates are **starting points** designed to be customized for your specific needs. They are not meant to be used as-is in production environments.
+
+## Key Points
+
+- **Templates are starting points**: Generated `run.yaml` files contain defaults for development/testing
+- **Customization expected**: Update URLs, credentials, models, and settings for your environment
+- **Version control separately**: Keep customized configs in your own repository
+- **Environment-specific**: Create different configurations for dev, staging, production
+
+## What You Can Customize
+
+You can customize:
+- **Provider endpoints**: Change `http://localhost:8000` to your actual servers
+- **Swap providers**: Replace default providers (e.g., swap Tavily with Brave for search)
+- **Storage paths**: Move from `/tmp/` to production directories
+- **Authentication**: Add API keys, SSL, timeouts
+- **Models**: Different model sizes for dev vs prod
+- **Database settings**: Switch from SQLite to PostgreSQL
+- **Tool configurations**: Add custom tools and integrations
+
+## Best Practices
+
+- Use environment variables for secrets and environment-specific values
+- Create separate `run.yaml` files for different environments (dev, staging, prod)
+- Document your changes with comments
+- Test configurations before deployment
+- Keep your customized configs in version control
+
+Example structure:
+```
+your-project/
+├── configs/
+│   ├── dev-run.yaml
+│   ├── prod-run.yaml
+└── README.md
+```
+
+The goal is to take the generated template and adapt it to your specific infrastructure and operational needs.
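The "use environment variables for secrets" practice in the new doc can be made concrete with a small helper. The sketch below is illustrative only: the `${env.NAME}` placeholder syntax and the `load_run_yaml` helper are assumptions, not part of this diff, and it assumes PyYAML is installed.

```python
# Hypothetical sketch: expand ${env.NAME} placeholders in a run.yaml
# before loading it, so secrets stay out of version control.
import os
import re

import yaml  # assumes PyYAML is installed


def load_run_yaml(path: str) -> dict:
    """Read a run.yaml file, substituting ${env.NAME} with os.environ['NAME']."""
    with open(path) as f:
        text = f.read()

    def _lookup(match: re.Match) -> str:
        # Fail loudly on missing variables rather than silently inserting "".
        name = match.group(1)
        if name not in os.environ:
            raise KeyError(f"run.yaml references unset environment variable: {name}")
        return os.environ[name]

    return yaml.safe_load(re.sub(r"\$\{env\.(\w+)\}", _lookup, text))
```

Resolving placeholders at load time lets the `dev-run.yaml` and `prod-run.yaml` files from the example structure share the same shape while each environment supplies its own values.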
@@ -9,6 +9,7 @@ This section provides an overview of the distributions available in Llama Stack.
 
 importing_as_library
 configuration
+customizing_run_yaml
 list_of_distributions
 kubernetes_deployment
 building_distro
@@ -54,7 +54,7 @@ Llama Stack is a server that exposes multiple APIs, you connect with it using th
 You can use Python to build and run the Llama Stack server, which is useful for testing and development.
 
 Llama Stack uses a [YAML configuration file](../distributions/configuration.md) to specify the stack setup,
-which defines the providers and their settings.
+which defines the providers and their settings. The generated configuration serves as a starting point that you can [customize for your specific needs](../distributions/customizing_run_yaml.md).
 Now let's build and run the Llama Stack config for Ollama.
 We use `starter` as template. By default all providers are disabled, this requires enable ollama by passing environment variables.
@@ -87,6 +87,20 @@ class RAGQueryGenerator(Enum):
     custom = "custom"
 
 
+@json_schema_type
+class RAGSearchMode(Enum):
+    """
+    Search modes for RAG query retrieval:
+    - VECTOR: Uses vector similarity search for semantic matching
+    - KEYWORD: Uses keyword-based search for exact matching
+    - HYBRID: Combines both vector and keyword search for better results
+    """
+
+    VECTOR = "vector"
+    KEYWORD = "keyword"
+    HYBRID = "hybrid"
+
+
 @json_schema_type
 class DefaultRAGQueryGeneratorConfig(BaseModel):
     type: Literal["default"] = "default"
@@ -128,7 +142,7 @@ class RAGQueryConfig(BaseModel):
     max_tokens_in_context: int = 4096
     max_chunks: int = 5
     chunk_template: str = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n"
-    mode: str | None = None
+    mode: RAGSearchMode | None = RAGSearchMode.VECTOR
     ranker: Ranker | None = Field(default=None)  # Only used for hybrid mode
 
     @field_validator("chunk_template")
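The net effect of the change above is that `RAGQueryConfig.mode` is a typed enum defaulting to vector search instead of an unvalidated optional string. A minimal sketch of the resulting behavior, inferred from the new unit tests below; it assumes `RAGSearchMode` is importable from the same module as `RAGQueryConfig` (the diff defines them side by side):

```python
# Quick sketch, not part of the diff: exercising the new typed field.
from llama_stack.apis.tools.rag_tool import RAGQueryConfig, RAGSearchMode

# The default is now vector search rather than an unset string.
config = RAGQueryConfig()
assert config.mode == RAGSearchMode.VECTOR

# Pydantic validates enum fields by value, so plain strings still work.
hybrid = RAGQueryConfig(mode="hybrid")
assert hybrid.mode == RAGSearchMode.HYBRID

# Values outside vector/keyword/hybrid now fail at construction time.
try:
    RAGQueryConfig(mode="invalid_mode")
except ValueError:  # pydantic's ValidationError subclasses ValueError
    print("invalid modes are rejected")
```

This is exactly the behavior the added tests in the next file pin down.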
@@ -8,6 +8,7 @@ from unittest.mock import AsyncMock, MagicMock
 
 import pytest
 
+from llama_stack.apis.tools.rag_tool import RAGQueryConfig
 from llama_stack.apis.vector_io import (
     Chunk,
     ChunkMetadata,
@@ -58,3 +59,14 @@ class TestRagQuery:
         )
         assert expected_metadata_string in result.content[1].text
         assert result.content is not None
+
+    async def test_query_raises_incorrect_mode(self):
+        with pytest.raises(ValueError):
+            RAGQueryConfig(mode="invalid_mode")
+
+    @pytest.mark.asyncio
+    async def test_query_accepts_valid_modes(self):
+        RAGQueryConfig()  # Test default (vector)
+        RAGQueryConfig(mode="vector")  # Test vector
+        RAGQueryConfig(mode="keyword")  # Test keyword
+        RAGQueryConfig(mode="hybrid")  # Test hybrid