diff --git a/docs/openapi_generator/pyopenapi/operations.py b/docs/openapi_generator/pyopenapi/operations.py
index 4cea9d970..abeb16936 100644
--- a/docs/openapi_generator/pyopenapi/operations.py
+++ b/docs/openapi_generator/pyopenapi/operations.py
@@ -172,10 +172,16 @@ def _get_endpoint_functions(
def _get_defining_class(member_fn: str, derived_cls: type) -> type:
"Find the class in which a member function is first defined in a class inheritance hierarchy."
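+ # NOTE: keep this import function-local, not at module top level (presumably to avoid an import-order/cycle problem when this module loads -- TODO confirm)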
+ from llama_stack.apis.tools import RAGToolRuntime, ToolRuntime
+
# iterate in reverse member resolution order to find most specific class first
for cls in reversed(inspect.getmro(derived_cls)):
for name, _ in inspect.getmembers(cls, inspect.isfunction):
if name == member_fn:
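+ # HACK: report members first found on RAGToolRuntime as defined by ToolRuntime (the generated spec tags the rag-tool routes "ToolRuntime")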
+ if cls == RAGToolRuntime:
+ return ToolRuntime
return cls
raise ValidationError(
diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html
index ffa1df8e6..f00d7b291 100644
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@@ -1929,6 +1929,49 @@
}
}
},
+ "/v1/tool-runtime/rag-tool/insert-documents": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK"
+ }
+ },
+ "tags": [
+ "ToolRuntime"
+ ],
+ "summary": "Index documents so they can be used by the RAG system",
+ "parameters": [
+ {
+ "name": "X-LlamaStack-Provider-Data",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "X-LlamaStack-Client-Version",
+ "in": "header",
+ "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/InsertDocumentsRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/v1/tool-runtime/invoke": {
"post": {
"responses": {
@@ -3039,6 +3082,56 @@
}
}
},
+ "/v1/tool-runtime/rag-tool/query-context": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/RAGQueryResult"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "ToolRuntime"
+ ],
+ "summary": "Query the RAG system for context; typically invoked by the agent",
+ "parameters": [
+ {
+ "name": "X-LlamaStack-Provider-Data",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "X-LlamaStack-Client-Version",
+ "in": "header",
+ "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/QueryContextRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/v1/telemetry/spans": {
"get": {
"responses": {
@@ -6940,6 +7033,90 @@
"chunks"
]
},
+ "RAGDocument": {
+ "type": "object",
+ "properties": {
+ "document_id": {
+ "type": "string"
+ },
+ "content": {
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "$ref": "#/components/schemas/InterleavedContentItem"
+ },
+ {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/InterleavedContentItem"
+ }
+ },
+ {
+ "$ref": "#/components/schemas/URL"
+ }
+ ]
+ },
+ "mime_type": {
+ "type": "string"
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "document_id",
+ "content",
+ "metadata"
+ ]
+ },
+ "InsertDocumentsRequest": {
+ "type": "object",
+ "properties": {
+ "documents": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/RAGDocument"
+ }
+ },
+ "vector_db_id": {
+ "type": "string"
+ },
+ "chunk_size_in_tokens": {
+ "type": "integer"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "documents",
+ "vector_db_id",
+ "chunk_size_in_tokens"
+ ]
+ },
"InvokeToolRequest": {
"type": "object",
"properties": {
@@ -7804,6 +7981,111 @@
"scores"
]
},
+ "DefaultRAGQueryGeneratorConfig": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "default",
+ "default": "default"
+ },
+ "separator": {
+ "type": "string",
+ "default": " "
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "separator"
+ ]
+ },
+ "LLMRAGQueryGeneratorConfig": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "llm",
+ "default": "llm"
+ },
+ "model": {
+ "type": "string"
+ },
+ "template": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "model",
+ "template"
+ ]
+ },
+ "RAGQueryConfig": {
+ "type": "object",
+ "properties": {
+ "query_generator_config": {
+ "$ref": "#/components/schemas/RAGQueryGeneratorConfig"
+ },
+ "max_tokens_in_context": {
+ "type": "integer",
+ "default": 4096
+ },
+ "max_chunks": {
+ "type": "integer",
+ "default": 5
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "query_generator_config",
+ "max_tokens_in_context",
+ "max_chunks"
+ ]
+ },
+ "RAGQueryGeneratorConfig": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/DefaultRAGQueryGeneratorConfig"
+ },
+ {
+ "$ref": "#/components/schemas/LLMRAGQueryGeneratorConfig"
+ }
+ ]
+ },
+ "QueryContextRequest": {
+ "type": "object",
+ "properties": {
+ "content": {
+ "$ref": "#/components/schemas/InterleavedContent"
+ },
+ "query_config": {
+ "$ref": "#/components/schemas/RAGQueryConfig"
+ },
+ "vector_db_ids": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "content",
+ "query_config",
+ "vector_db_ids"
+ ]
+ },
+ "RAGQueryResult": {
+ "type": "object",
+ "properties": {
+ "content": {
+ "$ref": "#/components/schemas/InterleavedContent"
+ }
+ },
+ "additionalProperties": false
+ },
"QueryCondition": {
"type": "object",
"properties": {
@@ -8902,6 +9184,10 @@
{
"name": "Datasets"
},
+ {
+ "name": "DefaultRAGQueryGeneratorConfig",
+ "description": ""
+ },
{
"name": "EfficiencyConfig",
"description": ""
@@ -8959,6 +9245,10 @@
"name": "InsertChunksRequest",
"description": ""
},
+ {
+ "name": "InsertDocumentsRequest",
+ "description": ""
+ },
{
"name": "Inspect"
},
@@ -8990,6 +9280,10 @@
"name": "LLMAsJudgeScoringFnParams",
"description": ""
},
+ {
+ "name": "LLMRAGQueryGeneratorConfig",
+ "description": ""
+ },
{
"name": "ListDatasetsResponse",
"description": ""
@@ -9140,6 +9434,10 @@
"name": "QueryConditionOp",
"description": ""
},
+ {
+ "name": "QueryContextRequest",
+ "description": ""
+ },
{
"name": "QuerySpanTreeResponse",
"description": ""
@@ -9152,6 +9450,22 @@
"name": "QueryTracesResponse",
"description": ""
},
+ {
+ "name": "RAGDocument",
+ "description": ""
+ },
+ {
+ "name": "RAGQueryConfig",
+ "description": ""
+ },
+ {
+ "name": "RAGQueryGeneratorConfig",
+ "description": ""
+ },
+ {
+ "name": "RAGQueryResult",
+ "description": ""
+ },
{
"name": "RegexParserScoringFnParams",
"description": ""
@@ -9531,6 +9845,7 @@
"DataConfig",
"Dataset",
"DatasetFormat",
+ "DefaultRAGQueryGeneratorConfig",
"EfficiencyConfig",
"EmbeddingsRequest",
"EmbeddingsResponse",
@@ -9543,6 +9858,7 @@
"ImageDelta",
"InferenceStep",
"InsertChunksRequest",
+ "InsertDocumentsRequest",
"InterleavedContent",
"InterleavedContentItem",
"InvokeToolRequest",
@@ -9550,6 +9866,7 @@
"JobStatus",
"JsonType",
"LLMAsJudgeScoringFnParams",
+ "LLMRAGQueryGeneratorConfig",
"ListDatasetsResponse",
"ListEvalTasksResponse",
"ListModelsResponse",
@@ -9586,9 +9903,14 @@
"QueryChunksResponse",
"QueryCondition",
"QueryConditionOp",
+ "QueryContextRequest",
"QuerySpanTreeResponse",
"QuerySpansResponse",
"QueryTracesResponse",
+ "RAGDocument",
+ "RAGQueryConfig",
+ "RAGQueryGeneratorConfig",
+ "RAGQueryResult",
"RegexParserScoringFnParams",
"RegisterDatasetRequest",
"RegisterEvalTaskRequest",
diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml
index 1678b1bb9..e1ae07c45 100644
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@@ -761,6 +761,20 @@ components:
- instruct
- dialog
type: string
+ DefaultRAGQueryGeneratorConfig:
+ additionalProperties: false
+ properties:
+ separator:
+ default: ' '
+ type: string
+ type:
+ const: default
+ default: default
+ type: string
+ required:
+ - type
+ - separator
+ type: object
EfficiencyConfig:
additionalProperties: false
properties:
@@ -995,6 +1009,22 @@ components:
- vector_db_id
- chunks
type: object
+ InsertDocumentsRequest:
+ additionalProperties: false
+ properties:
+ chunk_size_in_tokens:
+ type: integer
+ documents:
+ items:
+ $ref: '#/components/schemas/RAGDocument'
+ type: array
+ vector_db_id:
+ type: string
+ required:
+ - documents
+ - vector_db_id
+ - chunk_size_in_tokens
+ type: object
InterleavedContent:
oneOf:
- type: string
@@ -1073,6 +1103,22 @@ components:
- type
- judge_model
type: object
+ LLMRAGQueryGeneratorConfig:
+ additionalProperties: false
+ properties:
+ model:
+ type: string
+ template:
+ type: string
+ type:
+ const: llm
+ default: llm
+ type: string
+ required:
+ - type
+ - model
+ - template
+ type: object
ListDatasetsResponse:
additionalProperties: false
properties:
@@ -1664,6 +1710,22 @@ components:
- gt
- lt
type: string
+ QueryContextRequest:
+ additionalProperties: false
+ properties:
+ content:
+ $ref: '#/components/schemas/InterleavedContent'
+ query_config:
+ $ref: '#/components/schemas/RAGQueryConfig'
+ vector_db_ids:
+ items:
+ type: string
+ type: array
+ required:
+ - content
+ - query_config
+ - vector_db_ids
+ type: object
QuerySpanTreeResponse:
additionalProperties: false
properties:
@@ -1694,6 +1756,62 @@ components:
required:
- data
type: object
+ RAGDocument:
+ additionalProperties: false
+ properties:
+ content:
+ oneOf:
+ - type: string
+ - $ref: '#/components/schemas/InterleavedContentItem'
+ - items:
+ $ref: '#/components/schemas/InterleavedContentItem'
+ type: array
+ - $ref: '#/components/schemas/URL'
+ document_id:
+ type: string
+ metadata:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ mime_type:
+ type: string
+ required:
+ - document_id
+ - content
+ - metadata
+ type: object
+ RAGQueryConfig:
+ additionalProperties: false
+ properties:
+ max_chunks:
+ default: 5
+ type: integer
+ max_tokens_in_context:
+ default: 4096
+ type: integer
+ query_generator_config:
+ $ref: '#/components/schemas/RAGQueryGeneratorConfig'
+ required:
+ - query_generator_config
+ - max_tokens_in_context
+ - max_chunks
+ type: object
+ RAGQueryGeneratorConfig:
+ oneOf:
+ - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
+ - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig'
+ RAGQueryResult:
+ additionalProperties: false
+ properties:
+ content:
+ $ref: '#/components/schemas/InterleavedContent'
+ type: object
RegexParserScoringFnParams:
additionalProperties: false
properties:
@@ -5058,6 +5176,68 @@ paths:
description: OK
tags:
- ToolRuntime
+ /v1/tool-runtime/rag-tool/insert-documents:
+ post:
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-Provider-Data
+ required: false
+ schema:
+ type: string
+ - description: Version of the client making the request. This is used to ensure
+ that the client and server are compatible.
+ in: header
+ name: X-LlamaStack-Client-Version
+ required: false
+ schema:
+ type: string
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/InsertDocumentsRequest'
+ required: true
+ responses:
+ '200':
+ description: OK
+ summary: Index documents so they can be used by the RAG system
+ tags:
+ - ToolRuntime
+ /v1/tool-runtime/rag-tool/query-context:
+ post:
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-Provider-Data
+ required: false
+ schema:
+ type: string
+ - description: Version of the client making the request. This is used to ensure
+ that the client and server are compatible.
+ in: header
+ name: X-LlamaStack-Client-Version
+ required: false
+ schema:
+ type: string
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/QueryContextRequest'
+ required: true
+ responses:
+ '200':
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/RAGQueryResult'
+ description: OK
+ summary: Query the RAG system for context; typically invoked by the agent
+ tags:
+ - ToolRuntime
/v1/toolgroups:
get:
parameters:
@@ -5596,6 +5776,9 @@ tags:
name: DatasetFormat
- name: DatasetIO
- name: Datasets
+- description:
+ name: DefaultRAGQueryGeneratorConfig
- description:
name: EfficiencyConfig
@@ -5631,6 +5814,9 @@ tags:
- description:
name: InsertChunksRequest
+- description:
+ name: InsertDocumentsRequest
- name: Inspect
- description:
@@ -5650,6 +5836,9 @@ tags:
- description:
name: LLMAsJudgeScoringFnParams
+- description:
+ name: LLMRAGQueryGeneratorConfig
- description:
name: ListDatasetsResponse
@@ -5754,6 +5943,9 @@ tags:
- description:
name: QueryConditionOp
+- description:
+ name: QueryContextRequest
- description:
name: QuerySpanTreeResponse
@@ -5763,6 +5955,15 @@ tags:
- description:
name: QueryTracesResponse
+- description:
+ name: RAGDocument
+- description:
+ name: RAGQueryConfig
+- description:
+ name: RAGQueryGeneratorConfig
+- description:
+ name: RAGQueryResult
- description:
name: RegexParserScoringFnParams
@@ -6031,6 +6232,7 @@ x-tagGroups:
- DataConfig
- Dataset
- DatasetFormat
+ - DefaultRAGQueryGeneratorConfig
- EfficiencyConfig
- EmbeddingsRequest
- EmbeddingsResponse
@@ -6043,6 +6245,7 @@ x-tagGroups:
- ImageDelta
- InferenceStep
- InsertChunksRequest
+ - InsertDocumentsRequest
- InterleavedContent
- InterleavedContentItem
- InvokeToolRequest
@@ -6050,6 +6253,7 @@ x-tagGroups:
- JobStatus
- JsonType
- LLMAsJudgeScoringFnParams
+ - LLMRAGQueryGeneratorConfig
- ListDatasetsResponse
- ListEvalTasksResponse
- ListModelsResponse
@@ -6086,9 +6290,14 @@ x-tagGroups:
- QueryChunksResponse
- QueryCondition
- QueryConditionOp
+ - QueryContextRequest
- QuerySpanTreeResponse
- QuerySpansResponse
- QueryTracesResponse
+ - RAGDocument
+ - RAGQueryConfig
+ - RAGQueryGeneratorConfig
+ - RAGQueryResult
- RegexParserScoringFnParams
- RegisterDatasetRequest
- RegisterEvalTaskRequest
diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py
index 180ec0ecc..f0c34dba4 100644
--- a/llama_stack/distribution/stack.py
+++ b/llama_stack/distribution/stack.py
@@ -29,7 +29,7 @@ from llama_stack.apis.scoring_functions import ScoringFunctions
from llama_stack.apis.shields import Shields
from llama_stack.apis.synthetic_data_generation import SyntheticDataGeneration
from llama_stack.apis.telemetry import Telemetry
-from llama_stack.apis.tools import ToolGroups, ToolRuntime
+from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime
from llama_stack.apis.vector_dbs import VectorDBs
from llama_stack.apis.vector_io import VectorIO
from llama_stack.distribution.datatypes import StackRunConfig
@@ -62,6 +62,7 @@ class LlamaStack(
Inspect,
ToolGroups,
ToolRuntime,
+ RAGToolRuntime,
):
pass