diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index c14661a5a..5d9917bfd 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -2055,69 +2055,6 @@ paths: schema: $ref: '#/components/schemas/URL' deprecated: false - /v1/tool-runtime/rag-tool/insert: - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolRuntime - summary: >- - Index documents so they can be used by the RAG system. - description: >- - Index documents so they can be used by the RAG system. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/InsertRequest' - required: true - deprecated: false - /v1/tool-runtime/rag-tool/query: - post: - responses: - '200': - description: >- - RAGQueryResult containing the retrieved content and metadata - content: - application/json: - schema: - $ref: '#/components/schemas/RAGQueryResult' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolRuntime - summary: >- - Query the RAG system for context; typically invoked by the agent. - description: >- - Query the RAG system for context; typically invoked by the agent. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryRequest' - required: true - deprecated: false /v1/toolgroups: get: responses: @@ -9633,274 +9570,6 @@ components: title: ListToolDefsResponse description: >- Response containing a list of tool definitions. - RAGDocument: - type: object - properties: - document_id: - type: string - description: The unique identifier for the document. - content: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - - $ref: '#/components/schemas/URL' - description: The content of the document. - mime_type: - type: string - description: The MIME type of the document. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the document. - additionalProperties: false - required: - - document_id - - content - - metadata - title: RAGDocument - description: >- - A document to be used for document ingestion in the RAG Tool. - InsertRequest: - type: object - properties: - documents: - type: array - items: - $ref: '#/components/schemas/RAGDocument' - description: >- - List of documents to index in the RAG system - vector_store_id: - type: string - description: >- - ID of the vector database to store the document embeddings - chunk_size_in_tokens: - type: integer - description: >- - (Optional) Size in tokens for document chunking during indexing - additionalProperties: false - required: - - documents - - vector_store_id - - chunk_size_in_tokens - title: InsertRequest - DefaultRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: default - default: default - description: >- - Type of query generator, always 'default' - separator: - type: string - default: ' ' - description: >- - String separator used to join query terms - additionalProperties: false - required: - - type - - separator - title: DefaultRAGQueryGeneratorConfig - description: >- - Configuration for the default RAG query generator. - LLMRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: llm - default: llm - description: Type of query generator, always 'llm' - model: - type: string - description: >- - Name of the language model to use for query generation - template: - type: string - description: >- - Template string for formatting the query generation prompt - additionalProperties: false - required: - - type - - model - - template - title: LLMRAGQueryGeneratorConfig - description: >- - Configuration for the LLM-based RAG query generator. - RAGQueryConfig: - type: object - properties: - query_generator_config: - oneOf: - - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig' - discriminator: - propertyName: type - mapping: - default: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - llm: '#/components/schemas/LLMRAGQueryGeneratorConfig' - description: Configuration for the query generator. - max_tokens_in_context: - type: integer - default: 4096 - description: Maximum number of tokens in the context. - max_chunks: - type: integer - default: 5 - description: Maximum number of chunks to retrieve. - chunk_template: - type: string - default: > - Result {index} - - Content: {chunk.content} - - Metadata: {metadata} - description: >- - Template for formatting each retrieved chunk in the context. Available - placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk - content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent: - {chunk.content}\nMetadata: {metadata}\n" - mode: - $ref: '#/components/schemas/RAGSearchMode' - default: vector - description: >- - Search mode for retrieval—either "vector", "keyword", or "hybrid". Default - "vector". - ranker: - $ref: '#/components/schemas/Ranker' - description: >- - Configuration for the ranker to use in hybrid search. Defaults to RRF - ranker. - additionalProperties: false - required: - - query_generator_config - - max_tokens_in_context - - max_chunks - - chunk_template - title: RAGQueryConfig - description: >- - Configuration for the RAG query generation. - RAGSearchMode: - type: string - enum: - - vector - - keyword - - hybrid - title: RAGSearchMode - description: >- - Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search - for semantic matching - KEYWORD: Uses keyword-based search for exact matching - - HYBRID: Combines both vector and keyword search for better results - RRFRanker: - type: object - properties: - type: - type: string - const: rrf - default: rrf - description: The type of ranker, always "rrf" - impact_factor: - type: number - default: 60.0 - description: >- - The impact factor for RRF scoring. Higher values give more weight to higher-ranked - results. Must be greater than 0 - additionalProperties: false - required: - - type - - impact_factor - title: RRFRanker - description: >- - Reciprocal Rank Fusion (RRF) ranker configuration. - Ranker: - oneOf: - - $ref: '#/components/schemas/RRFRanker' - - $ref: '#/components/schemas/WeightedRanker' - discriminator: - propertyName: type - mapping: - rrf: '#/components/schemas/RRFRanker' - weighted: '#/components/schemas/WeightedRanker' - WeightedRanker: - type: object - properties: - type: - type: string - const: weighted - default: weighted - description: The type of ranker, always "weighted" - alpha: - type: number - default: 0.5 - description: >- - Weight factor between 0 and 1. 0 means only use keyword scores, 1 means - only use vector scores, values in between blend both scores. - additionalProperties: false - required: - - type - - alpha - title: WeightedRanker - description: >- - Weighted ranker configuration that combines vector and keyword scores. - QueryRequest: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The query content to search for in the indexed documents - vector_store_ids: - type: array - items: - type: string - description: >- - List of vector database IDs to search within - query_config: - $ref: '#/components/schemas/RAGQueryConfig' - description: >- - (Optional) Configuration parameters for the query operation - additionalProperties: false - required: - - content - - vector_store_ids - title: QueryRequest - RAGQueryResult: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) The retrieved content from the query - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Additional metadata about the query result - additionalProperties: false - required: - - metadata - title: RAGQueryResult - description: >- - Result of a RAG query containing retrieved content and metadata. ToolGroup: type: object properties: diff --git a/docs/openapi_generator/pyopenapi/operations.py b/docs/openapi_generator/pyopenapi/operations.py index 2970d7e53..a1c95c7a7 100644 --- a/docs/openapi_generator/pyopenapi/operations.py +++ b/docs/openapi_generator/pyopenapi/operations.py @@ -170,7 +170,7 @@ def _get_endpoint_functions( for webmethod in webmethods: print(f"Processing {colored(func_name, 'white')}...") operation_name = func_name - + if webmethod.method == "GET": prefix = "get" elif webmethod.method == "DELETE": @@ -196,16 +196,10 @@ def _get_endpoint_functions( def _get_defining_class(member_fn: str, derived_cls: type) -> type: "Find the class in which a member function is first defined in a class inheritance hierarchy." - # This import must be dynamic here - from llama_stack.apis.tools import RAGToolRuntime, ToolRuntime - # iterate in reverse member resolution order to find most specific class first for cls in reversed(inspect.getmro(derived_cls)): for name, _ in inspect.getmembers(cls, inspect.isfunction): if name == member_fn: - # HACK ALERT - if cls == RAGToolRuntime: - return ToolRuntime return cls raise ValidationError( diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index ea6b07c0e..a705f499a 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -2052,69 +2052,6 @@ paths: schema: $ref: '#/components/schemas/URL' deprecated: false - /v1/tool-runtime/rag-tool/insert: - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolRuntime - summary: >- - Index documents so they can be used by the RAG system. - description: >- - Index documents so they can be used by the RAG system. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/InsertRequest' - required: true - deprecated: false - /v1/tool-runtime/rag-tool/query: - post: - responses: - '200': - description: >- - RAGQueryResult containing the retrieved content and metadata - content: - application/json: - schema: - $ref: '#/components/schemas/RAGQueryResult' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolRuntime - summary: >- - Query the RAG system for context; typically invoked by the agent. - description: >- - Query the RAG system for context; typically invoked by the agent. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryRequest' - required: true - deprecated: false /v1/toolgroups: get: responses: @@ -8917,274 +8854,6 @@ components: title: ListToolDefsResponse description: >- Response containing a list of tool definitions. - RAGDocument: - type: object - properties: - document_id: - type: string - description: The unique identifier for the document. - content: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - - $ref: '#/components/schemas/URL' - description: The content of the document. - mime_type: - type: string - description: The MIME type of the document. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the document. - additionalProperties: false - required: - - document_id - - content - - metadata - title: RAGDocument - description: >- - A document to be used for document ingestion in the RAG Tool. - InsertRequest: - type: object - properties: - documents: - type: array - items: - $ref: '#/components/schemas/RAGDocument' - description: >- - List of documents to index in the RAG system - vector_store_id: - type: string - description: >- - ID of the vector database to store the document embeddings - chunk_size_in_tokens: - type: integer - description: >- - (Optional) Size in tokens for document chunking during indexing - additionalProperties: false - required: - - documents - - vector_store_id - - chunk_size_in_tokens - title: InsertRequest - DefaultRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: default - default: default - description: >- - Type of query generator, always 'default' - separator: - type: string - default: ' ' - description: >- - String separator used to join query terms - additionalProperties: false - required: - - type - - separator - title: DefaultRAGQueryGeneratorConfig - description: >- - Configuration for the default RAG query generator. - LLMRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: llm - default: llm - description: Type of query generator, always 'llm' - model: - type: string - description: >- - Name of the language model to use for query generation - template: - type: string - description: >- - Template string for formatting the query generation prompt - additionalProperties: false - required: - - type - - model - - template - title: LLMRAGQueryGeneratorConfig - description: >- - Configuration for the LLM-based RAG query generator. - RAGQueryConfig: - type: object - properties: - query_generator_config: - oneOf: - - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig' - discriminator: - propertyName: type - mapping: - default: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - llm: '#/components/schemas/LLMRAGQueryGeneratorConfig' - description: Configuration for the query generator. - max_tokens_in_context: - type: integer - default: 4096 - description: Maximum number of tokens in the context. - max_chunks: - type: integer - default: 5 - description: Maximum number of chunks to retrieve. - chunk_template: - type: string - default: > - Result {index} - - Content: {chunk.content} - - Metadata: {metadata} - description: >- - Template for formatting each retrieved chunk in the context. Available - placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk - content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent: - {chunk.content}\nMetadata: {metadata}\n" - mode: - $ref: '#/components/schemas/RAGSearchMode' - default: vector - description: >- - Search mode for retrieval—either "vector", "keyword", or "hybrid". Default - "vector". - ranker: - $ref: '#/components/schemas/Ranker' - description: >- - Configuration for the ranker to use in hybrid search. Defaults to RRF - ranker. - additionalProperties: false - required: - - query_generator_config - - max_tokens_in_context - - max_chunks - - chunk_template - title: RAGQueryConfig - description: >- - Configuration for the RAG query generation. - RAGSearchMode: - type: string - enum: - - vector - - keyword - - hybrid - title: RAGSearchMode - description: >- - Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search - for semantic matching - KEYWORD: Uses keyword-based search for exact matching - - HYBRID: Combines both vector and keyword search for better results - RRFRanker: - type: object - properties: - type: - type: string - const: rrf - default: rrf - description: The type of ranker, always "rrf" - impact_factor: - type: number - default: 60.0 - description: >- - The impact factor for RRF scoring. Higher values give more weight to higher-ranked - results. Must be greater than 0 - additionalProperties: false - required: - - type - - impact_factor - title: RRFRanker - description: >- - Reciprocal Rank Fusion (RRF) ranker configuration. - Ranker: - oneOf: - - $ref: '#/components/schemas/RRFRanker' - - $ref: '#/components/schemas/WeightedRanker' - discriminator: - propertyName: type - mapping: - rrf: '#/components/schemas/RRFRanker' - weighted: '#/components/schemas/WeightedRanker' - WeightedRanker: - type: object - properties: - type: - type: string - const: weighted - default: weighted - description: The type of ranker, always "weighted" - alpha: - type: number - default: 0.5 - description: >- - Weight factor between 0 and 1. 0 means only use keyword scores, 1 means - only use vector scores, values in between blend both scores. - additionalProperties: false - required: - - type - - alpha - title: WeightedRanker - description: >- - Weighted ranker configuration that combines vector and keyword scores. - QueryRequest: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The query content to search for in the indexed documents - vector_store_ids: - type: array - items: - type: string - description: >- - List of vector database IDs to search within - query_config: - $ref: '#/components/schemas/RAGQueryConfig' - description: >- - (Optional) Configuration parameters for the query operation - additionalProperties: false - required: - - content - - vector_store_ids - title: QueryRequest - RAGQueryResult: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) The retrieved content from the query - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Additional metadata about the query result - additionalProperties: false - required: - - metadata - title: RAGQueryResult - description: >- - Result of a RAG query containing retrieved content and metadata. ToolGroup: type: object properties: diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index c14661a5a..5d9917bfd 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -2055,69 +2055,6 @@ paths: schema: $ref: '#/components/schemas/URL' deprecated: false - /v1/tool-runtime/rag-tool/insert: - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolRuntime - summary: >- - Index documents so they can be used by the RAG system. - description: >- - Index documents so they can be used by the RAG system. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/InsertRequest' - required: true - deprecated: false - /v1/tool-runtime/rag-tool/query: - post: - responses: - '200': - description: >- - RAGQueryResult containing the retrieved content and metadata - content: - application/json: - schema: - $ref: '#/components/schemas/RAGQueryResult' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolRuntime - summary: >- - Query the RAG system for context; typically invoked by the agent. - description: >- - Query the RAG system for context; typically invoked by the agent. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryRequest' - required: true - deprecated: false /v1/toolgroups: get: responses: @@ -9633,274 +9570,6 @@ components: title: ListToolDefsResponse description: >- Response containing a list of tool definitions. - RAGDocument: - type: object - properties: - document_id: - type: string - description: The unique identifier for the document. - content: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - - $ref: '#/components/schemas/URL' - description: The content of the document. - mime_type: - type: string - description: The MIME type of the document. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the document. - additionalProperties: false - required: - - document_id - - content - - metadata - title: RAGDocument - description: >- - A document to be used for document ingestion in the RAG Tool. - InsertRequest: - type: object - properties: - documents: - type: array - items: - $ref: '#/components/schemas/RAGDocument' - description: >- - List of documents to index in the RAG system - vector_store_id: - type: string - description: >- - ID of the vector database to store the document embeddings - chunk_size_in_tokens: - type: integer - description: >- - (Optional) Size in tokens for document chunking during indexing - additionalProperties: false - required: - - documents - - vector_store_id - - chunk_size_in_tokens - title: InsertRequest - DefaultRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: default - default: default - description: >- - Type of query generator, always 'default' - separator: - type: string - default: ' ' - description: >- - String separator used to join query terms - additionalProperties: false - required: - - type - - separator - title: DefaultRAGQueryGeneratorConfig - description: >- - Configuration for the default RAG query generator. - LLMRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: llm - default: llm - description: Type of query generator, always 'llm' - model: - type: string - description: >- - Name of the language model to use for query generation - template: - type: string - description: >- - Template string for formatting the query generation prompt - additionalProperties: false - required: - - type - - model - - template - title: LLMRAGQueryGeneratorConfig - description: >- - Configuration for the LLM-based RAG query generator. - RAGQueryConfig: - type: object - properties: - query_generator_config: - oneOf: - - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig' - discriminator: - propertyName: type - mapping: - default: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - llm: '#/components/schemas/LLMRAGQueryGeneratorConfig' - description: Configuration for the query generator. - max_tokens_in_context: - type: integer - default: 4096 - description: Maximum number of tokens in the context. - max_chunks: - type: integer - default: 5 - description: Maximum number of chunks to retrieve. - chunk_template: - type: string - default: > - Result {index} - - Content: {chunk.content} - - Metadata: {metadata} - description: >- - Template for formatting each retrieved chunk in the context. Available - placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk - content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent: - {chunk.content}\nMetadata: {metadata}\n" - mode: - $ref: '#/components/schemas/RAGSearchMode' - default: vector - description: >- - Search mode for retrieval—either "vector", "keyword", or "hybrid". Default - "vector". - ranker: - $ref: '#/components/schemas/Ranker' - description: >- - Configuration for the ranker to use in hybrid search. Defaults to RRF - ranker. - additionalProperties: false - required: - - query_generator_config - - max_tokens_in_context - - max_chunks - - chunk_template - title: RAGQueryConfig - description: >- - Configuration for the RAG query generation. - RAGSearchMode: - type: string - enum: - - vector - - keyword - - hybrid - title: RAGSearchMode - description: >- - Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search - for semantic matching - KEYWORD: Uses keyword-based search for exact matching - - HYBRID: Combines both vector and keyword search for better results - RRFRanker: - type: object - properties: - type: - type: string - const: rrf - default: rrf - description: The type of ranker, always "rrf" - impact_factor: - type: number - default: 60.0 - description: >- - The impact factor for RRF scoring. Higher values give more weight to higher-ranked - results. Must be greater than 0 - additionalProperties: false - required: - - type - - impact_factor - title: RRFRanker - description: >- - Reciprocal Rank Fusion (RRF) ranker configuration. - Ranker: - oneOf: - - $ref: '#/components/schemas/RRFRanker' - - $ref: '#/components/schemas/WeightedRanker' - discriminator: - propertyName: type - mapping: - rrf: '#/components/schemas/RRFRanker' - weighted: '#/components/schemas/WeightedRanker' - WeightedRanker: - type: object - properties: - type: - type: string - const: weighted - default: weighted - description: The type of ranker, always "weighted" - alpha: - type: number - default: 0.5 - description: >- - Weight factor between 0 and 1. 0 means only use keyword scores, 1 means - only use vector scores, values in between blend both scores. - additionalProperties: false - required: - - type - - alpha - title: WeightedRanker - description: >- - Weighted ranker configuration that combines vector and keyword scores. - QueryRequest: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The query content to search for in the indexed documents - vector_store_ids: - type: array - items: - type: string - description: >- - List of vector database IDs to search within - query_config: - $ref: '#/components/schemas/RAGQueryConfig' - description: >- - (Optional) Configuration parameters for the query operation - additionalProperties: false - required: - - content - - vector_store_ids - title: QueryRequest - RAGQueryResult: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) The retrieved content from the query - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Additional metadata about the query result - additionalProperties: false - required: - - metadata - title: RAGQueryResult - description: >- - Result of a RAG query containing retrieved content and metadata. ToolGroup: type: object properties: diff --git a/src/llama_stack/apis/tools/rag_tool.py b/src/llama_stack/apis/tools/rag_tool.py index 4e43bb284..8bcc89bf0 100644 --- a/src/llama_stack/apis/tools/rag_tool.py +++ b/src/llama_stack/apis/tools/rag_tool.py @@ -5,18 +5,13 @@ # the root directory of this source tree. from enum import Enum, StrEnum -from typing import Annotated, Any, Literal, Protocol +from typing import Annotated, Any, Literal from pydantic import BaseModel, Field, field_validator -from typing_extensions import runtime_checkable from llama_stack.apis.common.content_types import URL, InterleavedContent -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.core.telemetry.trace_protocol import trace_protocol -from llama_stack.schema_utils import json_schema_type, register_schema, webmethod -@json_schema_type class RRFRanker(BaseModel): """ Reciprocal Rank Fusion (RRF) ranker configuration. @@ -30,7 +25,6 @@ class RRFRanker(BaseModel): impact_factor: float = Field(default=60.0, gt=0.0) # default of 60 for optimal performance -@json_schema_type class WeightedRanker(BaseModel): """ Weighted ranker configuration that combines vector and keyword scores. @@ -55,10 +49,8 @@ Ranker = Annotated[ RRFRanker | WeightedRanker, Field(discriminator="type"), ] -register_schema(Ranker, name="Ranker") -@json_schema_type class RAGDocument(BaseModel): """ A document to be used for document ingestion in the RAG Tool. @@ -75,7 +67,6 @@ class RAGDocument(BaseModel): metadata: dict[str, Any] = Field(default_factory=dict) -@json_schema_type class RAGQueryResult(BaseModel): """Result of a RAG query containing retrieved content and metadata. @@ -87,7 +78,6 @@ class RAGQueryResult(BaseModel): metadata: dict[str, Any] = Field(default_factory=dict) -@json_schema_type class RAGQueryGenerator(Enum): """Types of query generators for RAG systems. @@ -101,7 +91,6 @@ class RAGQueryGenerator(Enum): custom = "custom" -@json_schema_type class RAGSearchMode(StrEnum): """ Search modes for RAG query retrieval: @@ -115,7 +104,6 @@ class RAGSearchMode(StrEnum): HYBRID = "hybrid" -@json_schema_type class DefaultRAGQueryGeneratorConfig(BaseModel): """Configuration for the default RAG query generator. @@ -127,7 +115,6 @@ class DefaultRAGQueryGeneratorConfig(BaseModel): separator: str = " " -@json_schema_type class LLMRAGQueryGeneratorConfig(BaseModel): """Configuration for the LLM-based RAG query generator. @@ -145,10 +132,8 @@ RAGQueryGeneratorConfig = Annotated[ DefaultRAGQueryGeneratorConfig | LLMRAGQueryGeneratorConfig, Field(discriminator="type"), ] -register_schema(RAGQueryGeneratorConfig, name="RAGQueryGeneratorConfig") -@json_schema_type class RAGQueryConfig(BaseModel): """ Configuration for the RAG query generation. @@ -181,38 +166,3 @@ class RAGQueryConfig(BaseModel): if len(v) == 0: raise ValueError("chunk_template must not be empty") return v - - -@runtime_checkable -@trace_protocol -class RAGToolRuntime(Protocol): - @webmethod(route="/tool-runtime/rag-tool/insert", method="POST", level=LLAMA_STACK_API_V1) - async def insert( - self, - documents: list[RAGDocument], - vector_store_id: str, - chunk_size_in_tokens: int = 512, - ) -> None: - """Index documents so they can be used by the RAG system. - - :param documents: List of documents to index in the RAG system - :param vector_store_id: ID of the vector database to store the document embeddings - :param chunk_size_in_tokens: (Optional) Size in tokens for document chunking during indexing - """ - ... - - @webmethod(route="/tool-runtime/rag-tool/query", method="POST", level=LLAMA_STACK_API_V1) - async def query( - self, - content: InterleavedContent, - vector_store_ids: list[str], - query_config: RAGQueryConfig | None = None, - ) -> RAGQueryResult: - """Query the RAG system for context; typically invoked by the agent. - - :param content: The query content to search for in the indexed documents - :param vector_store_ids: List of vector database IDs to search within - :param query_config: (Optional) Configuration parameters for the query operation - :returns: RAGQueryResult containing the retrieved content and metadata - """ - ... diff --git a/src/llama_stack/apis/tools/tools.py b/src/llama_stack/apis/tools/tools.py index b13ac2f19..29065a713 100644 --- a/src/llama_stack/apis/tools/tools.py +++ b/src/llama_stack/apis/tools/tools.py @@ -16,8 +16,6 @@ from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.core.telemetry.trace_protocol import trace_protocol from llama_stack.schema_utils import json_schema_type, webmethod -from .rag_tool import RAGToolRuntime - @json_schema_type class ToolDef(BaseModel): @@ -195,8 +193,6 @@ class SpecialToolGroup(Enum): class ToolRuntime(Protocol): tool_store: ToolStore | None = None - rag_tool: RAGToolRuntime | None = None - # TODO: This needs to be renamed once OPEN API generator name conflict issue is fixed. @webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1) async def list_runtime_tools( diff --git a/src/llama_stack/core/routers/tool_runtime.py b/src/llama_stack/core/routers/tool_runtime.py index be4c13905..fb13d94a4 100644 --- a/src/llama_stack/core/routers/tool_runtime.py +++ b/src/llama_stack/core/routers/tool_runtime.py @@ -8,14 +8,9 @@ from typing import Any from llama_stack.apis.common.content_types import ( URL, - InterleavedContent, ) from llama_stack.apis.tools import ( ListToolDefsResponse, - RAGDocument, - RAGQueryConfig, - RAGQueryResult, - RAGToolRuntime, ToolRuntime, ) from llama_stack.log import get_logger @@ -26,36 +21,6 @@ logger = get_logger(name=__name__, category="core::routers") class ToolRuntimeRouter(ToolRuntime): - class RagToolImpl(RAGToolRuntime): - def __init__( - self, - routing_table: ToolGroupsRoutingTable, - ) -> None: - logger.debug("Initializing ToolRuntimeRouter.RagToolImpl") - self.routing_table = routing_table - - async def query( - self, - content: InterleavedContent, - vector_store_ids: list[str], - query_config: RAGQueryConfig | None = None, - ) -> RAGQueryResult: - logger.debug(f"ToolRuntimeRouter.RagToolImpl.query: {vector_store_ids}") - provider = await self.routing_table.get_provider_impl("knowledge_search") - return await provider.query(content, vector_store_ids, query_config) - - async def insert( - self, - documents: list[RAGDocument], - vector_store_id: str, - chunk_size_in_tokens: int = 512, - ) -> None: - logger.debug( - f"ToolRuntimeRouter.RagToolImpl.insert: {vector_store_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}" - ) - provider = await self.routing_table.get_provider_impl("insert_into_memory") - return await provider.insert(documents, vector_store_id, chunk_size_in_tokens) - def __init__( self, routing_table: ToolGroupsRoutingTable, @@ -63,11 +28,6 @@ class ToolRuntimeRouter(ToolRuntime): logger.debug("Initializing ToolRuntimeRouter") self.routing_table = routing_table - # HACK ALERT this should be in sync with "get_all_api_endpoints()" - self.rag_tool = self.RagToolImpl(routing_table) - for method in ("query", "insert"): - setattr(self, f"rag_tool.{method}", getattr(self.rag_tool, method)) - async def initialize(self) -> None: logger.debug("ToolRuntimeRouter.initialize") pass diff --git a/src/llama_stack/core/server/routes.py b/src/llama_stack/core/server/routes.py index 48a961318..4f7ff2295 100644 --- a/src/llama_stack/core/server/routes.py +++ b/src/llama_stack/core/server/routes.py @@ -13,7 +13,6 @@ from aiohttp import hdrs from starlette.routing import Route from llama_stack.apis.datatypes import Api, ExternalApiSpec -from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup from llama_stack.core.resolver import api_protocol_map from llama_stack.schema_utils import WebMethod @@ -25,33 +24,16 @@ RouteImpls = dict[str, PathImpl] RouteMatch = tuple[EndpointFunc, PathParams, str, WebMethod] -def toolgroup_protocol_map(): - return { - SpecialToolGroup.rag_tool: RAGToolRuntime, - } - - def get_all_api_routes( external_apis: dict[Api, ExternalApiSpec] | None = None, ) -> dict[Api, list[tuple[Route, WebMethod]]]: apis = {} protocols = api_protocol_map(external_apis) - toolgroup_protocols = toolgroup_protocol_map() for api, protocol in protocols.items(): routes = [] protocol_methods = inspect.getmembers(protocol, predicate=inspect.isfunction) - # HACK ALERT - if api == Api.tool_runtime: - for tool_group in SpecialToolGroup: - sub_protocol = toolgroup_protocols[tool_group] - sub_protocol_methods = inspect.getmembers(sub_protocol, predicate=inspect.isfunction) - for name, method in sub_protocol_methods: - if not hasattr(method, "__webmethod__"): - continue - protocol_methods.append((f"{tool_group.value}.{name}", method)) - for name, method in protocol_methods: # Get all webmethods for this method (supports multiple decorators) webmethods = getattr(method, "__webmethods__", []) diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py index 2ff7db6eb..2ed0eccd2 100644 --- a/src/llama_stack/core/stack.py +++ b/src/llama_stack/core/stack.py @@ -31,7 +31,7 @@ from llama_stack.apis.safety import Safety from llama_stack.apis.scoring import Scoring from llama_stack.apis.scoring_functions import ScoringFunctions from llama_stack.apis.shields import Shields -from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime +from llama_stack.apis.tools import ToolGroups, ToolRuntime from llama_stack.apis.vector_io import VectorIO from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig @@ -78,7 +78,6 @@ class LlamaStack( Inspect, ToolGroups, ToolRuntime, - RAGToolRuntime, Files, Prompts, Conversations, diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py index 3ee745bf1..6a59be0ca 100644 --- a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py +++ b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py @@ -27,7 +27,6 @@ from llama_stack.apis.tools import ( RAGDocument, RAGQueryConfig, RAGQueryResult, - RAGToolRuntime, ToolDef, ToolGroup, ToolInvocationResult, @@ -91,7 +90,7 @@ async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]: return content_str.encode("utf-8"), "text/plain" -class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime): +class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime): def __init__( self, config: RagToolRuntimeConfig,