From 89f51a86dd1eada8de73b69469e6cd14174fbaf2 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 21 Jan 2025 17:09:04 -0800 Subject: [PATCH] update openapi generator --- .../openapi_generator/pyopenapi/operations.py | 6 + docs/resources/llama-stack-spec.html | 322 ++++++++++++++++++ docs/resources/llama-stack-spec.yaml | 209 ++++++++++++ llama_stack/distribution/stack.py | 3 +- 4 files changed, 539 insertions(+), 1 deletion(-) diff --git a/docs/openapi_generator/pyopenapi/operations.py b/docs/openapi_generator/pyopenapi/operations.py index 4cea9d970..abeb16936 100644 --- a/docs/openapi_generator/pyopenapi/operations.py +++ b/docs/openapi_generator/pyopenapi/operations.py @@ -172,10 +172,16 @@ def _get_endpoint_functions( def _get_defining_class(member_fn: str, derived_cls: type) -> type: "Find the class in which a member function is first defined in a class inheritance hierarchy." + # This import must be dynamic here + from llama_stack.apis.tools import RAGToolRuntime, ToolRuntime + # iterate in reverse member resolution order to find most specific class first for cls in reversed(inspect.getmro(derived_cls)): for name, _ in inspect.getmembers(cls, inspect.isfunction): if name == member_fn: + # HACK ALERT + if cls == RAGToolRuntime: + return ToolRuntime return cls raise ValidationError( diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html index ffa1df8e6..f00d7b291 100644 --- a/docs/resources/llama-stack-spec.html +++ b/docs/resources/llama-stack-spec.html @@ -1929,6 +1929,49 @@ } } }, + "/v1/tool-runtime/rag-tool/insert-documents": { + "post": { + "responses": { + "200": { + "description": "OK" + } + }, + "tags": [ + "ToolRuntime" + ], + "summary": "Index documents so they can be used by the RAG system", + "parameters": [ + { + "name": "X-LlamaStack-Provider-Data", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Client-Version", + "in": "header", + "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", + "required": false, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InsertDocumentsRequest" + } + } + }, + "required": true + } + } + }, "/v1/tool-runtime/invoke": { "post": { "responses": { @@ -3039,6 +3082,56 @@ } } }, + "/v1/tool-runtime/rag-tool/query-context": { + "post": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RAGQueryResult" + } + } + } + } + }, + "tags": [ + "ToolRuntime" + ], + "summary": "Query the RAG system for context; typically invoked by the agent", + "parameters": [ + { + "name": "X-LlamaStack-Provider-Data", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Client-Version", + "in": "header", + "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", + "required": false, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/QueryContextRequest" + } + } + }, + "required": true + } + } + }, "/v1/telemetry/spans": { "get": { "responses": { @@ -6940,6 +7033,90 @@ "chunks" ] }, + "RAGDocument": { + "type": "object", + "properties": { + "document_id": { + "type": "string" + }, + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/InterleavedContentItem" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/InterleavedContentItem" + } + }, + { + "$ref": "#/components/schemas/URL" + } + ] + }, + "mime_type": { + "type": "string" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "document_id", + "content", + "metadata" + ] + }, + "InsertDocumentsRequest": { + "type": "object", + "properties": { + "documents": { + "type": "array", + "items": { + "$ref": "#/components/schemas/RAGDocument" + } + }, + "vector_db_id": { + "type": "string" + }, + "chunk_size_in_tokens": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "documents", + "vector_db_id", + "chunk_size_in_tokens" + ] + }, "InvokeToolRequest": { "type": "object", "properties": { @@ -7804,6 +7981,111 @@ "scores" ] }, + "DefaultRAGQueryGeneratorConfig": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "default", + "default": "default" + }, + "separator": { + "type": "string", + "default": " " + } + }, + "additionalProperties": false, + "required": [ + "type", + "separator" + ] + }, + "LLMRAGQueryGeneratorConfig": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "llm", + "default": "llm" + }, + "model": { + "type": "string" + }, + "template": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "type", + "model", + "template" + ] + }, + "RAGQueryConfig": { + "type": "object", + "properties": { + "query_generator_config": { + "$ref": "#/components/schemas/RAGQueryGeneratorConfig" + }, + "max_tokens_in_context": { + "type": "integer", + "default": 4096 + }, + "max_chunks": { + "type": "integer", + "default": 5 + } + }, + "additionalProperties": false, + "required": [ + "query_generator_config", + "max_tokens_in_context", + "max_chunks" + ] + }, + "RAGQueryGeneratorConfig": { + "oneOf": [ + { + "$ref": "#/components/schemas/DefaultRAGQueryGeneratorConfig" + }, + { + "$ref": "#/components/schemas/LLMRAGQueryGeneratorConfig" + } + ] + }, + "QueryContextRequest": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent" + }, + "query_config": { + "$ref": "#/components/schemas/RAGQueryConfig" + }, + "vector_db_ids": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "required": [ + "content", + "query_config", + "vector_db_ids" + ] + }, + "RAGQueryResult": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent" + } + }, + "additionalProperties": false + }, "QueryCondition": { "type": "object", "properties": { @@ -8902,6 +9184,10 @@ { "name": "Datasets" }, + { + "name": "DefaultRAGQueryGeneratorConfig", + "description": "" + }, { "name": "EfficiencyConfig", "description": "" @@ -8959,6 +9245,10 @@ "name": "InsertChunksRequest", "description": "" }, + { + "name": "InsertDocumentsRequest", + "description": "" + }, { "name": "Inspect" }, @@ -8990,6 +9280,10 @@ "name": "LLMAsJudgeScoringFnParams", "description": "" }, + { + "name": "LLMRAGQueryGeneratorConfig", + "description": "" + }, { "name": "ListDatasetsResponse", "description": "" @@ -9140,6 +9434,10 @@ "name": "QueryConditionOp", "description": "" }, + { + "name": "QueryContextRequest", + "description": "" + }, { "name": "QuerySpanTreeResponse", "description": "" @@ -9152,6 +9450,22 @@ "name": "QueryTracesResponse", "description": "" }, + { + "name": "RAGDocument", + "description": "" + }, + { + "name": "RAGQueryConfig", + "description": "" + }, + { + "name": "RAGQueryGeneratorConfig", + "description": "" + }, + { + "name": "RAGQueryResult", + "description": "" + }, { "name": "RegexParserScoringFnParams", "description": "" @@ -9531,6 +9845,7 @@ "DataConfig", "Dataset", "DatasetFormat", + "DefaultRAGQueryGeneratorConfig", "EfficiencyConfig", "EmbeddingsRequest", "EmbeddingsResponse", @@ -9543,6 +9858,7 @@ "ImageDelta", "InferenceStep", "InsertChunksRequest", + "InsertDocumentsRequest", "InterleavedContent", "InterleavedContentItem", "InvokeToolRequest", @@ -9550,6 +9866,7 @@ "JobStatus", "JsonType", "LLMAsJudgeScoringFnParams", + "LLMRAGQueryGeneratorConfig", "ListDatasetsResponse", "ListEvalTasksResponse", "ListModelsResponse", @@ -9586,9 +9903,14 @@ "QueryChunksResponse", "QueryCondition", "QueryConditionOp", + "QueryContextRequest", "QuerySpanTreeResponse", "QuerySpansResponse", "QueryTracesResponse", + "RAGDocument", + "RAGQueryConfig", + "RAGQueryGeneratorConfig", + "RAGQueryResult", "RegexParserScoringFnParams", "RegisterDatasetRequest", "RegisterEvalTaskRequest", diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml index 1678b1bb9..e1ae07c45 100644 --- a/docs/resources/llama-stack-spec.yaml +++ b/docs/resources/llama-stack-spec.yaml @@ -761,6 +761,20 @@ components: - instruct - dialog type: string + DefaultRAGQueryGeneratorConfig: + additionalProperties: false + properties: + separator: + default: ' ' + type: string + type: + const: default + default: default + type: string + required: + - type + - separator + type: object EfficiencyConfig: additionalProperties: false properties: @@ -995,6 +1009,22 @@ components: - vector_db_id - chunks type: object + InsertDocumentsRequest: + additionalProperties: false + properties: + chunk_size_in_tokens: + type: integer + documents: + items: + $ref: '#/components/schemas/RAGDocument' + type: array + vector_db_id: + type: string + required: + - documents + - vector_db_id + - chunk_size_in_tokens + type: object InterleavedContent: oneOf: - type: string @@ -1073,6 +1103,22 @@ components: - type - judge_model type: object + LLMRAGQueryGeneratorConfig: + additionalProperties: false + properties: + model: + type: string + template: + type: string + type: + const: llm + default: llm + type: string + required: + - type + - model + - template + type: object ListDatasetsResponse: additionalProperties: false properties: @@ -1664,6 +1710,22 @@ components: - gt - lt type: string + QueryContextRequest: + additionalProperties: false + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + query_config: + $ref: '#/components/schemas/RAGQueryConfig' + vector_db_ids: + items: + type: string + type: array + required: + - content + - query_config + - vector_db_ids + type: object QuerySpanTreeResponse: additionalProperties: false properties: @@ -1694,6 +1756,62 @@ components: required: - data type: object + RAGDocument: + additionalProperties: false + properties: + content: + oneOf: + - type: string + - $ref: '#/components/schemas/InterleavedContentItem' + - items: + $ref: '#/components/schemas/InterleavedContentItem' + type: array + - $ref: '#/components/schemas/URL' + document_id: + type: string + metadata: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + mime_type: + type: string + required: + - document_id + - content + - metadata + type: object + RAGQueryConfig: + additionalProperties: false + properties: + max_chunks: + default: 5 + type: integer + max_tokens_in_context: + default: 4096 + type: integer + query_generator_config: + $ref: '#/components/schemas/RAGQueryGeneratorConfig' + required: + - query_generator_config + - max_tokens_in_context + - max_chunks + type: object + RAGQueryGeneratorConfig: + oneOf: + - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' + - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig' + RAGQueryResult: + additionalProperties: false + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + type: object RegexParserScoringFnParams: additionalProperties: false properties: @@ -5058,6 +5176,68 @@ paths: description: OK tags: - ToolRuntime + /v1/tool-runtime/rag-tool/insert-documents: + post: + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-Provider-Data + required: false + schema: + type: string + - description: Version of the client making the request. This is used to ensure + that the client and server are compatible. + in: header + name: X-LlamaStack-Client-Version + required: false + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/InsertDocumentsRequest' + required: true + responses: + '200': + description: OK + summary: Index documents so they can be used by the RAG system + tags: + - ToolRuntime + /v1/tool-runtime/rag-tool/query-context: + post: + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-Provider-Data + required: false + schema: + type: string + - description: Version of the client making the request. This is used to ensure + that the client and server are compatible. + in: header + name: X-LlamaStack-Client-Version + required: false + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/QueryContextRequest' + required: true + responses: + '200': + content: + application/json: + schema: + $ref: '#/components/schemas/RAGQueryResult' + description: OK + summary: Query the RAG system for context; typically invoked by the agent + tags: + - ToolRuntime /v1/toolgroups: get: parameters: @@ -5596,6 +5776,9 @@ tags: name: DatasetFormat - name: DatasetIO - name: Datasets +- description: + name: DefaultRAGQueryGeneratorConfig - description: name: EfficiencyConfig @@ -5631,6 +5814,9 @@ tags: - description: name: InsertChunksRequest +- description: + name: InsertDocumentsRequest - name: Inspect - description: @@ -5650,6 +5836,9 @@ tags: - description: name: LLMAsJudgeScoringFnParams +- description: + name: LLMRAGQueryGeneratorConfig - description: name: ListDatasetsResponse @@ -5754,6 +5943,9 @@ tags: - description: name: QueryConditionOp +- description: + name: QueryContextRequest - description: name: QuerySpanTreeResponse @@ -5763,6 +5955,15 @@ tags: - description: name: QueryTracesResponse +- description: + name: RAGDocument +- description: + name: RAGQueryConfig +- description: + name: RAGQueryGeneratorConfig +- description: + name: RAGQueryResult - description: name: RegexParserScoringFnParams @@ -6031,6 +6232,7 @@ x-tagGroups: - DataConfig - Dataset - DatasetFormat + - DefaultRAGQueryGeneratorConfig - EfficiencyConfig - EmbeddingsRequest - EmbeddingsResponse @@ -6043,6 +6245,7 @@ x-tagGroups: - ImageDelta - InferenceStep - InsertChunksRequest + - InsertDocumentsRequest - InterleavedContent - InterleavedContentItem - InvokeToolRequest @@ -6050,6 +6253,7 @@ x-tagGroups: - JobStatus - JsonType - LLMAsJudgeScoringFnParams + - LLMRAGQueryGeneratorConfig - ListDatasetsResponse - ListEvalTasksResponse - ListModelsResponse @@ -6086,9 +6290,14 @@ x-tagGroups: - QueryChunksResponse - QueryCondition - QueryConditionOp + - QueryContextRequest - QuerySpanTreeResponse - QuerySpansResponse - QueryTracesResponse + - RAGDocument + - RAGQueryConfig + - RAGQueryGeneratorConfig + - RAGQueryResult - RegexParserScoringFnParams - RegisterDatasetRequest - RegisterEvalTaskRequest diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py index 180ec0ecc..f0c34dba4 100644 --- a/llama_stack/distribution/stack.py +++ b/llama_stack/distribution/stack.py @@ -29,7 +29,7 @@ from llama_stack.apis.scoring_functions import ScoringFunctions from llama_stack.apis.shields import Shields from llama_stack.apis.synthetic_data_generation import SyntheticDataGeneration from llama_stack.apis.telemetry import Telemetry -from llama_stack.apis.tools import ToolGroups, ToolRuntime +from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime from llama_stack.apis.vector_dbs import VectorDBs from llama_stack.apis.vector_io import VectorIO from llama_stack.distribution.datatypes import StackRunConfig @@ -62,6 +62,7 @@ class LlamaStack( Inspect, ToolGroups, ToolRuntime, + RAGToolRuntime, ): pass