From 460dc8a72a92e906861fb7d7cc13f8553153bee6 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 21 Jan 2025 15:56:48 -0800 Subject: [PATCH] bug fix, generate openapi spec --- docs/resources/llama-stack-spec.html | 999 +++++++----------- docs/resources/llama-stack-spec.yaml | 812 ++++++-------- llama_stack/distribution/routers/routers.py | 2 +- llama_stack/distribution/server/endpoints.py | 2 + .../agents/meta_reference/agent_instance.py | 2 +- 5 files changed, 680 insertions(+), 1137 deletions(-) diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html index 459a53888..ffa1df8e6 100644 --- a/docs/resources/llama-stack-spec.html +++ b/docs/resources/llama-stack-spec.html @@ -1108,98 +1108,6 @@ ] } }, - "/v1/memory-banks/{memory_bank_id}": { - "get": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/MemoryBank" - }, - { - "type": "null" - } - ] - } - } - } - } - }, - "tags": [ - "MemoryBanks" - ], - "parameters": [ - { - "name": "memory_bank_id", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-Provider-Data", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-Client-Version", - "in": "header", - "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", - "required": false, - "schema": { - "type": "string" - } - } - ] - }, - "delete": { - "responses": { - "200": { - "description": "OK" - } - }, - "tags": [ - "MemoryBanks" - ], - "parameters": [ - { - "name": "memory_bank_id", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-Provider-Data", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-Client-Version", - "in": "header", - "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", - "required": false, - "schema": { - "type": "string" - } - } - ] - } - }, "/v1/models/{model_id}": { "get": { "responses": { @@ -1848,6 +1756,98 @@ ] } }, + "/v1/vector-dbs/{vector_db_id}": { + "get": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/VectorDB" + }, + { + "type": "null" + } + ] + } + } + } + } + }, + "tags": [ + "VectorDBs" + ], + "parameters": [ + { + "name": "vector_db_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Provider-Data", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Client-Version", + "in": "header", + "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", + "required": false, + "schema": { + "type": "string" + } + } + ] + }, + "delete": { + "responses": { + "200": { + "description": "OK" + } + }, + "tags": [ + "VectorDBs" + ], + "parameters": [ + { + "name": "vector_db_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Provider-Data", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Client-Version", + "in": "header", + "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", + "required": false, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/health": { "get": { "responses": { @@ -1887,7 +1887,7 @@ ] } }, - "/v1/memory/insert": { + "/v1/vector-io/insert": { "post": { "responses": { "200": { @@ -1895,7 +1895,7 @@ } }, "tags": [ - "Memory" + "VectorIO" ], "parameters": [ { @@ -1921,7 +1921,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/InsertDocumentsRequest" + "$ref": "#/components/schemas/InsertChunksRequest" } } }, @@ -2300,105 +2300,6 @@ } } }, - "/v1/memory-banks": { - "get": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ListMemoryBanksResponse" - } - } - } - } - }, - "tags": [ - "MemoryBanks" - ], - "parameters": [ - { - "name": "X-LlamaStack-Provider-Data", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-Client-Version", - "in": "header", - "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", - "required": false, - "schema": { - "type": "string" - } - } - ] - }, - "post": { - "responses": { - "200": { - "description": "", - "content": { - "application/json": { - "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/VectorMemoryBank" - }, - { - "$ref": "#/components/schemas/KeyValueMemoryBank" - }, - { - "$ref": "#/components/schemas/KeywordMemoryBank" - }, - { - "$ref": "#/components/schemas/GraphMemoryBank" - } - ] - } - } - } - } - }, - "tags": [ - "MemoryBanks" - ], - "parameters": [ - { - "name": "X-LlamaStack-Provider-Data", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-Client-Version", - "in": "header", - "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", - "required": false, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/RegisterMemoryBankRequest" - } - } - }, - "required": true - } - } - }, "/v1/models": { "get": { "responses": { @@ -2912,6 +2813,92 @@ ] } }, + "/v1/vector-dbs": { + "get": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListVectorDBsResponse" + } + } + } + } + }, + "tags": [ + "VectorDBs" + ], + "parameters": [ + { + "name": "X-LlamaStack-Provider-Data", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Client-Version", + "in": "header", + "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", + "required": false, + "schema": { + "type": "string" + } + } + ] + }, + "post": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VectorDB" + } + } + } + } + }, + "tags": [ + "VectorDBs" + ], + "parameters": [ + { + "name": "X-LlamaStack-Provider-Data", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Client-Version", + "in": "header", + "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", + "required": false, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RegisterVectorDbRequest" + } + } + }, + "required": true + } + } + }, "/v1/telemetry/events": { "post": { "responses": { @@ -3003,7 +2990,7 @@ } } }, - "/v1/memory/query": { + "/v1/vector-io/query": { "post": { "responses": { "200": { @@ -3011,14 +2998,14 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/QueryDocumentsResponse" + "$ref": "#/components/schemas/QueryChunksResponse" } } } } }, "tags": [ - "Memory" + "VectorIO" ], "parameters": [ { @@ -3044,7 +3031,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/QueryDocumentsRequest" + "$ref": "#/components/schemas/QueryChunksRequest" } } }, @@ -5851,118 +5838,6 @@ "aggregated_results" ] }, - "GraphMemoryBank": { - "type": "object", - "properties": { - "identifier": { - "type": "string" - }, - "provider_resource_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "type": { - "type": "string", - "const": "memory_bank", - "default": "memory_bank" - }, - "memory_bank_type": { - "type": "string", - "const": "graph", - "default": "graph" - } - }, - "additionalProperties": false, - "required": [ - "identifier", - "provider_resource_id", - "provider_id", - "type", - "memory_bank_type" - ] - }, - "KeyValueMemoryBank": { - "type": "object", - "properties": { - "identifier": { - "type": "string" - }, - "provider_resource_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "type": { - "type": "string", - "const": "memory_bank", - "default": "memory_bank" - }, - "memory_bank_type": { - "type": "string", - "const": "keyvalue", - "default": "keyvalue" - } - }, - "additionalProperties": false, - "required": [ - "identifier", - "provider_resource_id", - "provider_id", - "type", - "memory_bank_type" - ] - }, - "KeywordMemoryBank": { - "type": "object", - "properties": { - "identifier": { - "type": "string" - }, - "provider_resource_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "type": { - "type": "string", - "const": "memory_bank", - "default": "memory_bank" - }, - "memory_bank_type": { - "type": "string", - "const": "keyword", - "default": "keyword" - } - }, - "additionalProperties": false, - "required": [ - "identifier", - "provider_resource_id", - "provider_id", - "type", - "memory_bank_type" - ] - }, - "MemoryBank": { - "oneOf": [ - { - "$ref": "#/components/schemas/VectorMemoryBank" - }, - { - "$ref": "#/components/schemas/KeyValueMemoryBank" - }, - { - "$ref": "#/components/schemas/KeywordMemoryBank" - }, - { - "$ref": "#/components/schemas/GraphMemoryBank" - } - ] - }, "Session": { "type": "object", "properties": { @@ -5981,9 +5856,6 @@ "started_at": { "type": "string", "format": "date-time" - }, - "memory_bank": { - "$ref": "#/components/schemas/MemoryBank" } }, "additionalProperties": false, @@ -5995,53 +5867,6 @@ ], "title": "A single session of an interaction with an Agentic System." }, - "VectorMemoryBank": { - "type": "object", - "properties": { - "identifier": { - "type": "string" - }, - "provider_resource_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "type": { - "type": "string", - "const": "memory_bank", - "default": "memory_bank" - }, - "memory_bank_type": { - "type": "string", - "const": "vector", - "default": "vector" - }, - "embedding_model": { - "type": "string" - }, - "chunk_size_in_tokens": { - "type": "integer" - }, - "embedding_dimension": { - "type": "integer", - "default": 384 - }, - "overlap_size_in_tokens": { - "type": "integer" - } - }, - "additionalProperties": false, - "required": [ - "identifier", - "provider_resource_id", - "provider_id", - "type", - "memory_bank_type", - "embedding_model", - "chunk_size_in_tokens" - ] - }, "AgentStepResponse": { "type": "object", "properties": { @@ -7012,6 +6837,40 @@ "data" ] }, + "VectorDB": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "const": "vector_db", + "default": "vector_db" + }, + "embedding_model": { + "type": "string" + }, + "embedding_dimension": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_resource_id", + "provider_id", + "type", + "embedding_model", + "embedding_dimension" + ] + }, "HealthInfo": { "type": "object", "properties": { @@ -7024,77 +6883,51 @@ "status" ] }, - "MemoryBankDocument": { + "InsertChunksRequest": { "type": "object", "properties": { - "document_id": { + "vector_db_id": { "type": "string" }, - "content": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/InterleavedContentItem" - }, - { - "type": "array", - "items": { - "$ref": "#/components/schemas/InterleavedContentItem" - } - }, - { - "$ref": "#/components/schemas/URL" - } - ] - }, - "mime_type": { - "type": "string" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "document_id", - "content", - "metadata" - ] - }, - "InsertDocumentsRequest": { - "type": "object", - "properties": { - "bank_id": { - "type": "string" - }, - "documents": { + "chunks": { "type": "array", "items": { - "$ref": "#/components/schemas/MemoryBankDocument" + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "content", + "metadata" + ] } }, "ttl_seconds": { @@ -7103,8 +6936,8 @@ }, "additionalProperties": false, "required": [ - "bank_id", - "documents" + "vector_db_id", + "chunks" ] }, "InvokeToolRequest": { @@ -7113,7 +6946,7 @@ "tool_name": { "type": "string" }, - "args": { + "kwargs": { "type": "object", "additionalProperties": { "oneOf": [ @@ -7142,7 +6975,7 @@ "additionalProperties": false, "required": [ "tool_name", - "args" + "kwargs" ] }, "ToolInvocationResult": { @@ -7193,21 +7026,6 @@ "data" ] }, - "ListMemoryBanksResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MemoryBank" - } - } - }, - "additionalProperties": false, - "required": [ - "data" - ] - }, "ListModelsResponse": { "type": "object", "properties": { @@ -7356,6 +7174,21 @@ "data" ] }, + "ListVectorDBsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/VectorDB" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ] + }, "LogSeverity": { "type": "string", "enum": [ @@ -7873,10 +7706,10 @@ "job_uuid" ] }, - "QueryDocumentsRequest": { + "QueryChunksRequest": { "type": "object", "properties": { - "bank_id": { + "vector_db_id": { "type": "string" }, "query": { @@ -7910,11 +7743,11 @@ }, "additionalProperties": false, "required": [ - "bank_id", + "vector_db_id", "query" ] }, - "QueryDocumentsResponse": { + "QueryChunksResponse": { "type": "object", "properties": { "chunks": { @@ -7925,18 +7758,36 @@ "content": { "$ref": "#/components/schemas/InterleavedContent" }, - "token_count": { - "type": "integer" - }, - "document_id": { - "type": "string" + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } } }, "additionalProperties": false, "required": [ "content", - "token_count", - "document_id" + "metadata" ] } }, @@ -8139,108 +7990,6 @@ "scoring_functions" ] }, - "GraphMemoryBankParams": { - "type": "object", - "properties": { - "memory_bank_type": { - "type": "string", - "const": "graph", - "default": "graph" - } - }, - "additionalProperties": false, - "required": [ - "memory_bank_type" - ] - }, - "KeyValueMemoryBankParams": { - "type": "object", - "properties": { - "memory_bank_type": { - "type": "string", - "const": "keyvalue", - "default": "keyvalue" - } - }, - "additionalProperties": false, - "required": [ - "memory_bank_type" - ] - }, - "KeywordMemoryBankParams": { - "type": "object", - "properties": { - "memory_bank_type": { - "type": "string", - "const": "keyword", - "default": "keyword" - } - }, - "additionalProperties": false, - "required": [ - "memory_bank_type" - ] - }, - "VectorMemoryBankParams": { - "type": "object", - "properties": { - "memory_bank_type": { - "type": "string", - "const": "vector", - "default": "vector" - }, - "embedding_model": { - "type": "string" - }, - "chunk_size_in_tokens": { - "type": "integer" - }, - "overlap_size_in_tokens": { - "type": "integer" - } - }, - "additionalProperties": false, - "required": [ - "memory_bank_type", - "embedding_model", - "chunk_size_in_tokens" - ] - }, - "RegisterMemoryBankRequest": { - "type": "object", - "properties": { - "memory_bank_id": { - "type": "string" - }, - "params": { - "oneOf": [ - { - "$ref": "#/components/schemas/VectorMemoryBankParams" - }, - { - "$ref": "#/components/schemas/KeyValueMemoryBankParams" - }, - { - "$ref": "#/components/schemas/KeywordMemoryBankParams" - }, - { - "$ref": "#/components/schemas/GraphMemoryBankParams" - } - ] - }, - "provider_id": { - "type": "string" - }, - "provider_memory_bank_id": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "memory_bank_id", - "params" - ] - }, "RegisterModelRequest": { "type": "object", "properties": { @@ -8413,6 +8162,31 @@ "provider_id" ] }, + "RegisterVectorDbRequest": { + "type": "object", + "properties": { + "vector_db_id": { + "type": "string" + }, + "embedding_model": { + "type": "string" + }, + "embedding_dimension": { + "type": "integer" + }, + "provider_id": { + "type": "string" + }, + "provider_vector_db_id": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "vector_db_id", + "embedding_model" + ] + }, "RunEvalRequest": { "type": "object", "properties": { @@ -9158,14 +8932,6 @@ "name": "EvaluateRowsRequest", "description": "" }, - { - "name": "GraphMemoryBank", - "description": "" - }, - { - "name": "GraphMemoryBankParams", - "description": "" - }, { "name": "GreedySamplingStrategy", "description": "" @@ -9190,8 +8956,8 @@ "description": "" }, { - "name": "InsertDocumentsRequest", - "description": "" + "name": "InsertChunksRequest", + "description": "" }, { "name": "Inspect" @@ -9220,22 +8986,6 @@ "name": "JsonType", "description": "" }, - { - "name": "KeyValueMemoryBank", - "description": "" - }, - { - "name": "KeyValueMemoryBankParams", - "description": "" - }, - { - "name": "KeywordMemoryBank", - "description": "" - }, - { - "name": "KeywordMemoryBankParams", - "description": "" - }, { "name": "LLMAsJudgeScoringFnParams", "description": "" @@ -9248,10 +8998,6 @@ "name": "ListEvalTasksResponse", "description": "" }, - { - "name": "ListMemoryBanksResponse", - "description": "" - }, { "name": "ListModelsResponse", "description": "" @@ -9284,6 +9030,10 @@ "name": "ListToolsResponse", "description": "" }, + { + "name": "ListVectorDBsResponse", + "description": "" + }, { "name": "LogEventRequest", "description": "" @@ -9296,20 +9046,6 @@ "name": "LoraFinetuningConfig", "description": "" }, - { - "name": "Memory" - }, - { - "name": "MemoryBank", - "description": "" - }, - { - "name": "MemoryBankDocument", - "description": "" - }, - { - "name": "MemoryBanks" - }, { "name": "MemoryRetrievalStep", "description": "" @@ -9388,6 +9124,14 @@ "name": "QATFinetuningConfig", "description": "" }, + { + "name": "QueryChunksRequest", + "description": "" + }, + { + "name": "QueryChunksResponse", + "description": "" + }, { "name": "QueryCondition", "description": "" @@ -9396,14 +9140,6 @@ "name": "QueryConditionOp", "description": "" }, - { - "name": "QueryDocumentsRequest", - "description": "" - }, - { - "name": "QueryDocumentsResponse", - "description": "" - }, { "name": "QuerySpanTreeResponse", "description": "" @@ -9428,10 +9164,6 @@ "name": "RegisterEvalTaskRequest", "description": "" }, - { - "name": "RegisterMemoryBankRequest", - "description": "" - }, { "name": "RegisterModelRequest", "description": "" @@ -9448,6 +9180,10 @@ "name": "RegisterToolGroupRequest", "description": "" }, + { + "name": "RegisterVectorDbRequest", + "description": "" + }, { "name": "ResponseFormat", "description": "" @@ -9701,12 +9437,14 @@ "description": "" }, { - "name": "VectorMemoryBank", - "description": "" + "name": "VectorDB", + "description": "" }, { - "name": "VectorMemoryBankParams", - "description": "" + "name": "VectorDBs" + }, + { + "name": "VectorIO" }, { "name": "VersionInfo", @@ -9729,8 +9467,6 @@ "EvalTasks", "Inference", "Inspect", - "Memory", - "MemoryBanks", "Models", "PostTraining (Coming Soon)", "Safety", @@ -9740,7 +9476,9 @@ "SyntheticDataGeneration (Coming Soon)", "Telemetry", "ToolGroups", - "ToolRuntime" + "ToolRuntime", + "VectorDBs", + "VectorIO" ] }, { @@ -9799,28 +9537,21 @@ "EvalTask", "EvaluateResponse", "EvaluateRowsRequest", - "GraphMemoryBank", - "GraphMemoryBankParams", "GreedySamplingStrategy", "HealthInfo", "ImageContentItem", "ImageDelta", "InferenceStep", - "InsertDocumentsRequest", + "InsertChunksRequest", "InterleavedContent", "InterleavedContentItem", "InvokeToolRequest", "Job", "JobStatus", "JsonType", - "KeyValueMemoryBank", - "KeyValueMemoryBankParams", - "KeywordMemoryBank", - "KeywordMemoryBankParams", "LLMAsJudgeScoringFnParams", "ListDatasetsResponse", "ListEvalTasksResponse", - "ListMemoryBanksResponse", "ListModelsResponse", "ListPostTrainingJobsResponse", "ListProvidersResponse", @@ -9829,11 +9560,10 @@ "ListShieldsResponse", "ListToolGroupsResponse", "ListToolsResponse", + "ListVectorDBsResponse", "LogEventRequest", "LogSeverity", "LoraFinetuningConfig", - "MemoryBank", - "MemoryBankDocument", "MemoryRetrievalStep", "Message", "MetricEvent", @@ -9852,21 +9582,21 @@ "PreferenceOptimizeRequest", "ProviderInfo", "QATFinetuningConfig", + "QueryChunksRequest", + "QueryChunksResponse", "QueryCondition", "QueryConditionOp", - "QueryDocumentsRequest", - "QueryDocumentsResponse", "QuerySpanTreeResponse", "QuerySpansResponse", "QueryTracesResponse", "RegexParserScoringFnParams", "RegisterDatasetRequest", "RegisterEvalTaskRequest", - "RegisterMemoryBankRequest", "RegisterModelRequest", "RegisterScoringFunctionRequest", "RegisterShieldRequest", "RegisterToolGroupRequest", + "RegisterVectorDbRequest", "ResponseFormat", "RouteInfo", "RunEvalRequest", @@ -9924,8 +9654,7 @@ "UnionType", "UnstructuredLogEvent", "UserMessage", - "VectorMemoryBank", - "VectorMemoryBankParams", + "VectorDB", "VersionInfo", "ViolationLevel" ] diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml index 9aeac6db3..1678b1bb9 100644 --- a/docs/resources/llama-stack-spec.yaml +++ b/docs/resources/llama-stack-spec.yaml @@ -891,40 +891,6 @@ components: - scoring_functions - task_config type: object - GraphMemoryBank: - additionalProperties: false - properties: - identifier: - type: string - memory_bank_type: - const: graph - default: graph - type: string - provider_id: - type: string - provider_resource_id: - type: string - type: - const: memory_bank - default: memory_bank - type: string - required: - - identifier - - provider_resource_id - - provider_id - - type - - memory_bank_type - type: object - GraphMemoryBankParams: - additionalProperties: false - properties: - memory_bank_type: - const: graph - default: graph - type: string - required: - - memory_bank_type - type: object GreedySamplingStrategy: additionalProperties: false properties: @@ -997,20 +963,37 @@ components: - step_type - model_response type: object - InsertDocumentsRequest: + InsertChunksRequest: additionalProperties: false properties: - bank_id: - type: string - documents: + chunks: items: - $ref: '#/components/schemas/MemoryBankDocument' + additionalProperties: false + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + metadata: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + required: + - content + - metadata + type: object type: array ttl_seconds: type: integer + vector_db_id: + type: string required: - - bank_id - - documents + - vector_db_id + - chunks type: object InterleavedContent: oneOf: @@ -1026,7 +1009,7 @@ components: InvokeToolRequest: additionalProperties: false properties: - args: + kwargs: additionalProperties: oneOf: - type: 'null' @@ -1040,7 +1023,7 @@ components: type: string required: - tool_name - - args + - kwargs type: object Job: additionalProperties: false @@ -1067,74 +1050,6 @@ components: required: - type type: object - KeyValueMemoryBank: - additionalProperties: false - properties: - identifier: - type: string - memory_bank_type: - const: keyvalue - default: keyvalue - type: string - provider_id: - type: string - provider_resource_id: - type: string - type: - const: memory_bank - default: memory_bank - type: string - required: - - identifier - - provider_resource_id - - provider_id - - type - - memory_bank_type - type: object - KeyValueMemoryBankParams: - additionalProperties: false - properties: - memory_bank_type: - const: keyvalue - default: keyvalue - type: string - required: - - memory_bank_type - type: object - KeywordMemoryBank: - additionalProperties: false - properties: - identifier: - type: string - memory_bank_type: - const: keyword - default: keyword - type: string - provider_id: - type: string - provider_resource_id: - type: string - type: - const: memory_bank - default: memory_bank - type: string - required: - - identifier - - provider_resource_id - - provider_id - - type - - memory_bank_type - type: object - KeywordMemoryBankParams: - additionalProperties: false - properties: - memory_bank_type: - const: keyword - default: keyword - type: string - required: - - memory_bank_type - type: object LLMAsJudgeScoringFnParams: additionalProperties: false properties: @@ -1178,16 +1093,6 @@ components: required: - data type: object - ListMemoryBanksResponse: - additionalProperties: false - properties: - data: - items: - $ref: '#/components/schemas/MemoryBank' - type: array - required: - - data - type: object ListModelsResponse: additionalProperties: false properties: @@ -1274,6 +1179,16 @@ components: required: - data type: object + ListVectorDBsResponse: + additionalProperties: false + properties: + data: + items: + $ref: '#/components/schemas/VectorDB' + type: array + required: + - data + type: object LogEventRequest: additionalProperties: false properties: @@ -1330,42 +1245,6 @@ components: - rank - alpha type: object - MemoryBank: - oneOf: - - $ref: '#/components/schemas/VectorMemoryBank' - - $ref: '#/components/schemas/KeyValueMemoryBank' - - $ref: '#/components/schemas/KeywordMemoryBank' - - $ref: '#/components/schemas/GraphMemoryBank' - MemoryBankDocument: - additionalProperties: false - properties: - content: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - items: - $ref: '#/components/schemas/InterleavedContentItem' - type: array - - $ref: '#/components/schemas/URL' - document_id: - type: string - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - mime_type: - type: string - required: - - document_id - - content - - metadata - type: object MemoryRetrievalStep: additionalProperties: false properties: @@ -1705,6 +1584,59 @@ components: - quantizer_name - group_size type: object + QueryChunksRequest: + additionalProperties: false + properties: + params: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + query: + $ref: '#/components/schemas/InterleavedContent' + vector_db_id: + type: string + required: + - vector_db_id + - query + type: object + QueryChunksResponse: + additionalProperties: false + properties: + chunks: + items: + additionalProperties: false + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + metadata: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + required: + - content + - metadata + type: object + type: array + scores: + items: + type: number + type: array + required: + - chunks + - scores + type: object QueryCondition: additionalProperties: false properties: @@ -1732,54 +1664,6 @@ components: - gt - lt type: string - QueryDocumentsRequest: - additionalProperties: false - properties: - bank_id: - type: string - params: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - query: - $ref: '#/components/schemas/InterleavedContent' - required: - - bank_id - - query - type: object - QueryDocumentsResponse: - additionalProperties: false - properties: - chunks: - items: - additionalProperties: false - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - document_id: - type: string - token_count: - type: integer - required: - - content - - token_count - - document_id - type: object - type: array - scores: - items: - type: number - type: array - required: - - chunks - - scores - type: object QuerySpanTreeResponse: additionalProperties: false properties: @@ -1888,25 +1772,6 @@ components: - dataset_id - scoring_functions type: object - RegisterMemoryBankRequest: - additionalProperties: false - properties: - memory_bank_id: - type: string - params: - oneOf: - - $ref: '#/components/schemas/VectorMemoryBankParams' - - $ref: '#/components/schemas/KeyValueMemoryBankParams' - - $ref: '#/components/schemas/KeywordMemoryBankParams' - - $ref: '#/components/schemas/GraphMemoryBankParams' - provider_id: - type: string - provider_memory_bank_id: - type: string - required: - - memory_bank_id - - params - type: object RegisterModelRequest: additionalProperties: false properties: @@ -1999,6 +1864,23 @@ components: - toolgroup_id - provider_id type: object + RegisterVectorDbRequest: + additionalProperties: false + properties: + embedding_dimension: + type: integer + embedding_model: + type: string + provider_id: + type: string + provider_vector_db_id: + type: string + vector_db_id: + type: string + required: + - vector_db_id + - embedding_model + type: object ResponseFormat: oneOf: - additionalProperties: false @@ -2298,8 +2180,6 @@ components: Session: additionalProperties: false properties: - memory_bank: - $ref: '#/components/schemas/MemoryBank' session_id: type: string session_name: @@ -3202,58 +3082,30 @@ components: - role - content type: object - VectorMemoryBank: + VectorDB: additionalProperties: false properties: - chunk_size_in_tokens: - type: integer embedding_dimension: - default: 384 type: integer embedding_model: type: string identifier: type: string - memory_bank_type: - const: vector - default: vector - type: string - overlap_size_in_tokens: - type: integer provider_id: type: string provider_resource_id: type: string type: - const: memory_bank - default: memory_bank + const: vector_db + default: vector_db type: string required: - identifier - provider_resource_id - provider_id - type - - memory_bank_type - embedding_model - - chunk_size_in_tokens - type: object - VectorMemoryBankParams: - additionalProperties: false - properties: - chunk_size_in_tokens: - type: integer - embedding_model: - type: string - memory_bank_type: - const: vector - default: vector - type: string - overlap_size_in_tokens: - type: integer - required: - - memory_bank_type - - embedding_model - - chunk_size_in_tokens + - embedding_dimension type: object VersionInfo: additionalProperties: false @@ -4272,186 +4124,6 @@ paths: description: OK tags: - Inspect - /v1/memory-banks: - get: - parameters: - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-Provider-Data - required: false - schema: - type: string - - description: Version of the client making the request. This is used to ensure - that the client and server are compatible. - in: header - name: X-LlamaStack-Client-Version - required: false - schema: - type: string - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/ListMemoryBanksResponse' - description: OK - tags: - - MemoryBanks - post: - parameters: - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-Provider-Data - required: false - schema: - type: string - - description: Version of the client making the request. This is used to ensure - that the client and server are compatible. - in: header - name: X-LlamaStack-Client-Version - required: false - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterMemoryBankRequest' - required: true - responses: - '200': - content: - application/json: - schema: - oneOf: - - $ref: '#/components/schemas/VectorMemoryBank' - - $ref: '#/components/schemas/KeyValueMemoryBank' - - $ref: '#/components/schemas/KeywordMemoryBank' - - $ref: '#/components/schemas/GraphMemoryBank' - description: '' - tags: - - MemoryBanks - /v1/memory-banks/{memory_bank_id}: - delete: - parameters: - - in: path - name: memory_bank_id - required: true - schema: - type: string - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-Provider-Data - required: false - schema: - type: string - - description: Version of the client making the request. This is used to ensure - that the client and server are compatible. - in: header - name: X-LlamaStack-Client-Version - required: false - schema: - type: string - responses: - '200': - description: OK - tags: - - MemoryBanks - get: - parameters: - - in: path - name: memory_bank_id - required: true - schema: - type: string - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-Provider-Data - required: false - schema: - type: string - - description: Version of the client making the request. This is used to ensure - that the client and server are compatible. - in: header - name: X-LlamaStack-Client-Version - required: false - schema: - type: string - responses: - '200': - content: - application/json: - schema: - oneOf: - - $ref: '#/components/schemas/MemoryBank' - - type: 'null' - description: OK - tags: - - MemoryBanks - /v1/memory/insert: - post: - parameters: - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-Provider-Data - required: false - schema: - type: string - - description: Version of the client making the request. This is used to ensure - that the client and server are compatible. - in: header - name: X-LlamaStack-Client-Version - required: false - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/InsertDocumentsRequest' - required: true - responses: - '200': - description: OK - tags: - - Memory - /v1/memory/query: - post: - parameters: - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-Provider-Data - required: false - schema: - type: string - - description: Version of the client making the request. This is used to ensure - that the client and server are compatible. - in: header - name: X-LlamaStack-Client-Version - required: false - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryDocumentsRequest' - required: true - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/QueryDocumentsResponse' - description: OK - tags: - - Memory /v1/models: get: parameters: @@ -5562,6 +5234,182 @@ paths: description: OK tags: - ToolGroups + /v1/vector-dbs: + get: + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-Provider-Data + required: false + schema: + type: string + - description: Version of the client making the request. This is used to ensure + that the client and server are compatible. + in: header + name: X-LlamaStack-Client-Version + required: false + schema: + type: string + responses: + '200': + content: + application/json: + schema: + $ref: '#/components/schemas/ListVectorDBsResponse' + description: OK + tags: + - VectorDBs + post: + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-Provider-Data + required: false + schema: + type: string + - description: Version of the client making the request. This is used to ensure + that the client and server are compatible. + in: header + name: X-LlamaStack-Client-Version + required: false + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterVectorDbRequest' + required: true + responses: + '200': + content: + application/json: + schema: + $ref: '#/components/schemas/VectorDB' + description: OK + tags: + - VectorDBs + /v1/vector-dbs/{vector_db_id}: + delete: + parameters: + - in: path + name: vector_db_id + required: true + schema: + type: string + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-Provider-Data + required: false + schema: + type: string + - description: Version of the client making the request. This is used to ensure + that the client and server are compatible. + in: header + name: X-LlamaStack-Client-Version + required: false + schema: + type: string + responses: + '200': + description: OK + tags: + - VectorDBs + get: + parameters: + - in: path + name: vector_db_id + required: true + schema: + type: string + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-Provider-Data + required: false + schema: + type: string + - description: Version of the client making the request. This is used to ensure + that the client and server are compatible. + in: header + name: X-LlamaStack-Client-Version + required: false + schema: + type: string + responses: + '200': + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/VectorDB' + - type: 'null' + description: OK + tags: + - VectorDBs + /v1/vector-io/insert: + post: + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-Provider-Data + required: false + schema: + type: string + - description: Version of the client making the request. This is used to ensure + that the client and server are compatible. + in: header + name: X-LlamaStack-Client-Version + required: false + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/InsertChunksRequest' + required: true + responses: + '200': + description: OK + tags: + - VectorIO + /v1/vector-io/query: + post: + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-Provider-Data + required: false + schema: + type: string + - description: Version of the client making the request. This is used to ensure + that the client and server are compatible. + in: header + name: X-LlamaStack-Client-Version + required: false + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/QueryChunksRequest' + required: true + responses: + '200': + content: + application/json: + schema: + $ref: '#/components/schemas/QueryChunksResponse' + description: OK + tags: + - VectorIO /v1/version: get: parameters: @@ -5767,12 +5615,6 @@ tags: - description: name: EvaluateRowsRequest -- description: - name: GraphMemoryBank -- description: - name: GraphMemoryBankParams - description: name: GreedySamplingStrategy @@ -5786,9 +5628,9 @@ tags: - name: Inference - description: name: InferenceStep -- description: - name: InsertDocumentsRequest + name: InsertChunksRequest - name: Inspect - description: @@ -5805,18 +5647,6 @@ tags: name: JobStatus - description: name: JsonType -- description: - name: KeyValueMemoryBank -- description: - name: KeyValueMemoryBankParams -- description: - name: KeywordMemoryBank -- description: - name: KeywordMemoryBankParams - description: name: LLMAsJudgeScoringFnParams @@ -5826,9 +5656,6 @@ tags: - description: name: ListEvalTasksResponse -- description: - name: ListMemoryBanksResponse - description: name: ListModelsResponse @@ -5853,6 +5680,9 @@ tags: - description: name: ListToolsResponse +- description: + name: ListVectorDBsResponse - description: name: LogEventRequest @@ -5861,13 +5691,6 @@ tags: - description: name: LoraFinetuningConfig -- name: Memory -- description: - name: MemoryBank -- description: - name: MemoryBankDocument -- name: MemoryBanks - description: name: MemoryRetrievalStep @@ -5920,17 +5743,17 @@ tags: - description: name: QATFinetuningConfig +- description: + name: QueryChunksRequest +- description: + name: QueryChunksResponse - description: name: QueryCondition - description: name: QueryConditionOp -- description: - name: QueryDocumentsRequest -- description: - name: QueryDocumentsResponse - description: name: QuerySpanTreeResponse @@ -5949,9 +5772,6 @@ tags: - description: name: RegisterEvalTaskRequest -- description: - name: RegisterMemoryBankRequest - description: name: RegisterModelRequest @@ -5964,6 +5784,9 @@ tags: - description: name: RegisterToolGroupRequest +- description: + name: RegisterVectorDbRequest - description: name: ResponseFormat - description: @@ -6128,12 +5951,10 @@ tags: name: UnstructuredLogEvent - description: name: UserMessage -- description: - name: VectorMemoryBank -- description: - name: VectorMemoryBankParams +- description: + name: VectorDB +- name: VectorDBs +- name: VectorIO - description: name: VersionInfo - description: @@ -6149,8 +5970,6 @@ x-tagGroups: - EvalTasks - Inference - Inspect - - Memory - - MemoryBanks - Models - PostTraining (Coming Soon) - Safety @@ -6161,6 +5980,8 @@ x-tagGroups: - Telemetry - ToolGroups - ToolRuntime + - VectorDBs + - VectorIO - name: Types tags: - AgentCandidate @@ -6216,28 +6037,21 @@ x-tagGroups: - EvalTask - EvaluateResponse - EvaluateRowsRequest - - GraphMemoryBank - - GraphMemoryBankParams - GreedySamplingStrategy - HealthInfo - ImageContentItem - ImageDelta - InferenceStep - - InsertDocumentsRequest + - InsertChunksRequest - InterleavedContent - InterleavedContentItem - InvokeToolRequest - Job - JobStatus - JsonType - - KeyValueMemoryBank - - KeyValueMemoryBankParams - - KeywordMemoryBank - - KeywordMemoryBankParams - LLMAsJudgeScoringFnParams - ListDatasetsResponse - ListEvalTasksResponse - - ListMemoryBanksResponse - ListModelsResponse - ListPostTrainingJobsResponse - ListProvidersResponse @@ -6246,11 +6060,10 @@ x-tagGroups: - ListShieldsResponse - ListToolGroupsResponse - ListToolsResponse + - ListVectorDBsResponse - LogEventRequest - LogSeverity - LoraFinetuningConfig - - MemoryBank - - MemoryBankDocument - MemoryRetrievalStep - Message - MetricEvent @@ -6269,21 +6082,21 @@ x-tagGroups: - PreferenceOptimizeRequest - ProviderInfo - QATFinetuningConfig + - QueryChunksRequest + - QueryChunksResponse - QueryCondition - QueryConditionOp - - QueryDocumentsRequest - - QueryDocumentsResponse - QuerySpanTreeResponse - QuerySpansResponse - QueryTracesResponse - RegexParserScoringFnParams - RegisterDatasetRequest - RegisterEvalTaskRequest - - RegisterMemoryBankRequest - RegisterModelRequest - RegisterScoringFunctionRequest - RegisterShieldRequest - RegisterToolGroupRequest + - RegisterVectorDbRequest - ResponseFormat - RouteInfo - RunEvalRequest @@ -6341,7 +6154,6 @@ x-tagGroups: - UnionType - UnstructuredLogEvent - UserMessage - - VectorMemoryBank - - VectorMemoryBankParams + - VectorDB - VersionInfo - ViolationLevel diff --git a/llama_stack/distribution/routers/routers.py b/llama_stack/distribution/routers/routers.py index c2019af12..3ae9833dc 100644 --- a/llama_stack/distribution/routers/routers.py +++ b/llama_stack/distribution/routers/routers.py @@ -440,7 +440,7 @@ class ToolRuntimeRouter(ToolRuntime): ) -> None: self.routing_table = routing_table - # TODO: this should be in sync with "get_all_api_endpoints()" + # HACK ALERT this should be in sync with "get_all_api_endpoints()" # TODO: make sure rag_tool vs builtin::memory is correct everywhere self.rag_tool = self.RagToolImpl(routing_table) setattr(self, "rag_tool.query_context", self.rag_tool.query_context) diff --git a/llama_stack/distribution/server/endpoints.py b/llama_stack/distribution/server/endpoints.py index 745bcddea..180479e40 100644 --- a/llama_stack/distribution/server/endpoints.py +++ b/llama_stack/distribution/server/endpoints.py @@ -38,6 +38,8 @@ def get_all_api_endpoints() -> Dict[Api, List[ApiEndpoint]]: for api, protocol in protocols.items(): endpoints = [] protocol_methods = inspect.getmembers(protocol, predicate=inspect.isfunction) + + # HACK ALERT if api == Api.tool_runtime: for tool_group in SpecialToolGroup: sub_protocol = toolgroup_protocols[tool_group] diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py index 01a82643c..d49c1f749 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py @@ -970,7 +970,7 @@ async def execute_tool_call_maybe( result = await tool_runtime_api.invoke_tool( tool_name=name, - args=dict( + kwargs=dict( session_id=session_id, **tool_call_args, ),