diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html
index 459a53888..ffa1df8e6 100644
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@@ -1108,98 +1108,6 @@
]
}
},
- "/v1/memory-banks/{memory_bank_id}": {
- "get": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/MemoryBank"
- },
- {
- "type": "null"
- }
- ]
- }
- }
- }
- }
- },
- "tags": [
- "MemoryBanks"
- ],
- "parameters": [
- {
- "name": "memory_bank_id",
- "in": "path",
- "required": true,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "X-LlamaStack-Provider-Data",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "X-LlamaStack-Client-Version",
- "in": "header",
- "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ]
- },
- "delete": {
- "responses": {
- "200": {
- "description": "OK"
- }
- },
- "tags": [
- "MemoryBanks"
- ],
- "parameters": [
- {
- "name": "memory_bank_id",
- "in": "path",
- "required": true,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "X-LlamaStack-Provider-Data",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "X-LlamaStack-Client-Version",
- "in": "header",
- "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ]
- }
- },
"/v1/models/{model_id}": {
"get": {
"responses": {
@@ -1848,6 +1756,98 @@
]
}
},
+ "/v1/vector-dbs/{vector_db_id}": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/VectorDB"
+ },
+ {
+ "type": "null"
+ }
+ ]
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "VectorDBs"
+ ],
+ "parameters": [
+ {
+ "name": "vector_db_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "X-LlamaStack-Provider-Data",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "X-LlamaStack-Client-Version",
+ "in": "header",
+ "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ },
+ "delete": {
+ "responses": {
+ "200": {
+ "description": "OK"
+ }
+ },
+ "tags": [
+ "VectorDBs"
+ ],
+ "parameters": [
+ {
+ "name": "vector_db_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "X-LlamaStack-Provider-Data",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "X-LlamaStack-Client-Version",
+ "in": "header",
+ "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
"/v1/health": {
"get": {
"responses": {
@@ -1887,7 +1887,7 @@
]
}
},
- "/v1/memory/insert": {
+ "/v1/vector-io/insert": {
"post": {
"responses": {
"200": {
@@ -1895,7 +1895,7 @@
}
},
"tags": [
- "Memory"
+ "VectorIO"
],
"parameters": [
{
@@ -1921,7 +1921,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/InsertDocumentsRequest"
+ "$ref": "#/components/schemas/InsertChunksRequest"
}
}
},
@@ -2300,105 +2300,6 @@
}
}
},
- "/v1/memory-banks": {
- "get": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/ListMemoryBanksResponse"
- }
- }
- }
- }
- },
- "tags": [
- "MemoryBanks"
- ],
- "parameters": [
- {
- "name": "X-LlamaStack-Provider-Data",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "X-LlamaStack-Client-Version",
- "in": "header",
- "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ]
- },
- "post": {
- "responses": {
- "200": {
- "description": "",
- "content": {
- "application/json": {
- "schema": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/VectorMemoryBank"
- },
- {
- "$ref": "#/components/schemas/KeyValueMemoryBank"
- },
- {
- "$ref": "#/components/schemas/KeywordMemoryBank"
- },
- {
- "$ref": "#/components/schemas/GraphMemoryBank"
- }
- ]
- }
- }
- }
- }
- },
- "tags": [
- "MemoryBanks"
- ],
- "parameters": [
- {
- "name": "X-LlamaStack-Provider-Data",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "X-LlamaStack-Client-Version",
- "in": "header",
- "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/RegisterMemoryBankRequest"
- }
- }
- },
- "required": true
- }
- }
- },
"/v1/models": {
"get": {
"responses": {
@@ -2912,6 +2813,92 @@
]
}
},
+ "/v1/vector-dbs": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ListVectorDBsResponse"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "VectorDBs"
+ ],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-Provider-Data",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "X-LlamaStack-Client-Version",
+ "in": "header",
+ "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ },
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/VectorDB"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "VectorDBs"
+ ],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-Provider-Data",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "X-LlamaStack-Client-Version",
+ "in": "header",
+ "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/RegisterVectorDbRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/v1/telemetry/events": {
"post": {
"responses": {
@@ -3003,7 +2990,7 @@
}
}
},
- "/v1/memory/query": {
+ "/v1/vector-io/query": {
"post": {
"responses": {
"200": {
@@ -3011,14 +2998,14 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/QueryDocumentsResponse"
+ "$ref": "#/components/schemas/QueryChunksResponse"
}
}
}
}
},
"tags": [
- "Memory"
+ "VectorIO"
],
"parameters": [
{
@@ -3044,7 +3031,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/QueryDocumentsRequest"
+ "$ref": "#/components/schemas/QueryChunksRequest"
}
}
},
@@ -5851,118 +5838,6 @@
"aggregated_results"
]
},
- "GraphMemoryBank": {
- "type": "object",
- "properties": {
- "identifier": {
- "type": "string"
- },
- "provider_resource_id": {
- "type": "string"
- },
- "provider_id": {
- "type": "string"
- },
- "type": {
- "type": "string",
- "const": "memory_bank",
- "default": "memory_bank"
- },
- "memory_bank_type": {
- "type": "string",
- "const": "graph",
- "default": "graph"
- }
- },
- "additionalProperties": false,
- "required": [
- "identifier",
- "provider_resource_id",
- "provider_id",
- "type",
- "memory_bank_type"
- ]
- },
- "KeyValueMemoryBank": {
- "type": "object",
- "properties": {
- "identifier": {
- "type": "string"
- },
- "provider_resource_id": {
- "type": "string"
- },
- "provider_id": {
- "type": "string"
- },
- "type": {
- "type": "string",
- "const": "memory_bank",
- "default": "memory_bank"
- },
- "memory_bank_type": {
- "type": "string",
- "const": "keyvalue",
- "default": "keyvalue"
- }
- },
- "additionalProperties": false,
- "required": [
- "identifier",
- "provider_resource_id",
- "provider_id",
- "type",
- "memory_bank_type"
- ]
- },
- "KeywordMemoryBank": {
- "type": "object",
- "properties": {
- "identifier": {
- "type": "string"
- },
- "provider_resource_id": {
- "type": "string"
- },
- "provider_id": {
- "type": "string"
- },
- "type": {
- "type": "string",
- "const": "memory_bank",
- "default": "memory_bank"
- },
- "memory_bank_type": {
- "type": "string",
- "const": "keyword",
- "default": "keyword"
- }
- },
- "additionalProperties": false,
- "required": [
- "identifier",
- "provider_resource_id",
- "provider_id",
- "type",
- "memory_bank_type"
- ]
- },
- "MemoryBank": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/VectorMemoryBank"
- },
- {
- "$ref": "#/components/schemas/KeyValueMemoryBank"
- },
- {
- "$ref": "#/components/schemas/KeywordMemoryBank"
- },
- {
- "$ref": "#/components/schemas/GraphMemoryBank"
- }
- ]
- },
"Session": {
"type": "object",
"properties": {
@@ -5981,9 +5856,6 @@
"started_at": {
"type": "string",
"format": "date-time"
- },
- "memory_bank": {
- "$ref": "#/components/schemas/MemoryBank"
}
},
"additionalProperties": false,
@@ -5995,53 +5867,6 @@
],
"title": "A single session of an interaction with an Agentic System."
},
- "VectorMemoryBank": {
- "type": "object",
- "properties": {
- "identifier": {
- "type": "string"
- },
- "provider_resource_id": {
- "type": "string"
- },
- "provider_id": {
- "type": "string"
- },
- "type": {
- "type": "string",
- "const": "memory_bank",
- "default": "memory_bank"
- },
- "memory_bank_type": {
- "type": "string",
- "const": "vector",
- "default": "vector"
- },
- "embedding_model": {
- "type": "string"
- },
- "chunk_size_in_tokens": {
- "type": "integer"
- },
- "embedding_dimension": {
- "type": "integer",
- "default": 384
- },
- "overlap_size_in_tokens": {
- "type": "integer"
- }
- },
- "additionalProperties": false,
- "required": [
- "identifier",
- "provider_resource_id",
- "provider_id",
- "type",
- "memory_bank_type",
- "embedding_model",
- "chunk_size_in_tokens"
- ]
- },
"AgentStepResponse": {
"type": "object",
"properties": {
@@ -7012,6 +6837,40 @@
"data"
]
},
+ "VectorDB": {
+ "type": "object",
+ "properties": {
+ "identifier": {
+ "type": "string"
+ },
+ "provider_resource_id": {
+ "type": "string"
+ },
+ "provider_id": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "const": "vector_db",
+ "default": "vector_db"
+ },
+ "embedding_model": {
+ "type": "string"
+ },
+ "embedding_dimension": {
+ "type": "integer"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "identifier",
+ "provider_resource_id",
+ "provider_id",
+ "type",
+ "embedding_model",
+ "embedding_dimension"
+ ]
+ },
"HealthInfo": {
"type": "object",
"properties": {
@@ -7024,77 +6883,51 @@
"status"
]
},
- "MemoryBankDocument": {
+ "InsertChunksRequest": {
"type": "object",
"properties": {
- "document_id": {
+ "vector_db_id": {
"type": "string"
},
- "content": {
- "oneOf": [
- {
- "type": "string"
- },
- {
- "$ref": "#/components/schemas/InterleavedContentItem"
- },
- {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/InterleavedContentItem"
- }
- },
- {
- "$ref": "#/components/schemas/URL"
- }
- ]
- },
- "mime_type": {
- "type": "string"
- },
- "metadata": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "document_id",
- "content",
- "metadata"
- ]
- },
- "InsertDocumentsRequest": {
- "type": "object",
- "properties": {
- "bank_id": {
- "type": "string"
- },
- "documents": {
+ "chunks": {
"type": "array",
"items": {
- "$ref": "#/components/schemas/MemoryBankDocument"
+ "type": "object",
+ "properties": {
+ "content": {
+ "$ref": "#/components/schemas/InterleavedContent"
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "content",
+ "metadata"
+ ]
}
},
"ttl_seconds": {
@@ -7103,8 +6936,8 @@
},
"additionalProperties": false,
"required": [
- "bank_id",
- "documents"
+ "vector_db_id",
+ "chunks"
]
},
"InvokeToolRequest": {
@@ -7113,7 +6946,7 @@
"tool_name": {
"type": "string"
},
- "args": {
+ "kwargs": {
"type": "object",
"additionalProperties": {
"oneOf": [
@@ -7142,7 +6975,7 @@
"additionalProperties": false,
"required": [
"tool_name",
- "args"
+ "kwargs"
]
},
"ToolInvocationResult": {
@@ -7193,21 +7026,6 @@
"data"
]
},
- "ListMemoryBanksResponse": {
- "type": "object",
- "properties": {
- "data": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/MemoryBank"
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "data"
- ]
- },
"ListModelsResponse": {
"type": "object",
"properties": {
@@ -7356,6 +7174,21 @@
"data"
]
},
+ "ListVectorDBsResponse": {
+ "type": "object",
+ "properties": {
+ "data": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/VectorDB"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "data"
+ ]
+ },
"LogSeverity": {
"type": "string",
"enum": [
@@ -7873,10 +7706,10 @@
"job_uuid"
]
},
- "QueryDocumentsRequest": {
+ "QueryChunksRequest": {
"type": "object",
"properties": {
- "bank_id": {
+ "vector_db_id": {
"type": "string"
},
"query": {
@@ -7910,11 +7743,11 @@
},
"additionalProperties": false,
"required": [
- "bank_id",
+ "vector_db_id",
"query"
]
},
- "QueryDocumentsResponse": {
+ "QueryChunksResponse": {
"type": "object",
"properties": {
"chunks": {
@@ -7925,18 +7758,36 @@
"content": {
"$ref": "#/components/schemas/InterleavedContent"
},
- "token_count": {
- "type": "integer"
- },
- "document_id": {
- "type": "string"
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
}
},
"additionalProperties": false,
"required": [
"content",
- "token_count",
- "document_id"
+ "metadata"
]
}
},
@@ -8139,108 +7990,6 @@
"scoring_functions"
]
},
- "GraphMemoryBankParams": {
- "type": "object",
- "properties": {
- "memory_bank_type": {
- "type": "string",
- "const": "graph",
- "default": "graph"
- }
- },
- "additionalProperties": false,
- "required": [
- "memory_bank_type"
- ]
- },
- "KeyValueMemoryBankParams": {
- "type": "object",
- "properties": {
- "memory_bank_type": {
- "type": "string",
- "const": "keyvalue",
- "default": "keyvalue"
- }
- },
- "additionalProperties": false,
- "required": [
- "memory_bank_type"
- ]
- },
- "KeywordMemoryBankParams": {
- "type": "object",
- "properties": {
- "memory_bank_type": {
- "type": "string",
- "const": "keyword",
- "default": "keyword"
- }
- },
- "additionalProperties": false,
- "required": [
- "memory_bank_type"
- ]
- },
- "VectorMemoryBankParams": {
- "type": "object",
- "properties": {
- "memory_bank_type": {
- "type": "string",
- "const": "vector",
- "default": "vector"
- },
- "embedding_model": {
- "type": "string"
- },
- "chunk_size_in_tokens": {
- "type": "integer"
- },
- "overlap_size_in_tokens": {
- "type": "integer"
- }
- },
- "additionalProperties": false,
- "required": [
- "memory_bank_type",
- "embedding_model",
- "chunk_size_in_tokens"
- ]
- },
- "RegisterMemoryBankRequest": {
- "type": "object",
- "properties": {
- "memory_bank_id": {
- "type": "string"
- },
- "params": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/VectorMemoryBankParams"
- },
- {
- "$ref": "#/components/schemas/KeyValueMemoryBankParams"
- },
- {
- "$ref": "#/components/schemas/KeywordMemoryBankParams"
- },
- {
- "$ref": "#/components/schemas/GraphMemoryBankParams"
- }
- ]
- },
- "provider_id": {
- "type": "string"
- },
- "provider_memory_bank_id": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "memory_bank_id",
- "params"
- ]
- },
"RegisterModelRequest": {
"type": "object",
"properties": {
@@ -8413,6 +8162,31 @@
"provider_id"
]
},
+ "RegisterVectorDbRequest": {
+ "type": "object",
+ "properties": {
+ "vector_db_id": {
+ "type": "string"
+ },
+ "embedding_model": {
+ "type": "string"
+ },
+ "embedding_dimension": {
+ "type": "integer"
+ },
+ "provider_id": {
+ "type": "string"
+ },
+ "provider_vector_db_id": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "vector_db_id",
+ "embedding_model"
+ ]
+ },
"RunEvalRequest": {
"type": "object",
"properties": {
@@ -9158,14 +8932,6 @@
"name": "EvaluateRowsRequest",
"description": ""
},
- {
- "name": "GraphMemoryBank",
- "description": ""
- },
- {
- "name": "GraphMemoryBankParams",
- "description": ""
- },
{
"name": "GreedySamplingStrategy",
"description": ""
@@ -9190,8 +8956,8 @@
"description": ""
},
{
- "name": "InsertDocumentsRequest",
- "description": ""
+ "name": "InsertChunksRequest",
+ "description": ""
},
{
"name": "Inspect"
@@ -9220,22 +8986,6 @@
"name": "JsonType",
"description": ""
},
- {
- "name": "KeyValueMemoryBank",
- "description": ""
- },
- {
- "name": "KeyValueMemoryBankParams",
- "description": ""
- },
- {
- "name": "KeywordMemoryBank",
- "description": ""
- },
- {
- "name": "KeywordMemoryBankParams",
- "description": ""
- },
{
"name": "LLMAsJudgeScoringFnParams",
"description": ""
@@ -9248,10 +8998,6 @@
"name": "ListEvalTasksResponse",
"description": ""
},
- {
- "name": "ListMemoryBanksResponse",
- "description": ""
- },
{
"name": "ListModelsResponse",
"description": ""
@@ -9284,6 +9030,10 @@
"name": "ListToolsResponse",
"description": ""
},
+ {
+ "name": "ListVectorDBsResponse",
+ "description": ""
+ },
{
"name": "LogEventRequest",
"description": ""
@@ -9296,20 +9046,6 @@
"name": "LoraFinetuningConfig",
"description": ""
},
- {
- "name": "Memory"
- },
- {
- "name": "MemoryBank",
- "description": ""
- },
- {
- "name": "MemoryBankDocument",
- "description": ""
- },
- {
- "name": "MemoryBanks"
- },
{
"name": "MemoryRetrievalStep",
"description": ""
@@ -9388,6 +9124,14 @@
"name": "QATFinetuningConfig",
"description": ""
},
+ {
+ "name": "QueryChunksRequest",
+ "description": ""
+ },
+ {
+ "name": "QueryChunksResponse",
+ "description": ""
+ },
{
"name": "QueryCondition",
"description": ""
@@ -9396,14 +9140,6 @@
"name": "QueryConditionOp",
"description": ""
},
- {
- "name": "QueryDocumentsRequest",
- "description": ""
- },
- {
- "name": "QueryDocumentsResponse",
- "description": ""
- },
{
"name": "QuerySpanTreeResponse",
"description": ""
@@ -9428,10 +9164,6 @@
"name": "RegisterEvalTaskRequest",
"description": ""
},
- {
- "name": "RegisterMemoryBankRequest",
- "description": ""
- },
{
"name": "RegisterModelRequest",
"description": ""
@@ -9448,6 +9180,10 @@
"name": "RegisterToolGroupRequest",
"description": ""
},
+ {
+ "name": "RegisterVectorDbRequest",
+ "description": ""
+ },
{
"name": "ResponseFormat",
"description": ""
@@ -9701,12 +9437,14 @@
"description": ""
},
{
- "name": "VectorMemoryBank",
- "description": ""
+ "name": "VectorDB",
+ "description": ""
},
{
- "name": "VectorMemoryBankParams",
- "description": ""
+ "name": "VectorDBs"
+ },
+ {
+ "name": "VectorIO"
},
{
"name": "VersionInfo",
@@ -9729,8 +9467,6 @@
"EvalTasks",
"Inference",
"Inspect",
- "Memory",
- "MemoryBanks",
"Models",
"PostTraining (Coming Soon)",
"Safety",
@@ -9740,7 +9476,9 @@
"SyntheticDataGeneration (Coming Soon)",
"Telemetry",
"ToolGroups",
- "ToolRuntime"
+ "ToolRuntime",
+ "VectorDBs",
+ "VectorIO"
]
},
{
@@ -9799,28 +9537,21 @@
"EvalTask",
"EvaluateResponse",
"EvaluateRowsRequest",
- "GraphMemoryBank",
- "GraphMemoryBankParams",
"GreedySamplingStrategy",
"HealthInfo",
"ImageContentItem",
"ImageDelta",
"InferenceStep",
- "InsertDocumentsRequest",
+ "InsertChunksRequest",
"InterleavedContent",
"InterleavedContentItem",
"InvokeToolRequest",
"Job",
"JobStatus",
"JsonType",
- "KeyValueMemoryBank",
- "KeyValueMemoryBankParams",
- "KeywordMemoryBank",
- "KeywordMemoryBankParams",
"LLMAsJudgeScoringFnParams",
"ListDatasetsResponse",
"ListEvalTasksResponse",
- "ListMemoryBanksResponse",
"ListModelsResponse",
"ListPostTrainingJobsResponse",
"ListProvidersResponse",
@@ -9829,11 +9560,10 @@
"ListShieldsResponse",
"ListToolGroupsResponse",
"ListToolsResponse",
+ "ListVectorDBsResponse",
"LogEventRequest",
"LogSeverity",
"LoraFinetuningConfig",
- "MemoryBank",
- "MemoryBankDocument",
"MemoryRetrievalStep",
"Message",
"MetricEvent",
@@ -9852,21 +9582,21 @@
"PreferenceOptimizeRequest",
"ProviderInfo",
"QATFinetuningConfig",
+ "QueryChunksRequest",
+ "QueryChunksResponse",
"QueryCondition",
"QueryConditionOp",
- "QueryDocumentsRequest",
- "QueryDocumentsResponse",
"QuerySpanTreeResponse",
"QuerySpansResponse",
"QueryTracesResponse",
"RegexParserScoringFnParams",
"RegisterDatasetRequest",
"RegisterEvalTaskRequest",
- "RegisterMemoryBankRequest",
"RegisterModelRequest",
"RegisterScoringFunctionRequest",
"RegisterShieldRequest",
"RegisterToolGroupRequest",
+ "RegisterVectorDbRequest",
"ResponseFormat",
"RouteInfo",
"RunEvalRequest",
@@ -9924,8 +9654,7 @@
"UnionType",
"UnstructuredLogEvent",
"UserMessage",
- "VectorMemoryBank",
- "VectorMemoryBankParams",
+ "VectorDB",
"VersionInfo",
"ViolationLevel"
]
diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml
index 9aeac6db3..1678b1bb9 100644
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@@ -891,40 +891,6 @@ components:
- scoring_functions
- task_config
type: object
- GraphMemoryBank:
- additionalProperties: false
- properties:
- identifier:
- type: string
- memory_bank_type:
- const: graph
- default: graph
- type: string
- provider_id:
- type: string
- provider_resource_id:
- type: string
- type:
- const: memory_bank
- default: memory_bank
- type: string
- required:
- - identifier
- - provider_resource_id
- - provider_id
- - type
- - memory_bank_type
- type: object
- GraphMemoryBankParams:
- additionalProperties: false
- properties:
- memory_bank_type:
- const: graph
- default: graph
- type: string
- required:
- - memory_bank_type
- type: object
GreedySamplingStrategy:
additionalProperties: false
properties:
@@ -997,20 +963,37 @@ components:
- step_type
- model_response
type: object
- InsertDocumentsRequest:
+ InsertChunksRequest:
additionalProperties: false
properties:
- bank_id:
- type: string
- documents:
+ chunks:
items:
- $ref: '#/components/schemas/MemoryBankDocument'
+ additionalProperties: false
+ properties:
+ content:
+ $ref: '#/components/schemas/InterleavedContent'
+ metadata:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ required:
+ - content
+ - metadata
+ type: object
type: array
ttl_seconds:
type: integer
+ vector_db_id:
+ type: string
required:
- - bank_id
- - documents
+ - vector_db_id
+ - chunks
type: object
InterleavedContent:
oneOf:
@@ -1026,7 +1009,7 @@ components:
InvokeToolRequest:
additionalProperties: false
properties:
- args:
+ kwargs:
additionalProperties:
oneOf:
- type: 'null'
@@ -1040,7 +1023,7 @@ components:
type: string
required:
- tool_name
- - args
+ - kwargs
type: object
Job:
additionalProperties: false
@@ -1067,74 +1050,6 @@ components:
required:
- type
type: object
- KeyValueMemoryBank:
- additionalProperties: false
- properties:
- identifier:
- type: string
- memory_bank_type:
- const: keyvalue
- default: keyvalue
- type: string
- provider_id:
- type: string
- provider_resource_id:
- type: string
- type:
- const: memory_bank
- default: memory_bank
- type: string
- required:
- - identifier
- - provider_resource_id
- - provider_id
- - type
- - memory_bank_type
- type: object
- KeyValueMemoryBankParams:
- additionalProperties: false
- properties:
- memory_bank_type:
- const: keyvalue
- default: keyvalue
- type: string
- required:
- - memory_bank_type
- type: object
- KeywordMemoryBank:
- additionalProperties: false
- properties:
- identifier:
- type: string
- memory_bank_type:
- const: keyword
- default: keyword
- type: string
- provider_id:
- type: string
- provider_resource_id:
- type: string
- type:
- const: memory_bank
- default: memory_bank
- type: string
- required:
- - identifier
- - provider_resource_id
- - provider_id
- - type
- - memory_bank_type
- type: object
- KeywordMemoryBankParams:
- additionalProperties: false
- properties:
- memory_bank_type:
- const: keyword
- default: keyword
- type: string
- required:
- - memory_bank_type
- type: object
LLMAsJudgeScoringFnParams:
additionalProperties: false
properties:
@@ -1178,16 +1093,6 @@ components:
required:
- data
type: object
- ListMemoryBanksResponse:
- additionalProperties: false
- properties:
- data:
- items:
- $ref: '#/components/schemas/MemoryBank'
- type: array
- required:
- - data
- type: object
ListModelsResponse:
additionalProperties: false
properties:
@@ -1274,6 +1179,16 @@ components:
required:
- data
type: object
+ ListVectorDBsResponse:
+ additionalProperties: false
+ properties:
+ data:
+ items:
+ $ref: '#/components/schemas/VectorDB'
+ type: array
+ required:
+ - data
+ type: object
LogEventRequest:
additionalProperties: false
properties:
@@ -1330,42 +1245,6 @@ components:
- rank
- alpha
type: object
- MemoryBank:
- oneOf:
- - $ref: '#/components/schemas/VectorMemoryBank'
- - $ref: '#/components/schemas/KeyValueMemoryBank'
- - $ref: '#/components/schemas/KeywordMemoryBank'
- - $ref: '#/components/schemas/GraphMemoryBank'
- MemoryBankDocument:
- additionalProperties: false
- properties:
- content:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/InterleavedContentItem'
- - items:
- $ref: '#/components/schemas/InterleavedContentItem'
- type: array
- - $ref: '#/components/schemas/URL'
- document_id:
- type: string
- metadata:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- mime_type:
- type: string
- required:
- - document_id
- - content
- - metadata
- type: object
MemoryRetrievalStep:
additionalProperties: false
properties:
@@ -1705,6 +1584,59 @@ components:
- quantizer_name
- group_size
type: object
+ QueryChunksRequest:
+ additionalProperties: false
+ properties:
+ params:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ query:
+ $ref: '#/components/schemas/InterleavedContent'
+ vector_db_id:
+ type: string
+ required:
+ - vector_db_id
+ - query
+ type: object
+ QueryChunksResponse:
+ additionalProperties: false
+ properties:
+ chunks:
+ items:
+ additionalProperties: false
+ properties:
+ content:
+ $ref: '#/components/schemas/InterleavedContent'
+ metadata:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ required:
+ - content
+ - metadata
+ type: object
+ type: array
+ scores:
+ items:
+ type: number
+ type: array
+ required:
+ - chunks
+ - scores
+ type: object
QueryCondition:
additionalProperties: false
properties:
@@ -1732,54 +1664,6 @@ components:
- gt
- lt
type: string
- QueryDocumentsRequest:
- additionalProperties: false
- properties:
- bank_id:
- type: string
- params:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- query:
- $ref: '#/components/schemas/InterleavedContent'
- required:
- - bank_id
- - query
- type: object
- QueryDocumentsResponse:
- additionalProperties: false
- properties:
- chunks:
- items:
- additionalProperties: false
- properties:
- content:
- $ref: '#/components/schemas/InterleavedContent'
- document_id:
- type: string
- token_count:
- type: integer
- required:
- - content
- - token_count
- - document_id
- type: object
- type: array
- scores:
- items:
- type: number
- type: array
- required:
- - chunks
- - scores
- type: object
QuerySpanTreeResponse:
additionalProperties: false
properties:
@@ -1888,25 +1772,6 @@ components:
- dataset_id
- scoring_functions
type: object
- RegisterMemoryBankRequest:
- additionalProperties: false
- properties:
- memory_bank_id:
- type: string
- params:
- oneOf:
- - $ref: '#/components/schemas/VectorMemoryBankParams'
- - $ref: '#/components/schemas/KeyValueMemoryBankParams'
- - $ref: '#/components/schemas/KeywordMemoryBankParams'
- - $ref: '#/components/schemas/GraphMemoryBankParams'
- provider_id:
- type: string
- provider_memory_bank_id:
- type: string
- required:
- - memory_bank_id
- - params
- type: object
RegisterModelRequest:
additionalProperties: false
properties:
@@ -1999,6 +1864,23 @@ components:
- toolgroup_id
- provider_id
type: object
+ RegisterVectorDbRequest:
+ additionalProperties: false
+ properties:
+ embedding_dimension:
+ type: integer
+ embedding_model:
+ type: string
+ provider_id:
+ type: string
+ provider_vector_db_id:
+ type: string
+ vector_db_id:
+ type: string
+ required:
+ - vector_db_id
+ - embedding_model
+ type: object
ResponseFormat:
oneOf:
- additionalProperties: false
@@ -2298,8 +2180,6 @@ components:
Session:
additionalProperties: false
properties:
- memory_bank:
- $ref: '#/components/schemas/MemoryBank'
session_id:
type: string
session_name:
@@ -3202,58 +3082,30 @@ components:
- role
- content
type: object
- VectorMemoryBank:
+ VectorDB:
additionalProperties: false
properties:
- chunk_size_in_tokens:
- type: integer
embedding_dimension:
- default: 384
type: integer
embedding_model:
type: string
identifier:
type: string
- memory_bank_type:
- const: vector
- default: vector
- type: string
- overlap_size_in_tokens:
- type: integer
provider_id:
type: string
provider_resource_id:
type: string
type:
- const: memory_bank
- default: memory_bank
+ const: vector_db
+ default: vector_db
type: string
required:
- identifier
- provider_resource_id
- provider_id
- type
- - memory_bank_type
- embedding_model
- - chunk_size_in_tokens
- type: object
- VectorMemoryBankParams:
- additionalProperties: false
- properties:
- chunk_size_in_tokens:
- type: integer
- embedding_model:
- type: string
- memory_bank_type:
- const: vector
- default: vector
- type: string
- overlap_size_in_tokens:
- type: integer
- required:
- - memory_bank_type
- - embedding_model
- - chunk_size_in_tokens
+ - embedding_dimension
type: object
VersionInfo:
additionalProperties: false
@@ -4272,186 +4124,6 @@ paths:
description: OK
tags:
- Inspect
- /v1/memory-banks:
- get:
- parameters:
- - description: JSON-encoded provider data which will be made available to the
- adapter servicing the API
- in: header
- name: X-LlamaStack-Provider-Data
- required: false
- schema:
- type: string
- - description: Version of the client making the request. This is used to ensure
- that the client and server are compatible.
- in: header
- name: X-LlamaStack-Client-Version
- required: false
- schema:
- type: string
- responses:
- '200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/ListMemoryBanksResponse'
- description: OK
- tags:
- - MemoryBanks
- post:
- parameters:
- - description: JSON-encoded provider data which will be made available to the
- adapter servicing the API
- in: header
- name: X-LlamaStack-Provider-Data
- required: false
- schema:
- type: string
- - description: Version of the client making the request. This is used to ensure
- that the client and server are compatible.
- in: header
- name: X-LlamaStack-Client-Version
- required: false
- schema:
- type: string
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/RegisterMemoryBankRequest'
- required: true
- responses:
- '200':
- content:
- application/json:
- schema:
- oneOf:
- - $ref: '#/components/schemas/VectorMemoryBank'
- - $ref: '#/components/schemas/KeyValueMemoryBank'
- - $ref: '#/components/schemas/KeywordMemoryBank'
- - $ref: '#/components/schemas/GraphMemoryBank'
- description: ''
- tags:
- - MemoryBanks
- /v1/memory-banks/{memory_bank_id}:
- delete:
- parameters:
- - in: path
- name: memory_bank_id
- required: true
- schema:
- type: string
- - description: JSON-encoded provider data which will be made available to the
- adapter servicing the API
- in: header
- name: X-LlamaStack-Provider-Data
- required: false
- schema:
- type: string
- - description: Version of the client making the request. This is used to ensure
- that the client and server are compatible.
- in: header
- name: X-LlamaStack-Client-Version
- required: false
- schema:
- type: string
- responses:
- '200':
- description: OK
- tags:
- - MemoryBanks
- get:
- parameters:
- - in: path
- name: memory_bank_id
- required: true
- schema:
- type: string
- - description: JSON-encoded provider data which will be made available to the
- adapter servicing the API
- in: header
- name: X-LlamaStack-Provider-Data
- required: false
- schema:
- type: string
- - description: Version of the client making the request. This is used to ensure
- that the client and server are compatible.
- in: header
- name: X-LlamaStack-Client-Version
- required: false
- schema:
- type: string
- responses:
- '200':
- content:
- application/json:
- schema:
- oneOf:
- - $ref: '#/components/schemas/MemoryBank'
- - type: 'null'
- description: OK
- tags:
- - MemoryBanks
- /v1/memory/insert:
- post:
- parameters:
- - description: JSON-encoded provider data which will be made available to the
- adapter servicing the API
- in: header
- name: X-LlamaStack-Provider-Data
- required: false
- schema:
- type: string
- - description: Version of the client making the request. This is used to ensure
- that the client and server are compatible.
- in: header
- name: X-LlamaStack-Client-Version
- required: false
- schema:
- type: string
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/InsertDocumentsRequest'
- required: true
- responses:
- '200':
- description: OK
- tags:
- - Memory
- /v1/memory/query:
- post:
- parameters:
- - description: JSON-encoded provider data which will be made available to the
- adapter servicing the API
- in: header
- name: X-LlamaStack-Provider-Data
- required: false
- schema:
- type: string
- - description: Version of the client making the request. This is used to ensure
- that the client and server are compatible.
- in: header
- name: X-LlamaStack-Client-Version
- required: false
- schema:
- type: string
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/QueryDocumentsRequest'
- required: true
- responses:
- '200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/QueryDocumentsResponse'
- description: OK
- tags:
- - Memory
/v1/models:
get:
parameters:
@@ -5562,6 +5234,182 @@ paths:
description: OK
tags:
- ToolGroups
+ /v1/vector-dbs:
+ get:
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-Provider-Data
+ required: false
+ schema:
+ type: string
+ - description: Version of the client making the request. This is used to ensure
+ that the client and server are compatible.
+ in: header
+ name: X-LlamaStack-Client-Version
+ required: false
+ schema:
+ type: string
+ responses:
+ '200':
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/ListVectorDBsResponse'
+ description: OK
+ tags:
+ - VectorDBs
+ post:
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-Provider-Data
+ required: false
+ schema:
+ type: string
+ - description: Version of the client making the request. This is used to ensure
+ that the client and server are compatible.
+ in: header
+ name: X-LlamaStack-Client-Version
+ required: false
+ schema:
+ type: string
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/RegisterVectorDbRequest'
+ required: true
+ responses:
+ '200':
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/VectorDB'
+ description: OK
+ tags:
+ - VectorDBs
+ /v1/vector-dbs/{vector_db_id}:
+ delete:
+ parameters:
+ - in: path
+ name: vector_db_id
+ required: true
+ schema:
+ type: string
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-Provider-Data
+ required: false
+ schema:
+ type: string
+ - description: Version of the client making the request. This is used to ensure
+ that the client and server are compatible.
+ in: header
+ name: X-LlamaStack-Client-Version
+ required: false
+ schema:
+ type: string
+ responses:
+ '200':
+ description: OK
+ tags:
+ - VectorDBs
+ get:
+ parameters:
+ - in: path
+ name: vector_db_id
+ required: true
+ schema:
+ type: string
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-Provider-Data
+ required: false
+ schema:
+ type: string
+ - description: Version of the client making the request. This is used to ensure
+ that the client and server are compatible.
+ in: header
+ name: X-LlamaStack-Client-Version
+ required: false
+ schema:
+ type: string
+ responses:
+ '200':
+ content:
+ application/json:
+ schema:
+ oneOf:
+ - $ref: '#/components/schemas/VectorDB'
+ - type: 'null'
+ description: OK
+ tags:
+ - VectorDBs
+ /v1/vector-io/insert:
+ post:
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-Provider-Data
+ required: false
+ schema:
+ type: string
+ - description: Version of the client making the request. This is used to ensure
+ that the client and server are compatible.
+ in: header
+ name: X-LlamaStack-Client-Version
+ required: false
+ schema:
+ type: string
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/InsertChunksRequest'
+ required: true
+ responses:
+ '200':
+ description: OK
+ tags:
+ - VectorIO
+ /v1/vector-io/query:
+ post:
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-Provider-Data
+ required: false
+ schema:
+ type: string
+ - description: Version of the client making the request. This is used to ensure
+ that the client and server are compatible.
+ in: header
+ name: X-LlamaStack-Client-Version
+ required: false
+ schema:
+ type: string
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/QueryChunksRequest'
+ required: true
+ responses:
+ '200':
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/QueryChunksResponse'
+ description: OK
+ tags:
+ - VectorIO
/v1/version:
get:
parameters:
@@ -5767,12 +5615,6 @@ tags:
- description:
name: EvaluateRowsRequest
-- description:
- name: GraphMemoryBank
-- description:
- name: GraphMemoryBankParams
- description:
name: GreedySamplingStrategy
@@ -5786,9 +5628,9 @@ tags:
- name: Inference
- description:
name: InferenceStep
-- description:
- name: InsertDocumentsRequest
+ name: InsertChunksRequest
- name: Inspect
- description:
@@ -5805,18 +5647,6 @@ tags:
name: JobStatus
- description:
name: JsonType
-- description:
- name: KeyValueMemoryBank
-- description:
- name: KeyValueMemoryBankParams
-- description:
- name: KeywordMemoryBank
-- description:
- name: KeywordMemoryBankParams
- description:
name: LLMAsJudgeScoringFnParams
@@ -5826,9 +5656,6 @@ tags:
- description:
name: ListEvalTasksResponse
-- description:
- name: ListMemoryBanksResponse
- description:
name: ListModelsResponse
@@ -5853,6 +5680,9 @@ tags:
- description:
name: ListToolsResponse
+- description:
+ name: ListVectorDBsResponse
- description:
name: LogEventRequest
@@ -5861,13 +5691,6 @@ tags:
- description:
name: LoraFinetuningConfig
-- name: Memory
-- description:
- name: MemoryBank
-- description:
- name: MemoryBankDocument
-- name: MemoryBanks
- description:
name: MemoryRetrievalStep
@@ -5920,17 +5743,17 @@ tags:
- description:
name: QATFinetuningConfig
+- description:
+ name: QueryChunksRequest
+- description:
+ name: QueryChunksResponse
- description:
name: QueryCondition
- description:
name: QueryConditionOp
-- description:
- name: QueryDocumentsRequest
-- description:
- name: QueryDocumentsResponse
- description:
name: QuerySpanTreeResponse
@@ -5949,9 +5772,6 @@ tags:
- description:
name: RegisterEvalTaskRequest
-- description:
- name: RegisterMemoryBankRequest
- description:
name: RegisterModelRequest
@@ -5964,6 +5784,9 @@ tags:
- description:
name: RegisterToolGroupRequest
+- description:
+ name: RegisterVectorDbRequest
- description:
name: ResponseFormat
- description:
@@ -6128,12 +5951,10 @@ tags:
name: UnstructuredLogEvent
- description:
name: UserMessage
-- description:
- name: VectorMemoryBank
-- description:
- name: VectorMemoryBankParams
+- description:
+ name: VectorDB
+- name: VectorDBs
+- name: VectorIO
- description:
name: VersionInfo
- description:
@@ -6149,8 +5970,6 @@ x-tagGroups:
- EvalTasks
- Inference
- Inspect
- - Memory
- - MemoryBanks
- Models
- PostTraining (Coming Soon)
- Safety
@@ -6161,6 +5980,8 @@ x-tagGroups:
- Telemetry
- ToolGroups
- ToolRuntime
+ - VectorDBs
+ - VectorIO
- name: Types
tags:
- AgentCandidate
@@ -6216,28 +6037,21 @@ x-tagGroups:
- EvalTask
- EvaluateResponse
- EvaluateRowsRequest
- - GraphMemoryBank
- - GraphMemoryBankParams
- GreedySamplingStrategy
- HealthInfo
- ImageContentItem
- ImageDelta
- InferenceStep
- - InsertDocumentsRequest
+ - InsertChunksRequest
- InterleavedContent
- InterleavedContentItem
- InvokeToolRequest
- Job
- JobStatus
- JsonType
- - KeyValueMemoryBank
- - KeyValueMemoryBankParams
- - KeywordMemoryBank
- - KeywordMemoryBankParams
- LLMAsJudgeScoringFnParams
- ListDatasetsResponse
- ListEvalTasksResponse
- - ListMemoryBanksResponse
- ListModelsResponse
- ListPostTrainingJobsResponse
- ListProvidersResponse
@@ -6246,11 +6060,10 @@ x-tagGroups:
- ListShieldsResponse
- ListToolGroupsResponse
- ListToolsResponse
+ - ListVectorDBsResponse
- LogEventRequest
- LogSeverity
- LoraFinetuningConfig
- - MemoryBank
- - MemoryBankDocument
- MemoryRetrievalStep
- Message
- MetricEvent
@@ -6269,21 +6082,21 @@ x-tagGroups:
- PreferenceOptimizeRequest
- ProviderInfo
- QATFinetuningConfig
+ - QueryChunksRequest
+ - QueryChunksResponse
- QueryCondition
- QueryConditionOp
- - QueryDocumentsRequest
- - QueryDocumentsResponse
- QuerySpanTreeResponse
- QuerySpansResponse
- QueryTracesResponse
- RegexParserScoringFnParams
- RegisterDatasetRequest
- RegisterEvalTaskRequest
- - RegisterMemoryBankRequest
- RegisterModelRequest
- RegisterScoringFunctionRequest
- RegisterShieldRequest
- RegisterToolGroupRequest
+ - RegisterVectorDbRequest
- ResponseFormat
- RouteInfo
- RunEvalRequest
@@ -6341,7 +6154,6 @@ x-tagGroups:
- UnionType
- UnstructuredLogEvent
- UserMessage
- - VectorMemoryBank
- - VectorMemoryBankParams
+ - VectorDB
- VersionInfo
- ViolationLevel
diff --git a/llama_stack/distribution/routers/routers.py b/llama_stack/distribution/routers/routers.py
index c2019af12..3ae9833dc 100644
--- a/llama_stack/distribution/routers/routers.py
+++ b/llama_stack/distribution/routers/routers.py
@@ -440,7 +440,7 @@ class ToolRuntimeRouter(ToolRuntime):
) -> None:
self.routing_table = routing_table
- # TODO: this should be in sync with "get_all_api_endpoints()"
+ # HACK ALERT this should be in sync with "get_all_api_endpoints()"
# TODO: make sure rag_tool vs builtin::memory is correct everywhere
self.rag_tool = self.RagToolImpl(routing_table)
setattr(self, "rag_tool.query_context", self.rag_tool.query_context)
diff --git a/llama_stack/distribution/server/endpoints.py b/llama_stack/distribution/server/endpoints.py
index 745bcddea..180479e40 100644
--- a/llama_stack/distribution/server/endpoints.py
+++ b/llama_stack/distribution/server/endpoints.py
@@ -38,6 +38,8 @@ def get_all_api_endpoints() -> Dict[Api, List[ApiEndpoint]]:
for api, protocol in protocols.items():
endpoints = []
protocol_methods = inspect.getmembers(protocol, predicate=inspect.isfunction)
+
+ # HACK ALERT
if api == Api.tool_runtime:
for tool_group in SpecialToolGroup:
sub_protocol = toolgroup_protocols[tool_group]
diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
index 01a82643c..d49c1f749 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
@@ -970,7 +970,7 @@ async def execute_tool_call_maybe(
result = await tool_runtime_api.invoke_tool(
tool_name=name,
- args=dict(
+ kwargs=dict(
session_id=session_id,
**tool_call_args,
),