diff --git a/source/agentic_system_types.py b/source/agentic_system_types.py
index ecbeffb01..91823586f 100644
--- a/source/agentic_system_types.py
+++ b/source/agentic_system_types.py
@@ -58,16 +58,25 @@ class SafetyFilteringStep(ExecutionStepBase):
violation: Optional[SafetyViolation] = None
+@json_schema_type
@dataclass
-class IndexedMemoryDocument:
- index_id: str
- content: str
+class MemoryBank:  # Bank record (uuid + name); this is the element type returned by MemoryBanks.get_memory_banks.
+ uuid: str  # Bank identifier; plain string in openapi.yaml (no format constraint declared).
+ name: str  # Bank name; listed as required alongside uuid in the MemoryBank schema.
+
+
+@dataclass
+class MemoryBankDocument:  # NOTE(review): no @json_schema_type here, unlike MemoryBank; the spec inlines this schema at every use instead of a $ref — confirm intended.
+ uuid: str  # Document identifier; presumably what MemoryBanks' document_uuids refers to — TODO confirm.
+ content: bytes  # Raw bytes; openapi.yaml renders this as a string with contentEncoding: base64.
+ metadata: Dict[str, Any]  # String-keyed values; the schema allows null/boolean/number/string/array/object.
+ mime_type: str  # presumably the media type of `content` — TODO confirm
@dataclass
class MemoryRetrievalStep(ExecutionStepBase):
step_type = ExecutionStepType.memory_retrieval
- documents: List[IndexedMemoryDocument]
+ documents: List[MemoryBankDocument]
scores: List[float]
diff --git a/source/api_definitions.py b/source/api_definitions.py
index 87976a0ea..d147cae6d 100644
--- a/source/api_definitions.py
+++ b/source/api_definitions.py
@@ -7,7 +7,8 @@ import yaml
from agentic_system_types import (
AgenticSystemTurn,
ExecutionStepType,
- IndexedMemoryDocument,
+ MemoryBank,
+ MemoryBankDocument,
SafetyViolation,
)
@@ -172,6 +173,8 @@ class BatchInference(Protocol):
@dataclass
class AgenticSystemCreateRequest:
+ uuid: str
+
instructions: str
model: InstructModel
@@ -182,6 +185,8 @@ class AgenticSystemCreateRequest:
# execute themselves.
executable_tools: Set[str] = field(default_factory=set)
+ memory_bank_uuids: List[str] = field(default_factory=list)
+
input_shields: List[ShieldConfig] = field(default_factory=list)
output_shields: List[ShieldConfig] = field(default_factory=list)
@@ -189,13 +194,13 @@ class AgenticSystemCreateRequest:
@json_schema_type
@dataclass
class AgenticSystemCreateResponse:
- agent_id: str
+ agent_uuid: str
@json_schema_type
@dataclass
class AgenticSystemExecuteRequest:
- agent_id: str
+ agent_uuid: str
messages: List[Message]
turn_history: List[AgenticSystemTurn] = None
stream: bool = False
@@ -227,11 +232,12 @@ class AgenticSystemExecuteResponseStreamChunk:
step_uuid: str
step_type: ExecutionStepType
+ # TODO(ashwin): maybe add more structure here and do this as a proper tagged union
violation: Optional[SafetyViolation] = None
tool_call: Optional[ToolCall] = None
tool_response_delta: Optional[ToolResponse] = None
response_text_delta: Optional[str] = None
- retrieved_document: Optional[IndexedMemoryDocument] = None
+ retrieved_document: Optional[MemoryBankDocument] = None
stop_reason: Optional[StopReason] = None
@@ -259,6 +265,41 @@ class AgenticSystem(Protocol):
) -> None: ...
+class MemoryBanks(Protocol):  # Protocol declaring the /memory_banks/* endpoints; every method body is a `...` stub.
+ @webmethod(route="/memory_banks/create")  # Exposed as POST in openapi.yaml.
+ def create_memory_bank(
+ self,
+ bank_uuid: str,  # Required query parameter per openapi.yaml.
+ bank_name: str,  # Required query parameter per openapi.yaml.
+ documents: List[MemoryBankDocument],  # Required JSON request body: array of document objects.
+ ) -> None: ...
+
+ @webmethod(route="/memory_banks/get")  # Exposed as GET with no parameters.
+ def get_memory_banks(
+ self,
+ ) -> List[MemoryBank]: ...  # openapi.yaml renders the list as application/jsonl with a MemoryBank schema.
+
+ @webmethod(route="/memory_banks/insert")  # Exposed as POST in openapi.yaml.
+ def post_insert_memory_documents(
+ self,
+ bank_uuid: str,  # Required query parameter per openapi.yaml.
+ documents: List[MemoryBankDocument],  # Required JSON request body: array of document objects.
+ ) -> None: ...
+
+ @webmethod(route="/memory_banks/delete")  # Exposed as POST (not DELETE) in openapi.yaml; takes document ids, not the bank itself.
+ def post_delete_memory_documents(
+ self,
+ bank_uuid: str,  # Required query parameter per openapi.yaml.
+ document_uuids: List[str],  # Required JSON request body: array of strings.
+ ) -> None: ...
+
+ @webmethod(route="/memory_banks/drop")  # Exposed as DELETE in openapi.yaml; presumably removes the whole bank — TODO confirm.
+ def remove_memory_bank(
+ self,
+ bank_uuid: str,  # Required query parameter per openapi.yaml; no request body.
+ ) -> None: ...
+
+
@dataclass
class KPromptGenerations:
prompt: Message
@@ -456,6 +497,7 @@ class LlamaStackEndpoints(
SyntheticDataGeneration,
Datasets,
Finetuning,
+ MemoryBanks,
): ...
diff --git a/source/openapi.html b/source/openapi.html
index ff00b3a0c..11e18c18b 100644
--- a/source/openapi.html
+++ b/source/openapi.html
@@ -119,6 +119,93 @@
}
}
},
+ "/memory_banks/create": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK"
+ }
+ },
+ "tags": [
+ "MemoryBanks"
+ ],
+ "parameters": [
+ {
+ "name": "bank_uuid",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "bank_name",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "uuid": {
+ "type": "string"
+ },
+ "content": {
+ "type": "string",
+ "contentEncoding": "base64"
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ },
+ "mime_type": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "uuid",
+ "content",
+ "metadata",
+ "mime_type"
+ ]
+ }
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/agentic_system/delete": {
"delete": {
"responses": {
@@ -192,6 +279,26 @@
]
}
},
+ "/memory_banks/get": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/jsonl": {
+ "schema": {
+ "$ref": "#/components/schemas/MemoryBank"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "MemoryBanks"
+ ],
+ "parameters": []
+ }
+ },
"/finetuning/job/artifacts": {
"get": {
"responses": {
@@ -353,6 +460,41 @@
}
}
},
+ "/memory_banks/delete": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK"
+ }
+ },
+ "tags": [
+ "MemoryBanks"
+ ],
+ "parameters": [
+ {
+ "name": "bank_uuid",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/synthetic_data_generation/generate": {
"post": {
"responses": {
@@ -383,6 +525,85 @@
}
}
},
+ "/memory_banks/insert": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK"
+ }
+ },
+ "tags": [
+ "MemoryBanks"
+ ],
+ "parameters": [
+ {
+ "name": "bank_uuid",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "uuid": {
+ "type": "string"
+ },
+ "content": {
+ "type": "string",
+ "contentEncoding": "base64"
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ },
+ "mime_type": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "uuid",
+ "content",
+ "metadata",
+ "mime_type"
+ ]
+ }
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/reward_scoring/score": {
"post": {
"responses": {
@@ -435,6 +656,28 @@
"required": true
}
}
+ },
+ "/memory_banks/drop": {
+ "delete": {
+ "responses": {
+ "200": {
+ "description": "OK"
+ }
+ },
+ "tags": [
+ "MemoryBanks"
+ ],
+ "parameters": [
+ {
+ "name": "bank_uuid",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
}
},
"jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
@@ -487,6 +730,9 @@
"AgenticSystemCreateRequest": {
"type": "object",
"properties": {
+ "uuid": {
+ "type": "string"
+ },
"instructions": {
"type": "string"
},
@@ -571,6 +817,12 @@
},
"uniqueItems": true
},
+ "memory_bank_uuids": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
"input_shields": {
"type": "array",
"items": {
@@ -586,10 +838,12 @@
},
"additionalProperties": false,
"required": [
+ "uuid",
"instructions",
"model",
"available_tools",
"executable_tools",
+ "memory_bank_uuids",
"input_shields",
"output_shields"
]
@@ -597,19 +851,19 @@
"AgenticSystemCreateResponse": {
"type": "object",
"properties": {
- "agent_id": {
+ "agent_uuid": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
- "agent_id"
+ "agent_uuid"
]
},
"AgenticSystemExecuteRequest": {
"type": "object",
"properties": {
- "agent_id": {
+ "agent_uuid": {
"type": "string"
},
"messages": {
@@ -631,7 +885,7 @@
},
"additionalProperties": false,
"required": [
- "agent_id",
+ "agent_uuid",
"messages",
"turn_history",
"stream"
@@ -875,17 +1129,48 @@
"items": {
"type": "object",
"properties": {
- "index_id": {
+ "uuid": {
"type": "string"
},
"content": {
+ "type": "string",
+ "contentEncoding": "base64"
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ },
+ "mime_type": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
- "index_id",
- "content"
+ "uuid",
+ "content",
+ "metadata",
+ "mime_type"
]
}
},
@@ -1204,17 +1489,48 @@
"retrieved_document": {
"type": "object",
"properties": {
- "index_id": {
+ "uuid": {
"type": "string"
},
"content": {
+ "type": "string",
+ "contentEncoding": "base64"
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ },
+ "mime_type": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
- "index_id",
- "content"
+ "uuid",
+ "content",
+ "metadata",
+ "mime_type"
]
},
"stop_reason": {
@@ -1305,6 +1621,22 @@
],
"title": "Dataset to be used for training or evaluating language models."
},
+ "MemoryBank": {
+ "type": "object",
+ "properties": {
+ "uuid": {
+ "type": "string"
+ },
+ "name": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "uuid",
+ "name"
+ ]
+ },
"FinetuningJobArtifactsResponse": {
"type": "object",
"properties": {
@@ -2406,20 +2738,23 @@
],
"tags": [
{
- "name": "SyntheticDataGeneration"
+ "name": "Inference"
},
{
- "name": "Datasets"
+ "name": "MemoryBanks"
},
{
"name": "AgenticSystem"
},
{
- "name": "Inference"
+ "name": "SyntheticDataGeneration"
},
{
"name": "Finetuning"
},
+ {
+ "name": "Datasets"
+ },
{
"name": "RewardScoring"
},
@@ -2471,6 +2806,10 @@
"name": "Dataset",
"description": "Dataset to be used for training or evaluating language models.\n\n"
},
+ {
+ "name": "MemoryBank",
+ "description": ""
+ },
{
"name": "FinetuningJobArtifactsResponse",
"description": "Artifacts of a finetuning job.\n\n"
@@ -2556,6 +2895,7 @@
"Datasets",
"Finetuning",
"Inference",
+ "MemoryBanks",
"RewardScoring",
"SyntheticDataGeneration"
]
@@ -2584,6 +2924,7 @@
"FinetuningTrainRequest",
"KScoredPromptGenerations",
"LoraFinetuningConfig",
+ "MemoryBank",
"Message",
"MessageScore",
"OptimizerConfig",
diff --git a/source/openapi.yaml b/source/openapi.yaml
index 36978ac42..8d0363936 100644
--- a/source/openapi.yaml
+++ b/source/openapi.yaml
@@ -52,6 +52,10 @@ components:
type: array
instructions:
type: string
+ memory_bank_uuids:
+ items:
+ type: string
+ type: array
model:
enum:
- llama3_8b_chat
@@ -61,26 +65,30 @@ components:
items:
$ref: '#/components/schemas/ShieldConfig'
type: array
+ uuid:
+ type: string
required:
+ - uuid
- instructions
- model
- available_tools
- executable_tools
+ - memory_bank_uuids
- input_shields
- output_shields
type: object
AgenticSystemCreateResponse:
additionalProperties: false
properties:
- agent_id:
+ agent_uuid:
type: string
required:
- - agent_id
+ - agent_uuid
type: object
AgenticSystemExecuteRequest:
additionalProperties: false
properties:
- agent_id:
+ agent_uuid:
type: string
messages:
items:
@@ -94,7 +102,7 @@ components:
$ref: '#/components/schemas/AgenticSystemTurn'
type: array
required:
- - agent_id
+ - agent_uuid
- messages
- turn_history
- stream
@@ -124,12 +132,27 @@ components:
additionalProperties: false
properties:
content:
+ contentEncoding: base64
type: string
- index_id:
+ metadata:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ mime_type:
+ type: string
+ uuid:
type: string
required:
- - index_id
+ - uuid
- content
+ - metadata
+ - mime_type
type: object
step_type:
enum:
@@ -342,12 +365,27 @@ components:
additionalProperties: false
properties:
content:
+ contentEncoding: base64
type: string
- index_id:
+ metadata:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ mime_type:
+ type: string
+ uuid:
type: string
required:
- - index_id
+ - uuid
- content
+ - metadata
+ - mime_type
type: object
type: array
scores:
@@ -972,6 +1010,17 @@ components:
- rank
- alpha
type: object
+ MemoryBank:
+ additionalProperties: false
+ properties:
+ name:
+ type: string
+ uuid:
+ type: string
+ required:
+ - uuid
+ - name
+ type: object
Message:
additionalProperties: false
properties:
@@ -1428,6 +1477,147 @@ paths:
description: OK
tags:
- Finetuning
+ /memory_banks/create:
+ post:
+ parameters:
+ - in: query
+ name: bank_uuid
+ required: true
+ schema:
+ type: string
+ - in: query
+ name: bank_name
+ required: true
+ schema:
+ type: string
+ requestBody:
+ content:
+ application/json:
+ schema:
+ items:
+ additionalProperties: false
+ properties:
+ content:
+ contentEncoding: base64
+ type: string
+ metadata:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ mime_type:
+ type: string
+ uuid:
+ type: string
+ required:
+ - uuid
+ - content
+ - metadata
+ - mime_type
+ type: object
+ type: array
+ required: true
+ responses:
+ '200':
+ description: OK
+ tags:
+ - MemoryBanks
+ /memory_banks/delete:
+ post:
+ parameters:
+ - in: query
+ name: bank_uuid
+ required: true
+ schema:
+ type: string
+ requestBody:
+ content:
+ application/json:
+ schema:
+ items:
+ type: string
+ type: array
+ required: true
+ responses:
+ '200':
+ description: OK
+ tags:
+ - MemoryBanks
+ /memory_banks/drop:
+ delete:
+ parameters:
+ - in: query
+ name: bank_uuid
+ required: true
+ schema:
+ type: string
+ responses:
+ '200':
+ description: OK
+ tags:
+ - MemoryBanks
+ /memory_banks/get:
+ get:
+ parameters: []
+ responses:
+ '200':
+ content:
+ application/jsonl:
+ schema:
+ $ref: '#/components/schemas/MemoryBank'
+ description: OK
+ tags:
+ - MemoryBanks
+ /memory_banks/insert:
+ post:
+ parameters:
+ - in: query
+ name: bank_uuid
+ required: true
+ schema:
+ type: string
+ requestBody:
+ content:
+ application/json:
+ schema:
+ items:
+ additionalProperties: false
+ properties:
+ content:
+ contentEncoding: base64
+ type: string
+ metadata:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ mime_type:
+ type: string
+ uuid:
+ type: string
+ required:
+ - uuid
+ - content
+ - metadata
+ - mime_type
+ type: object
+ type: array
+ required: true
+ responses:
+ '200':
+ description: OK
+ tags:
+ - MemoryBanks
/reward_scoring/score:
post:
parameters: []
@@ -1469,11 +1659,12 @@ security:
servers:
- url: http://llama.meta.com
tags:
-- name: SyntheticDataGeneration
-- name: Datasets
-- name: AgenticSystem
- name: Inference
+- name: MemoryBanks
+- name: AgenticSystem
+- name: SyntheticDataGeneration
- name: Finetuning
+- name: Datasets
- name: RewardScoring
- description:
name: ShieldConfig
@@ -1523,6 +1714,8 @@ tags:
'
name: Dataset
+- description:
+ name: MemoryBank
- description: 'Artifacts of a finetuning job.
@@ -1623,6 +1816,7 @@ x-tagGroups:
- Datasets
- Finetuning
- Inference
+ - MemoryBanks
- RewardScoring
- SyntheticDataGeneration
- name: Types
@@ -1648,6 +1842,7 @@ x-tagGroups:
- FinetuningTrainRequest
- KScoredPromptGenerations
- LoraFinetuningConfig
+ - MemoryBank
- Message
- MessageScore
- OptimizerConfig