diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html
index 4e24f1d9d..613484fe0 100644
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@@ -21,7 +21,7 @@
"info": {
"title": "[DRAFT] Llama Stack Specification",
"version": "0.0.1",
- "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-23 10:08:50.987103"
+ "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-23 10:24:44.422779"
},
"servers": [
{
@@ -1180,7 +1180,7 @@
]
}
},
- "/memory_banks/get": {
+ "/memory/get": {
"get": {
"responses": {
"200": {
@@ -1190,7 +1190,7 @@
"schema": {
"oneOf": [
{
- "$ref": "#/components/schemas/MemoryBankSpec"
+ "$ref": "#/components/schemas/MemoryBank"
},
{
"type": "null"
@@ -1206,11 +1206,11 @@
],
"parameters": [
{
- "name": "bank_type",
+ "name": "bank_id",
"in": "query",
"required": true,
"schema": {
- "$ref": "#/components/schemas/MemoryBankType"
+ "type": "string"
}
},
{
@@ -1270,6 +1270,51 @@
]
}
},
+ "/memory_banks/get": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/MemoryBankSpec"
+ },
+ {
+ "type": "null"
+ }
+ ]
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "MemoryBanks"
+ ],
+ "parameters": [
+ {
+ "name": "bank_type",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "$ref": "#/components/schemas/MemoryBankType"
+ }
+ },
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
"/shields/get": {
"get": {
"responses": {
@@ -1544,6 +1589,36 @@
}
}
},
+ "tags": [
+ "MemoryBanks"
+ ],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
+ "/memory/list": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/jsonl": {
+ "schema": {
+ "$ref": "#/components/schemas/MemoryBank"
+ }
+ }
+ }
+ }
+ },
"tags": [
"Memory"
],
@@ -4471,6 +4546,60 @@
"job_uuid"
]
},
+ "Model": {
+ "description": "The model family and SKU of the model along with other parameters corresponding to the model."
+ },
+ "ModelServingSpec": {
+ "type": "object",
+ "properties": {
+ "llama_model": {
+ "$ref": "#/components/schemas/Model"
+ },
+ "provider_config": {
+ "type": "object",
+ "properties": {
+ "provider_id": {
+ "type": "string"
+ },
+ "config": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "provider_id",
+ "config"
+ ]
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "llama_model",
+ "provider_config"
+ ]
+ },
"MemoryBankType": {
"type": "string",
"enum": [
@@ -4531,60 +4660,6 @@
"provider_config"
]
},
- "Model": {
- "description": "The model family and SKU of the model along with other parameters corresponding to the model."
- },
- "ModelServingSpec": {
- "type": "object",
- "properties": {
- "llama_model": {
- "$ref": "#/components/schemas/Model"
- },
- "provider_config": {
- "type": "object",
- "properties": {
- "provider_id": {
- "type": "string"
- },
- "config": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "provider_id",
- "config"
- ]
- }
- },
- "additionalProperties": false,
- "required": [
- "llama_model",
- "provider_config"
- ]
- },
"ShieldSpec": {
"type": "object",
"properties": {
@@ -5895,25 +5970,25 @@
],
"tags": [
{
- "name": "Datasets"
- },
- {
- "name": "Safety"
- },
- {
- "name": "Telemetry"
+ "name": "Inference"
},
{
"name": "Agents"
},
{
- "name": "BatchInference"
+ "name": "PostTraining"
+ },
+ {
+ "name": "SyntheticDataGeneration"
},
{
"name": "Models"
},
{
- "name": "SyntheticDataGeneration"
+ "name": "BatchInference"
+ },
+ {
+ "name": "Safety"
},
{
"name": "Evaluations"
@@ -5922,16 +5997,19 @@
"name": "Shields"
},
{
- "name": "Memory"
- },
- {
- "name": "PostTraining"
+ "name": "Telemetry"
},
{
"name": "RewardScoring"
},
{
- "name": "Inference"
+ "name": "MemoryBanks"
+ },
+ {
+ "name": "Datasets"
+ },
+ {
+ "name": "Memory"
},
{
"name": "BuiltinTool",
@@ -6273,14 +6351,6 @@
"name": "EvaluationJobStatusResponse",
"description": ""
},
- {
- "name": "MemoryBankType",
- "description": ""
- },
- {
- "name": "MemoryBankSpec",
- "description": ""
- },
{
"name": "Model",
"description": "The model family and SKU of the model along with other parameters corresponding to the model.\n\n"
@@ -6289,6 +6359,14 @@
"name": "ModelServingSpec",
"description": ""
},
+ {
+ "name": "MemoryBankType",
+ "description": ""
+ },
+ {
+ "name": "MemoryBankSpec",
+ "description": ""
+ },
{
"name": "ShieldSpec",
"description": ""
@@ -6456,6 +6534,7 @@
"Evaluations",
"Inference",
"Memory",
+ "MemoryBanks",
"Models",
"PostTraining",
"RewardScoring",
diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml
index e4334dbdf..bc5c4c4bd 100644
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@@ -2443,7 +2443,7 @@ info:
description: "This is the specification of the llama stack that provides\n \
\ a set of endpoints and their corresponding interfaces that are tailored\
\ to\n best leverage Llama Models. The specification is still in\
- \ draft and subject to change.\n Generated at 2024-09-23 10:08:50.987103"
+ \ draft and subject to change.\n Generated at 2024-09-23 10:24:44.422779"
title: '[DRAFT] Llama Stack Specification'
version: 0.0.1
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@@ -3147,6 +3147,32 @@ paths:
description: OK
tags:
- Memory
+ /memory/get:
+ get:
+ parameters:
+ - in: query
+ name: bank_id
+ required: true
+ schema:
+ type: string
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
+ responses:
+ '200':
+ content:
+ application/json:
+ schema:
+ oneOf:
+ - $ref: '#/components/schemas/MemoryBank'
+ - type: 'null'
+ description: OK
+ tags:
+ - Memory
/memory/insert:
post:
parameters:
@@ -3168,6 +3194,25 @@ paths:
description: OK
tags:
- Memory
+ /memory/list:
+ get:
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
+ responses:
+ '200':
+ content:
+ application/jsonl:
+ schema:
+ $ref: '#/components/schemas/MemoryBank'
+ description: OK
+ tags:
+ - Memory
/memory/query:
post:
parameters:
@@ -3239,7 +3284,7 @@ paths:
- type: 'null'
description: OK
tags:
- - Memory
+ - MemoryBanks
/memory_banks/list:
get:
parameters:
@@ -3258,7 +3303,7 @@ paths:
$ref: '#/components/schemas/MemoryBankSpec'
description: OK
tags:
- - Memory
+ - MemoryBanks
/models/get:
get:
parameters:
@@ -3636,19 +3681,20 @@ security:
servers:
- url: http://any-hosted-llama-stack.com
tags:
-- name: Datasets
-- name: Safety
-- name: Telemetry
+- name: Inference
- name: Agents
-- name: BatchInference
-- name: Models
+- name: PostTraining
- name: SyntheticDataGeneration
+- name: Models
+- name: BatchInference
+- name: Safety
- name: Evaluations
- name: Shields
-- name: Memory
-- name: PostTraining
+- name: Telemetry
- name: RewardScoring
-- name: Inference
+- name: MemoryBanks
+- name: Datasets
+- name: Memory
- description:
name: BuiltinTool
- description:
name: EvaluationJobStatusResponse
-- description:
- name: MemoryBankType
-- description:
- name: MemoryBankSpec
- description: 'The model family and SKU of the model along with other parameters
corresponding to the model.
@@ -3929,6 +3971,10 @@ tags:
- description:
name: ModelServingSpec
+- description:
+ name: MemoryBankType
+- description:
+ name: MemoryBankSpec
- description:
name: ShieldSpec
- description:
@@ -4063,6 +4109,7 @@ x-tagGroups:
- Evaluations
- Inference
- Memory
+ - MemoryBanks
- Models
- PostTraining
- RewardScoring
diff --git a/llama_stack/apis/memory_banks/client.py b/llama_stack/apis/memory_banks/client.py
index d43b96be3..78a991374 100644
--- a/llama_stack/apis/memory_banks/client.py
+++ b/llama_stack/apis/memory_banks/client.py
@@ -25,7 +25,7 @@ class MemoryBanksClient(MemoryBanks):
async def shutdown(self) -> None:
pass
- async def list_memory_banks(self) -> List[MemoryBankSpec]:
+ async def list_available_memory_banks(self) -> List[MemoryBankSpec]:
async with httpx.AsyncClient() as client:
response = await client.get(
f"{self.base_url}/memory_banks/list",
@@ -34,7 +34,7 @@ class MemoryBanksClient(MemoryBanks):
response.raise_for_status()
return [MemoryBankSpec(**x) for x in response.json()]
- async def get_memory_bank(
+ async def get_serving_memory_bank(
self, bank_type: MemoryBankType
) -> Optional[MemoryBankSpec]:
async with httpx.AsyncClient() as client:
@@ -55,7 +55,7 @@ class MemoryBanksClient(MemoryBanks):
async def run_main(host: str, port: int, stream: bool):
client = MemoryBanksClient(f"http://{host}:{port}")
- response = await client.list_memory_banks()
+ response = await client.list_available_memory_banks()
cprint(f"list_memory_banks response={response}", "green")
diff --git a/llama_stack/apis/memory_banks/memory_banks.py b/llama_stack/apis/memory_banks/memory_banks.py
index 721983b19..bc09498c9 100644
--- a/llama_stack/apis/memory_banks/memory_banks.py
+++ b/llama_stack/apis/memory_banks/memory_banks.py
@@ -24,9 +24,9 @@ class MemoryBankSpec(BaseModel):
class MemoryBanks(Protocol):
@webmethod(route="/memory_banks/list", method="GET")
- async def list_memory_banks(self) -> List[MemoryBankSpec]: ...
+ async def list_available_memory_banks(self) -> List[MemoryBankSpec]: ...
@webmethod(route="/memory_banks/get", method="GET")
- async def get_memory_bank(
+ async def get_serving_memory_bank(
self, bank_type: MemoryBankType
) -> Optional[MemoryBankSpec]: ...
diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py
index fcd4d2b2b..0bff52608 100644
--- a/llama_stack/distribution/routers/routing_tables.py
+++ b/llama_stack/distribution/routers/routing_tables.py
@@ -95,7 +95,7 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
class MemoryBanksRoutingTable(CommonRoutingTableImpl, MemoryBanks):
- async def list_memory_banks(self) -> List[MemoryBankSpec]:
+ async def list_available_memory_banks(self) -> List[MemoryBankSpec]:
specs = []
for entry in self.routing_table_config:
specs.append(
@@ -106,7 +106,7 @@ class MemoryBanksRoutingTable(CommonRoutingTableImpl, MemoryBanks):
)
return specs
- async def get_memory_bank(self, bank_type: str) -> Optional[MemoryBankSpec]:
+ async def get_serving_memory_bank(self, bank_type: str) -> Optional[MemoryBankSpec]:
for entry in self.routing_table_config:
if entry.routing_key == bank_type:
return MemoryBankSpec(