Add pyopenapi fork to the repository, update RFC assets

2025-12-05 18:27:22 +00:00 · 2024-09-03 21:22:30 -07:00 · 2024-09-03 21:22:30 -07:00 · b60c125c55
commit b60c125c55
parent 0d619b9f8e
12 changed files with 2240 additions and 452 deletions
--- a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html
+++ b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html
@ -21,7 +21,7 @@
    "info": {
        "title": "[DRAFT] Llama Stack Specification",
        "version": "0.0.1",
-        "description": "This is the specification of the llama stack that provides\n                a set of endpoints and their corresponding interfaces that are tailored to\n                best leverage Llama Models. The specification is still in draft and subject to change.\n                Generated at 2024-08-23 06:36:10.417114"
+        "description": "This is the specification of the llama stack that provides\n                a set of endpoints and their corresponding interfaces that are tailored to\n                best leverage Llama Models. The specification is still in draft and subject to change.\n                Generated at 2024-09-03 21:36:00.770405"
    },
    "servers": [
        {
@ -29,7 +29,7 @@
        }
    ],
    "paths": {
-        "/inference/batch_chat_completion": {
+        "/batch_inference/chat_completion": {
            "post": {
                "responses": {
                    "200": {
@ -44,7 +44,7 @@
                    }
                },
                "tags": [
-                    "Inference"
+                    "BatchInference"
                ],
                "parameters": [],
                "requestBody": {
@ -59,7 +59,7 @@
                }
            }
        },
-        "/inference/batch_completion": {
+        "/batch_inference/completion": {
            "post": {
                "responses": {
                    "200": {
@ -74,7 +74,7 @@
                    }
                },
                "tags": [
-                    "Inference"
+                    "BatchInference"
                ],
                "parameters": [],
                "requestBody": {
@ -550,6 +550,58 @@
                ]
            }
        },
+        "/inference/embeddings": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/EmbeddingsResponse"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Inference"
+                ],
+                "parameters": [
+                    {
+                        "name": "model",
+                        "in": "query",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "type": "array",
+                                "items": {
+                                    "oneOf": [
+                                        {
+                                            "type": "string"
+                                        },
+                                        {
+                                            "type": "array",
+                                            "items": {
+                                                "type": "string"
+                                            }
+                                        }
+                                    ]
+                                }
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
        "/evaluate/question_answering/": {
            "post": {
                "responses": {
@ -1053,7 +1105,14 @@
                        "content": {
                            "application/json": {
                                "schema": {
-                                    "$ref": "#/components/schemas/MemoryBank"
+                                    "oneOf": [
+                                        {
+                                            "$ref": "#/components/schemas/MemoryBank"
+                                        },
+                                        {
+                                            "type": "null"
+                                        }
+                                    ]
                                }
                            }
                        }
@ -1228,6 +1287,14 @@
                        "schema": {
                            "type": "string"
                        }
+                    },
+                    {
+                        "name": "ttl_seconds",
+                        "in": "query",
+                        "required": false,
+                        "schema": {
+                            "type": "integer"
+                        }
                    }
                ],
                "requestBody": {
@ -1973,8 +2040,8 @@
                    "json",
                    "function_tag"
                ],
-                "title": "This Enum refers to the prompt format for calling zero shot tools",
-                "description": "`json` --\n    Refers to the json format for calling tools.\n    The json format takes the form like\n    {\n        \"type\": \"function\",\n        \"function\" : {\n            \"name\": \"function_name\",\n            \"description\": \"function_description\",\n            \"parameters\": {...}\n        }\n    }\n\n`function_tag` --\n    This is an example of how you could define\n    your own user defined format for making tool calls.\n    The function_tag format looks like this,\n    <function=function_name>(parameters)</function>\n\nThe detailed prompts for each of these formats are defined in `system_prompt.py`"
+                "title": "This Enum refers to the prompt format for calling custom / zero shot tools",
+                "description": "`json` --\n    Refers to the json format for calling tools.\n    The json format takes the form like\n    {\n        \"type\": \"function\",\n        \"function\" : {\n            \"name\": \"function_name\",\n            \"description\": \"function_description\",\n            \"parameters\": {...}\n        }\n    }\n\n`function_tag` --\n    This is an example of how you could define\n    your own user defined format for making tool calls.\n    The function_tag format looks like this,\n    <function=function_name>(parameters)</function>\n\nThe detailed prompts for each of these formats are added to llama cli"
            },
            "ToolResponseMessage": {
                "type": "object",
@ -2037,6 +2104,19 @@
                                }
                            }
                        ]
+                    },
+                    "context": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "type": "array",
+                                "items": {
+                                    "type": "string"
+                                }
+                            }
+                        ]
                    }
                },
                "additionalProperties": false,
@ -2393,95 +2473,6 @@
                    },
                    "instructions": {
                        "type": "string"
-                    },
-                    "memory_bank_configs": {
-                        "type": "array",
-                        "items": {
-                            "oneOf": [
-                                {
-                                    "type": "object",
-                                    "properties": {
-                                        "bank_id": {
-                                            "type": "string"
-                                        },
-                                        "type": {
-                                            "type": "string",
-                                            "const": "vector"
-                                        }
-                                    },
-                                    "additionalProperties": false,
-                                    "required": [
-                                        "bank_id",
-                                        "type"
-                                    ]
-                                },
-                                {
-                                    "type": "object",
-                                    "properties": {
-                                        "bank_id": {
-                                            "type": "string"
-                                        },
-                                        "type": {
-                                            "type": "string",
-                                            "const": "keyvalue"
-                                        },
-                                        "keys": {
-                                            "type": "array",
-                                            "items": {
-                                                "type": "string"
-                                            }
-                                        }
-                                    },
-                                    "additionalProperties": false,
-                                    "required": [
-                                        "bank_id",
-                                        "type",
-                                        "keys"
-                                    ]
-                                },
-                                {
-                                    "type": "object",
-                                    "properties": {
-                                        "bank_id": {
-                                            "type": "string"
-                                        },
-                                        "type": {
-                                            "type": "string",
-                                            "const": "keyword"
-                                        }
-                                    },
-                                    "additionalProperties": false,
-                                    "required": [
-                                        "bank_id",
-                                        "type"
-                                    ]
-                                },
-                                {
-                                    "type": "object",
-                                    "properties": {
-                                        "bank_id": {
-                                            "type": "string"
-                                        },
-                                        "type": {
-                                            "type": "string",
-                                            "const": "graph"
-                                        },
-                                        "entities": {
-                                            "type": "array",
-                                            "items": {
-                                                "type": "string"
-                                            }
-                                        }
-                                    },
-                                    "additionalProperties": false,
-                                    "required": [
-                                        "bank_id",
-                                        "type",
-                                        "entities"
-                                    ]
-                                }
-                            ]
-                        }
                    }
                },
                "additionalProperties": false,
@ -2579,6 +2570,9 @@
                        "type": "string",
                        "const": "function_call"
                    },
+                    "function_name": {
+                        "type": "string"
+                    },
                    "description": {
                        "type": "string"
                    },
@ -2595,90 +2589,11 @@
                "additionalProperties": false,
                "required": [
                    "type",
+                    "function_name",
                    "description",
                    "parameters"
                ]
            },
-            "MemoryBank": {
-                "type": "object",
-                "properties": {
-                    "bank_id": {
-                        "type": "string"
-                    },
-                    "name": {
-                        "type": "string"
-                    },
-                    "config": {
-                        "oneOf": [
-                            {
-                                "type": "object",
-                                "properties": {
-                                    "type": {
-                                        "type": "string",
-                                        "const": "vector"
-                                    },
-                                    "embedding_model": {
-                                        "type": "string"
-                                    }
-                                },
-                                "additionalProperties": false,
-                                "required": [
-                                    "type",
-                                    "embedding_model"
-                                ]
-                            },
-                            {
-                                "type": "object",
-                                "properties": {
-                                    "type": {
-                                        "type": "string",
-                                        "const": "keyvalue"
-                                    }
-                                },
-                                "additionalProperties": false,
-                                "required": [
-                                    "type"
-                                ]
-                            },
-                            {
-                                "type": "object",
-                                "properties": {
-                                    "type": {
-                                        "type": "string",
-                                        "const": "keyword"
-                                    }
-                                },
-                                "additionalProperties": false,
-                                "required": [
-                                    "type"
-                                ]
-                            },
-                            {
-                                "type": "object",
-                                "properties": {
-                                    "type": {
-                                        "type": "string",
-                                        "const": "graph"
-                                    }
-                                },
-                                "additionalProperties": false,
-                                "required": [
-                                    "type"
-                                ]
-                            }
-                        ]
-                    },
-                    "url": {
-                        "$ref": "#/components/schemas/URL"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "bank_id",
-                    "name",
-                    "config"
-                ]
-            },
            "MemoryToolDefinition": {
                "type": "object",
                "properties": {
@ -2698,17 +2613,108 @@
                        "type": "string",
                        "const": "memory"
                    },
-                    "memory_banks": {
+                    "memory_bank_configs": {
                        "type": "array",
                        "items": {
-                            "$ref": "#/components/schemas/MemoryBank"
+                            "oneOf": [
+                                {
+                                    "type": "object",
+                                    "properties": {
+                                        "bank_id": {
+                                            "type": "string"
+                                        },
+                                        "type": {
+                                            "type": "string",
+                                            "const": "vector"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "required": [
+                                        "bank_id",
+                                        "type"
+                                    ]
+                                },
+                                {
+                                    "type": "object",
+                                    "properties": {
+                                        "bank_id": {
+                                            "type": "string"
+                                        },
+                                        "type": {
+                                            "type": "string",
+                                            "const": "keyvalue"
+                                        },
+                                        "keys": {
+                                            "type": "array",
+                                            "items": {
+                                                "type": "string"
+                                            }
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "required": [
+                                        "bank_id",
+                                        "type",
+                                        "keys"
+                                    ]
+                                },
+                                {
+                                    "type": "object",
+                                    "properties": {
+                                        "bank_id": {
+                                            "type": "string"
+                                        },
+                                        "type": {
+                                            "type": "string",
+                                            "const": "keyword"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "required": [
+                                        "bank_id",
+                                        "type"
+                                    ]
+                                },
+                                {
+                                    "type": "object",
+                                    "properties": {
+                                        "bank_id": {
+                                            "type": "string"
+                                        },
+                                        "type": {
+                                            "type": "string",
+                                            "const": "graph"
+                                        },
+                                        "entities": {
+                                            "type": "array",
+                                            "items": {
+                                                "type": "string"
+                                            }
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "required": [
+                                        "bank_id",
+                                        "type",
+                                        "entities"
+                                    ]
+                                }
+                            ]
                        }
+                    },
+                    "max_tokens_in_context": {
+                        "type": "integer"
+                    },
+                    "max_chunks": {
+                        "type": "integer"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type",
-                    "memory_banks"
+                    "memory_bank_configs",
+                    "max_tokens_in_context",
+                    "max_chunks"
                ]
            },
            "OnViolationAction": {
@ -2973,8 +2979,21 @@
            "Attachment": {
                "type": "object",
                "properties": {
-                    "url": {
-                        "$ref": "#/components/schemas/URL"
+                    "content": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "type": "array",
+                                "items": {
+                                    "type": "string"
+                                }
+                            },
+                            {
+                                "$ref": "#/components/schemas/URL"
+                            }
+                        ]
                    },
                    "mime_type": {
                        "type": "string"
@ -2982,7 +3001,7 @@
                },
                "additionalProperties": false,
                "required": [
-                    "url",
+                    "content",
                    "mime_type"
                ]
            },
@ -3177,12 +3196,19 @@
                                    },
                                    "embedding_model": {
                                        "type": "string"
+                                    },
+                                    "chunk_size_in_tokens": {
+                                        "type": "integer"
+                                    },
+                                    "overlap_size_in_tokens": {
+                                        "type": "integer"
                                    }
                                },
                                "additionalProperties": false,
                                "required": [
                                    "type",
-                                    "embedding_model"
+                                    "embedding_model",
+                                    "chunk_size_in_tokens"
                                ]
                            },
                            {
@ -3235,6 +3261,93 @@
                    "config"
                ]
            },
+            "MemoryBank": {
+                "type": "object",
+                "properties": {
+                    "bank_id": {
+                        "type": "string"
+                    },
+                    "name": {
+                        "type": "string"
+                    },
+                    "config": {
+                        "oneOf": [
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "vector"
+                                    },
+                                    "embedding_model": {
+                                        "type": "string"
+                                    },
+                                    "chunk_size_in_tokens": {
+                                        "type": "integer"
+                                    },
+                                    "overlap_size_in_tokens": {
+                                        "type": "integer"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type",
+                                    "embedding_model",
+                                    "chunk_size_in_tokens"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "keyvalue"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "keyword"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "graph"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type"
+                                ]
+                            }
+                        ]
+                    },
+                    "url": {
+                        "$ref": "#/components/schemas/URL"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "bank_id",
+                    "name",
+                    "config"
+                ]
+            },
            "CreateRunRequest": {
                "type": "object",
                "properties": {
@ -3327,6 +3440,24 @@
                    "metadata"
                ]
            },
+            "EmbeddingsResponse": {
+                "type": "object",
+                "properties": {
+                    "embeddings": {
+                        "type": "array",
+                        "items": {
+                            "type": "array",
+                            "items": {
+                                "type": "number"
+                            }
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "embeddings"
+                ]
+            },
            "Checkpoint": {
                "description": "Checkpoint created during training runs"
            },
@ -3484,65 +3615,6 @@
                    "model_response"
                ]
            },
-            "MemoryBankDocument": {
-                "type": "object",
-                "properties": {
-                    "document_id": {
-                        "type": "string"
-                    },
-                    "content": {
-                        "oneOf": [
-                            {
-                                "type": "string"
-                            },
-                            {
-                                "type": "array",
-                                "items": {
-                                    "type": "string"
-                                }
-                            },
-                            {
-                                "$ref": "#/components/schemas/URL"
-                            }
-                        ]
-                    },
-                    "mime_type": {
-                        "type": "string"
-                    },
-                    "metadata": {
-                        "type": "object",
-                        "additionalProperties": {
-                            "oneOf": [
-                                {
-                                    "type": "null"
-                                },
-                                {
-                                    "type": "boolean"
-                                },
-                                {
-                                    "type": "number"
-                                },
-                                {
-                                    "type": "string"
-                                },
-                                {
-                                    "type": "array"
-                                },
-                                {
-                                    "type": "object"
-                                }
-                            ]
-                        }
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "document_id",
-                    "content",
-                    "mime_type",
-                    "metadata"
-                ]
-            },
            "MemoryRetrievalStep": {
                "type": "object",
                "properties": {
@ -3570,17 +3642,18 @@
                            "type": "string"
                        }
                    },
-                    "documents": {
-                        "type": "array",
-                        "items": {
-                            "$ref": "#/components/schemas/MemoryBankDocument"
-                        }
-                    },
-                    "scores": {
-                        "type": "array",
-                        "items": {
-                            "type": "number"
-                        }
+                    "inserted_context": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "type": "array",
+                                "items": {
+                                    "type": "string"
+                                }
+                            }
+                        ]
                    }
                },
                "additionalProperties": false,
@ -3589,8 +3662,7 @@
                    "step_id",
                    "step_type",
                    "memory_bank_ids",
-                    "documents",
-                    "scores"
+                    "inserted_context"
                ]
            },
            "Session": {
@ -3611,6 +3683,9 @@
                    "started_at": {
                        "type": "string",
                        "format": "date-time"
+                    },
+                    "memory_bank": {
+                        "$ref": "#/components/schemas/MemoryBank"
                    }
                },
                "additionalProperties": false,
@ -3928,6 +4003,65 @@
                    "other"
                ]
            },
+            "MemoryBankDocument": {
+                "type": "object",
+                "properties": {
+                    "document_id": {
+                        "type": "string"
+                    },
+                    "content": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "type": "array",
+                                "items": {
+                                    "type": "string"
+                                }
+                            },
+                            {
+                                "$ref": "#/components/schemas/URL"
+                            }
+                        ]
+                    },
+                    "mime_type": {
+                        "type": "string"
+                    },
+                    "metadata": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "null"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "array"
+                                },
+                                {
+                                    "type": "object"
+                                }
+                            ]
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "document_id",
+                    "content",
+                    "mime_type",
+                    "metadata"
+                ]
+            },
            "EvaluationJobArtifactsResponse": {
                "type": "object",
                "properties": {
@ -4504,12 +4638,16 @@
                                },
                                "token_count": {
                                    "type": "integer"
+                                },
+                                "document_id": {
+                                    "type": "string"
                                }
                            },
                            "additionalProperties": false,
                            "required": [
                                "content",
-                                "token_count"
+                                "token_count",
+                                "document_id"
                            ]
                        }
                    },
@ -5109,7 +5247,7 @@
    ],
    "tags": [
        {
-            "name": "Observability"
+            "name": "BatchInference"
        },
        {
            "name": "AgenticSystem"
@ -5121,19 +5259,22 @@
            "name": "Memory"
        },
        {
-            "name": "Evaluations"
+            "name": "Observability"
        },
        {
-            "name": "Datasets"
+            "name": "SyntheticDataGeneration"
+        },
+        {
+            "name": "Evaluations"
        },
        {
            "name": "RewardScoring"
        },
        {
-            "name": "Inference"
+            "name": "Datasets"
        },
        {
-            "name": "SyntheticDataGeneration"
+            "name": "Inference"
        },
        {
            "name": "BatchChatCompletionRequest",
@ -5181,7 +5322,7 @@
        },
        {
            "name": "ToolPromptFormat",
-            "description": "This Enum refers to the prompt format for calling zero shot tools\n\n`json` --\n    Refers to the json format for calling tools.\n    The json format takes the form like\n    {\n        \"type\": \"function\",\n        \"function\" : {\n            \"name\": \"function_name\",\n            \"description\": \"function_description\",\n            \"parameters\": {...}\n        }\n    }\n\n`function_tag` --\n    This is an example of how you could define\n    your own user defined format for making tool calls.\n    The function_tag format looks like this,\n    <function=function_name>(parameters)</function>\n\nThe detailed prompts for each of these formats are defined in `system_prompt.py`\n\n<SchemaDefinition schemaRef=\"#/components/schemas/ToolPromptFormat\" />"
+            "description": "This Enum refers to the prompt format for calling custom / zero shot tools\n\n`json` --\n    Refers to the json format for calling tools.\n    The json format takes the form like\n    {\n        \"type\": \"function\",\n        \"function\" : {\n            \"name\": \"function_name\",\n            \"description\": \"function_description\",\n            \"parameters\": {...}\n        }\n    }\n\n`function_tag` --\n    This is an example of how you could define\n    your own user defined format for making tool calls.\n    The function_tag format looks like this,\n    <function=function_name>(parameters)</function>\n\nThe detailed prompts for each of these formats are added to llama cli\n\n<SchemaDefinition schemaRef=\"#/components/schemas/ToolPromptFormat\" />"
        },
        {
            "name": "ToolResponseMessage",
@ -5259,10 +5400,6 @@
            "name": "FunctionCallToolDefinition",
            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/FunctionCallToolDefinition\" />"
        },
-        {
-            "name": "MemoryBank",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/MemoryBank\" />"
-        },
        {
            "name": "MemoryToolDefinition",
            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/MemoryToolDefinition\" />"
@ -5343,6 +5480,10 @@
            "name": "CreateMemoryBankRequest",
            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/CreateMemoryBankRequest\" />"
        },
+        {
+            "name": "MemoryBank",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/MemoryBank\" />"
+        },
        {
            "name": "CreateRunRequest",
            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/CreateRunRequest\" />"
@ -5351,6 +5492,10 @@
            "name": "Run",
            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/Run\" />"
        },
+        {
+            "name": "EmbeddingsResponse",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/EmbeddingsResponse\" />"
+        },
        {
            "name": "Checkpoint",
            "description": "Checkpoint created during training runs\n\n<SchemaDefinition schemaRef=\"#/components/schemas/Checkpoint\" />"
@ -5375,10 +5520,6 @@
            "name": "InferenceStep",
            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/InferenceStep\" />"
        },
-        {
-            "name": "MemoryBankDocument",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/MemoryBankDocument\" />"
-        },
        {
            "name": "MemoryRetrievalStep",
            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/MemoryRetrievalStep\" />"
@ -5419,6 +5560,10 @@
            "name": "ArtifactType",
            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ArtifactType\" />"
        },
+        {
+            "name": "MemoryBankDocument",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/MemoryBankDocument\" />"
+        },
        {
            "name": "EvaluationJobArtifactsResponse",
            "description": "Artifacts of a evaluation job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/EvaluationJobArtifactsResponse\" />"
@ -5565,6 +5710,7 @@
            "name": "Operations",
            "tags": [
                "AgenticSystem",
+                "BatchInference",
                "Datasets",
                "Evaluations",
                "Inference",
@ -5610,6 +5756,7 @@
                "DPOAlignmentConfig",
                "DialogGenerations",
                "DoraFinetuningConfig",
+                "EmbeddingsResponse",
                "EvaluateQuestionAnsweringRequest",
                "EvaluateSummarizationRequest",
                "EvaluateTextGenerationRequest",
--- a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml
+++ b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml
@ -10,64 +10,6 @@ components:
          type: array
        instructions:
          type: string
-        memory_bank_configs:
-          items:
-            oneOf:
-            - additionalProperties: false
-              properties:
-                bank_id:
-                  type: string
-                type:
-                  const: vector
-                  type: string
-              required:
-              - bank_id
-              - type
-              type: object
-            - additionalProperties: false
-              properties:
-                bank_id:
-                  type: string
-                keys:
-                  items:
-                    type: string
-                  type: array
-                type:
-                  const: keyvalue
-                  type: string
-              required:
-              - bank_id
-              - type
-              - keys
-              type: object
-            - additionalProperties: false
-              properties:
-                bank_id:
-                  type: string
-                type:
-                  const: keyword
-                  type: string
-              required:
-              - bank_id
-              - type
-              type: object
-            - additionalProperties: false
-              properties:
-                bank_id:
-                  type: string
-                entities:
-                  items:
-                    type: string
-                  type: array
-                type:
-                  const: graph
-                  type: string
-              required:
-              - bank_id
-              - type
-              - entities
-              type: object
-          type: array
        model:
          type: string
        output_shields:
@ -220,12 +162,17 @@ components:
    Attachment:
      additionalProperties: false
      properties:
+        content:
+          oneOf:
+          - type: string
+          - items:
+              type: string
+            type: array
+          - $ref: '#/components/schemas/URL'
        mime_type:
          type: string
-        url:
-          $ref: '#/components/schemas/URL'
      required:
-      - url
+      - content
      - mime_type
      type: object
    BatchChatCompletionRequest:
@ -537,14 +484,19 @@ components:
          oneOf:
          - additionalProperties: false
            properties:
+              chunk_size_in_tokens:
+                type: integer
              embedding_model:
                type: string
+              overlap_size_in_tokens:
+                type: integer
              type:
                const: vector
                type: string
            required:
            - type
            - embedding_model
+            - chunk_size_in_tokens
            type: object
          - additionalProperties: false
            properties:
@ -655,6 +607,18 @@ components:
      - rank
      - alpha
      type: object
+    EmbeddingsResponse:
+      additionalProperties: false
+      properties:
+        embeddings:
+          items:
+            items:
+              type: number
+            type: array
+          type: array
+      required:
+      - embeddings
+      type: object
    EvaluateQuestionAnsweringRequest:
      additionalProperties: false
      properties:
@ -819,6 +783,8 @@ components:
      properties:
        description:
          type: string
+        function_name:
+          type: string
        input_shields:
          items:
            $ref: '#/components/schemas/ShieldDefinition'
@ -838,6 +804,7 @@ components:
          type: string
      required:
      - type
+      - function_name
      - description
      - parameters
      type: object
@ -965,14 +932,19 @@ components:
          oneOf:
          - additionalProperties: false
            properties:
+              chunk_size_in_tokens:
+                type: integer
              embedding_model:
                type: string
+              overlap_size_in_tokens:
+                type: integer
              type:
                const: vector
                type: string
            required:
            - type
            - embedding_model
+            - chunk_size_in_tokens
            type: object
          - additionalProperties: false
            properties:
@ -1043,18 +1015,16 @@ components:
        completed_at:
          format: date-time
          type: string
-        documents:
-          items:
-            $ref: '#/components/schemas/MemoryBankDocument'
-          type: array
+        inserted_context:
+          oneOf:
+          - type: string
+          - items:
+              type: string
+            type: array
        memory_bank_ids:
          items:
            type: string
          type: array
-        scores:
-          items:
-            type: number
-          type: array
        started_at:
          format: date-time
          type: string
@ -1070,8 +1040,7 @@ components:
      - step_id
      - step_type
      - memory_bank_ids
-      - documents
-      - scores
+      - inserted_context
      type: object
    MemoryToolDefinition:
      additionalProperties: false
@ -1080,9 +1049,67 @@ components:
          items:
            $ref: '#/components/schemas/ShieldDefinition'
          type: array
-        memory_banks:
+        max_chunks:
+          type: integer
+        max_tokens_in_context:
+          type: integer
+        memory_bank_configs:
          items:
-            $ref: '#/components/schemas/MemoryBank'
+            oneOf:
+            - additionalProperties: false
+              properties:
+                bank_id:
+                  type: string
+                type:
+                  const: vector
+                  type: string
+              required:
+              - bank_id
+              - type
+              type: object
+            - additionalProperties: false
+              properties:
+                bank_id:
+                  type: string
+                keys:
+                  items:
+                    type: string
+                  type: array
+                type:
+                  const: keyvalue
+                  type: string
+              required:
+              - bank_id
+              - type
+              - keys
+              type: object
+            - additionalProperties: false
+              properties:
+                bank_id:
+                  type: string
+                type:
+                  const: keyword
+                  type: string
+              required:
+              - bank_id
+              - type
+              type: object
+            - additionalProperties: false
+              properties:
+                bank_id:
+                  type: string
+                entities:
+                  items:
+                    type: string
+                  type: array
+                type:
+                  const: graph
+                  type: string
+              required:
+              - bank_id
+              - type
+              - entities
+              type: object
          type: array
        output_shields:
          items:
@ -1093,7 +1120,9 @@ components:
          type: string
      required:
      - type
-      - memory_banks
+      - memory_bank_configs
+      - max_tokens_in_context
+      - max_chunks
      type: object
    Metric:
      additionalProperties: false
@ -1406,11 +1435,14 @@ components:
                - items:
                    type: string
                  type: array
+              document_id:
+                type: string
              token_count:
                type: integer
            required:
            - content
            - token_count
+            - document_id
            type: object
          type: array
        scores:
@ -1575,6 +1607,8 @@ components:
    Session:
      additionalProperties: false
      properties:
+        memory_bank:
+          $ref: '#/components/schemas/MemoryBank'
        session_id:
          type: string
        session_name:
@ -1869,11 +1903,12 @@ components:
        : {...}\n        }\n    }\n\n`function_tag` --\n    This is an example of\
        \ how you could define\n    your own user defined format for making tool calls.\n\
        \    The function_tag format looks like this,\n    <function=function_name>(parameters)</function>\n\
-        \nThe detailed prompts for each of these formats are defined in `system_prompt.py`"
+        \nThe detailed prompts for each of these formats are added to llama cli"
      enum:
      - json
      - function_tag
-      title: This Enum refers to the prompt format for calling zero shot tools
+      title: This Enum refers to the prompt format for calling custom / zero shot
+        tools
      type: string
    ToolResponse:
      additionalProperties: false
@ -2104,6 +2139,12 @@ components:
          - items:
              type: string
            type: array
+        context:
+          oneOf:
+          - type: string
+          - items:
+              type: string
+            type: array
        role:
          const: user
          type: string
@ -2134,7 +2175,7 @@ info:
  description: "This is the specification of the llama stack that provides\n     \
    \           a set of endpoints and their corresponding interfaces that are tailored\
    \ to\n                best leverage Llama Models. The specification is still in\
-    \ draft and subject to change.\n                Generated at 2024-08-23 06:36:10.417114"
+    \ draft and subject to change.\n                Generated at 2024-09-03 21:36:00.770405"
  title: '[DRAFT] Llama Stack Specification'
  version: 0.0.1
 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@ -2327,6 +2368,42 @@ paths:
          description: OK
      tags:
      - Observability
+  /batch_inference/chat_completion:
+    post:
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/BatchChatCompletionRequest'
+        required: true
+      responses:
+        '200':
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchChatCompletionResponse'
+          description: OK
+      tags:
+      - BatchInference
+  /batch_inference/completion:
+    post:
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/BatchCompletionRequest'
+        required: true
+      responses:
+        '200':
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchCompletionResponse'
+          description: OK
+      tags:
+      - BatchInference
  /datasets/create:
    post:
      parameters: []
@ -2619,42 +2696,6 @@ paths:
          description: OK
      tags:
      - Observability
-  /inference/batch_chat_completion:
-    post:
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/BatchChatCompletionRequest'
-        required: true
-      responses:
-        '200':
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/BatchChatCompletionResponse'
-          description: OK
-      tags:
-      - Inference
-  /inference/batch_completion:
-    post:
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/BatchCompletionRequest'
-        required: true
-      responses:
-        '200':
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/BatchCompletionResponse'
-          description: OK
-      tags:
-      - Inference
  /inference/chat_completion:
    post:
      parameters: []
@ -2691,6 +2732,35 @@ paths:
          description: streamed completion response.
      tags:
      - Inference
+  /inference/embeddings:
+    post:
+      parameters:
+      - in: query
+        name: model
+        required: true
+        schema:
+          type: string
+      requestBody:
+        content:
+          application/json:
+            schema:
+              items:
+                oneOf:
+                - type: string
+                - items:
+                    type: string
+                  type: array
+              type: array
+        required: true
+      responses:
+        '200':
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/EmbeddingsResponse'
+          description: OK
+      tags:
+      - Inference
  /logging/get_logs:
    post:
      parameters: []
@ -2777,6 +2847,11 @@ paths:
        required: true
        schema:
          type: string
+      - in: query
+        name: ttl_seconds
+        required: false
+        schema:
+          type: integer
      requestBody:
        content:
          application/json:
@ -2887,7 +2962,9 @@ paths:
          content:
            application/json:
              schema:
-                $ref: '#/components/schemas/MemoryBank'
+                oneOf:
+                - $ref: '#/components/schemas/MemoryBank'
+                - type: 'null'
          description: OK
      tags:
      - Memory
@ -3105,15 +3182,16 @@ security:
 servers:
 - url: http://any-hosted-llama-stack.com
 tags:
- name: Observability
+- name: BatchInference
 - name: AgenticSystem
 - name: PostTraining
 - name: Memory
- name: Evaluations
- name: Datasets
- name: RewardScoring
- name: Inference
+- name: Observability
 - name: SyntheticDataGeneration
+- name: Evaluations
+- name: RewardScoring
+- name: Datasets
+- name: Inference
 - description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
    />
  name: BatchChatCompletionRequest
@ -3140,16 +3218,16 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/ToolParamDefinition"
    />
  name: ToolParamDefinition
- description: "This Enum refers to the prompt format for calling zero shot tools\n\
-    \n`json` --\n    Refers to the json format for calling tools.\n    The json format\
-    \ takes the form like\n    {\n        \"type\": \"function\",\n        \"function\"\
-    \ : {\n            \"name\": \"function_name\",\n            \"description\":\
-    \ \"function_description\",\n            \"parameters\": {...}\n        }\n  \
-    \  }\n\n`function_tag` --\n    This is an example of how you could define\n  \
-    \  your own user defined format for making tool calls.\n    The function_tag format\
-    \ looks like this,\n    <function=function_name>(parameters)</function>\n\nThe\
-    \ detailed prompts for each of these formats are defined in `system_prompt.py`\n\
-    \n<SchemaDefinition schemaRef=\"#/components/schemas/ToolPromptFormat\" />"
+- description: "This Enum refers to the prompt format for calling custom / zero shot\
+    \ tools\n\n`json` --\n    Refers to the json format for calling tools.\n    The\
+    \ json format takes the form like\n    {\n        \"type\": \"function\",\n  \
+    \      \"function\" : {\n            \"name\": \"function_name\",\n          \
+    \  \"description\": \"function_description\",\n            \"parameters\": {...}\n\
+    \        }\n    }\n\n`function_tag` --\n    This is an example of how you could\
+    \ define\n    your own user defined format for making tool calls.\n    The function_tag\
+    \ format looks like this,\n    <function=function_name>(parameters)</function>\n\
+    \nThe detailed prompts for each of these formats are added to llama cli\n\n<SchemaDefinition\
+    \ schemaRef=\"#/components/schemas/ToolPromptFormat\" />"
  name: ToolPromptFormat
 - description: <SchemaDefinition schemaRef="#/components/schemas/ToolResponseMessage"
    />
@ -3212,8 +3290,6 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/FunctionCallToolDefinition"
    />
  name: FunctionCallToolDefinition
- description: <SchemaDefinition schemaRef="#/components/schemas/MemoryBank" />
-  name: MemoryBank
 - description: <SchemaDefinition schemaRef="#/components/schemas/MemoryToolDefinition"
    />
  name: MemoryToolDefinition
@ -3277,11 +3353,16 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/CreateMemoryBankRequest"
    />
  name: CreateMemoryBankRequest
+- description: <SchemaDefinition schemaRef="#/components/schemas/MemoryBank" />
+  name: MemoryBank
 - description: <SchemaDefinition schemaRef="#/components/schemas/CreateRunRequest"
    />
  name: CreateRunRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/Run" />
  name: Run
+- description: <SchemaDefinition schemaRef="#/components/schemas/EmbeddingsResponse"
+    />
+  name: EmbeddingsResponse
 - description: 'Checkpoint created during training runs


@ -3309,9 +3390,6 @@ tags:
  name: EvaluateTextGenerationRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/InferenceStep" />
  name: InferenceStep
- description: <SchemaDefinition schemaRef="#/components/schemas/MemoryBankDocument"
-    />
-  name: MemoryBankDocument
 - description: <SchemaDefinition schemaRef="#/components/schemas/MemoryRetrievalStep"
    />
  name: MemoryRetrievalStep
@ -3341,6 +3419,9 @@ tags:
  name: Artifact
 - description: <SchemaDefinition schemaRef="#/components/schemas/ArtifactType" />
  name: ArtifactType
+- description: <SchemaDefinition schemaRef="#/components/schemas/MemoryBankDocument"
+    />
+  name: MemoryBankDocument
 - description: 'Artifacts of an evaluation job.


@ -3474,6 +3555,7 @@ x-tagGroups:
 - name: Operations
  tags:
  - AgenticSystem
+  - BatchInference
  - Datasets
  - Evaluations
  - Inference
@ -3516,6 +3598,7 @@ x-tagGroups:
  - DPOAlignmentConfig
  - DialogGenerations
  - DoraFinetuningConfig
+  - EmbeddingsResponse
  - EvaluateQuestionAnsweringRequest
  - EvaluateSummarizationRequest
  - EvaluateTextGenerationRequest