diff --git a/docs/llama-stack-spec.html b/docs/llama-stack-spec.html
index a7ab57343..c4b9790ae 100644
--- a/docs/llama-stack-spec.html
+++ b/docs/llama-stack-spec.html
@@ -21,7 +21,7 @@
"info": {
"title": "[DRAFT] Llama Stack Specification",
"version": "0.0.1",
- "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-18 19:27:39.955190"
+ "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-20 13:32:23.841908"
},
"servers": [
{
@@ -46,7 +46,17 @@
"tags": [
"BatchInference"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -76,7 +86,17 @@
"tags": [
"BatchInference"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -99,7 +119,17 @@
"tags": [
"Evaluations"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -122,7 +152,17 @@
"tags": [
"PostTraining"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -159,7 +199,17 @@
"tags": [
"Inference"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -196,7 +246,17 @@
"tags": [
"Inference"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -226,7 +286,17 @@
"tags": [
"Agents"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -256,7 +326,17 @@
"tags": [
"Agents"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -286,7 +366,17 @@
"tags": [
"Agents"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -309,7 +399,17 @@
"tags": [
"Datasets"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -339,7 +439,17 @@
"tags": [
"Memory"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -362,7 +472,17 @@
"tags": [
"Agents"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -385,7 +505,17 @@
"tags": [
"Agents"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -408,7 +538,17 @@
"tags": [
"Datasets"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -431,7 +571,17 @@
"tags": [
"Memory"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -461,7 +611,17 @@
"tags": [
"Memory"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -491,7 +651,17 @@
"tags": [
"Inference"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -521,7 +691,17 @@
"tags": [
"Evaluations"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -551,7 +731,17 @@
"tags": [
"Evaluations"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -581,7 +771,17 @@
"tags": [
"Evaluations"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -627,6 +827,15 @@
"schema": {
"type": "string"
}
+ },
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
}
],
"requestBody": {
@@ -682,6 +891,15 @@
"schema": {
"type": "string"
}
+ },
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
}
]
}
@@ -719,6 +937,15 @@
"schema": {
"type": "string"
}
+ },
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
}
]
}
@@ -748,6 +975,15 @@
"schema": {
"type": "string"
}
+ },
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
}
]
}
@@ -777,6 +1013,15 @@
"schema": {
"type": "string"
}
+ },
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
}
],
"requestBody": {
@@ -816,6 +1061,15 @@
"schema": {
"type": "string"
}
+ },
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
}
]
}
@@ -845,6 +1099,15 @@
"schema": {
"type": "string"
}
+ },
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
}
]
}
@@ -874,6 +1137,15 @@
"schema": {
"type": "string"
}
+ },
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
}
]
}
@@ -895,7 +1167,17 @@
"tags": [
"Evaluations"
],
- "parameters": []
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
}
},
"/memory_banks/get": {
@@ -930,6 +1212,15 @@
"schema": {
"type": "string"
}
+ },
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
}
]
}
@@ -959,6 +1250,15 @@
"schema": {
"type": "string"
}
+ },
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
}
]
}
@@ -988,6 +1288,15 @@
"schema": {
"type": "string"
}
+ },
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
}
]
}
@@ -1017,6 +1326,15 @@
"schema": {
"type": "string"
}
+ },
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
}
]
}
@@ -1046,6 +1364,15 @@
"schema": {
"type": "string"
}
+ },
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
}
]
}
@@ -1067,7 +1394,17 @@
"tags": [
"PostTraining"
],
- "parameters": []
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
}
},
"/memory_bank/insert": {
@@ -1080,7 +1417,17 @@
"tags": [
"Memory"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -1110,7 +1457,17 @@
"tags": [
"Memory"
],
- "parameters": []
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
}
},
"/telemetry/log_event": {
@@ -1123,7 +1480,17 @@
"tags": [
"Telemetry"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -1153,7 +1520,17 @@
"tags": [
"PostTraining"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -1183,7 +1560,17 @@
"tags": [
"Memory"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -1213,7 +1600,17 @@
"tags": [
"RewardScoring"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -1243,7 +1640,17 @@
"tags": [
"Safety"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -1273,7 +1680,17 @@
"tags": [
"PostTraining"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -1303,7 +1720,17 @@
"tags": [
"SyntheticDataGeneration"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -1326,7 +1753,17 @@
"tags": [
"Memory"
],
- "parameters": [],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
"requestBody": {
"content": {
"application/json": {
@@ -5155,31 +5592,31 @@
"name": "PostTraining"
},
{
- "name": "Inference"
- },
- {
- "name": "Safety"
- },
- {
- "name": "RewardScoring"
- },
- {
- "name": "Telemetry"
+ "name": "Memory"
},
{
"name": "Evaluations"
},
+ {
+ "name": "Datasets"
+ },
{
"name": "SyntheticDataGeneration"
},
{
- "name": "Memory"
+ "name": "RewardScoring"
},
{
"name": "Agents"
},
{
- "name": "Datasets"
+ "name": "Safety"
+ },
+ {
+ "name": "Inference"
+ },
+ {
+ "name": "Telemetry"
},
{
"name": "BuiltinTool",
diff --git a/docs/llama-stack-spec.yaml b/docs/llama-stack-spec.yaml
index 33d7d9a3a..171d7780c 100644
--- a/docs/llama-stack-spec.yaml
+++ b/docs/llama-stack-spec.yaml
@@ -2362,7 +2362,7 @@ info:
description: "This is the specification of the llama stack that provides\n \
\ a set of endpoints and their corresponding interfaces that are tailored\
\ to\n best leverage Llama Models. The specification is still in\
- \ draft and subject to change.\n Generated at 2024-09-18 19:27:39.955190"
+ \ draft and subject to change.\n Generated at 2024-09-20 13:32:23.841908"
title: '[DRAFT] Llama Stack Specification'
version: 0.0.1
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@@ -2370,7 +2370,14 @@ openapi: 3.1.0
paths:
/agents/create:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2388,7 +2395,14 @@ paths:
- Agents
/agents/delete:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2402,7 +2416,14 @@ paths:
- Agents
/agents/session/create:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2420,7 +2441,14 @@ paths:
- Agents
/agents/session/delete:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2445,6 +2473,13 @@ paths:
required: true
schema:
type: string
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2478,6 +2513,13 @@ paths:
required: true
schema:
type: string
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
responses:
'200':
content:
@@ -2489,7 +2531,14 @@ paths:
- Agents
/agents/turn/create:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2518,6 +2567,13 @@ paths:
required: true
schema:
type: string
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
responses:
'200':
content:
@@ -2529,7 +2585,14 @@ paths:
- Agents
/batch_inference/chat_completion:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2547,7 +2610,14 @@ paths:
- BatchInference
/batch_inference/completion:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2565,7 +2635,14 @@ paths:
- BatchInference
/datasets/create:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2579,7 +2656,14 @@ paths:
- Datasets
/datasets/delete:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2599,6 +2683,13 @@ paths:
required: true
schema:
type: string
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
responses:
'200':
content:
@@ -2616,6 +2707,13 @@ paths:
required: true
schema:
type: string
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
responses:
'200':
content:
@@ -2627,7 +2725,14 @@ paths:
- Evaluations
/evaluate/job/cancel:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2647,6 +2752,13 @@ paths:
required: true
schema:
type: string
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
responses:
'200':
content:
@@ -2664,6 +2776,13 @@ paths:
required: true
schema:
type: string
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
responses:
'200':
content:
@@ -2675,7 +2794,14 @@ paths:
- Evaluations
/evaluate/jobs:
get:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
responses:
'200':
content:
@@ -2687,7 +2813,14 @@ paths:
- Evaluations
/evaluate/question_answering/:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2705,7 +2838,14 @@ paths:
- Evaluations
/evaluate/summarization/:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2723,7 +2863,14 @@ paths:
- Evaluations
/evaluate/text_generation/:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2741,7 +2888,14 @@ paths:
- Evaluations
/inference/chat_completion:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2761,7 +2915,14 @@ paths:
- Inference
/inference/completion:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2781,7 +2942,14 @@ paths:
- Inference
/inference/embeddings:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2799,7 +2967,14 @@ paths:
- Inference
/memory_bank/documents/delete:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2819,6 +2994,13 @@ paths:
required: true
schema:
type: string
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2836,7 +3018,14 @@ paths:
- Memory
/memory_bank/insert:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2850,7 +3039,14 @@ paths:
- Memory
/memory_bank/query:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2868,7 +3064,14 @@ paths:
- Memory
/memory_bank/update:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2882,7 +3085,14 @@ paths:
- Memory
/memory_banks/create:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2900,7 +3110,14 @@ paths:
- Memory
/memory_banks/drop:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2924,6 +3141,13 @@ paths:
required: true
schema:
type: string
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
responses:
'200':
content:
@@ -2937,7 +3161,14 @@ paths:
- Memory
/memory_banks/list:
get:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
responses:
'200':
content:
@@ -2955,6 +3186,13 @@ paths:
required: true
schema:
type: string
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
responses:
'200':
content:
@@ -2966,7 +3204,14 @@ paths:
- PostTraining
/post_training/job/cancel:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -2986,6 +3231,13 @@ paths:
required: true
schema:
type: string
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
responses:
'200':
content:
@@ -3003,6 +3255,13 @@ paths:
required: true
schema:
type: string
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
responses:
'200':
content:
@@ -3014,7 +3273,14 @@ paths:
- PostTraining
/post_training/jobs:
get:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
responses:
'200':
content:
@@ -3026,7 +3292,14 @@ paths:
- PostTraining
/post_training/preference_optimize:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -3044,7 +3317,14 @@ paths:
- PostTraining
/post_training/supervised_fine_tune:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -3062,7 +3342,14 @@ paths:
- PostTraining
/reward_scoring/score:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -3080,7 +3367,14 @@ paths:
- RewardScoring
/safety/run_shields:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -3098,7 +3392,14 @@ paths:
- Safety
/synthetic_data_generation/generate:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -3122,6 +3423,13 @@ paths:
required: true
schema:
type: string
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
responses:
'200':
content:
@@ -3133,7 +3441,14 @@ paths:
- Telemetry
/telemetry/log_event:
post:
- parameters: []
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
requestBody:
content:
application/json:
@@ -3152,15 +3467,15 @@ servers:
tags:
- name: BatchInference
- name: PostTraining
-- name: Inference
-- name: Safety
-- name: RewardScoring
-- name: Telemetry
-- name: Evaluations
-- name: SyntheticDataGeneration
- name: Memory
-- name: Agents
+- name: Evaluations
- name: Datasets
+- name: SyntheticDataGeneration
+- name: RewardScoring
+- name: Agents
+- name: Safety
+- name: Inference
+- name: Telemetry
- description:
name: BuiltinTool
- description: Optional[str]:
+ if self.adapter:
+ return self.adapter.provider_data_validator
+ return None
+
# Can avoid this by using Pydantic computed_field
def remote_provider_spec(
diff --git a/llama_stack/distribution/request_headers.py b/llama_stack/distribution/request_headers.py
new file mode 100644
index 000000000..5a4fb19a0
--- /dev/null
+++ b/llama_stack/distribution/request_headers.py
@@ -0,0 +1,49 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import json
+import threading
+from typing import Any, Dict, Optional
+
+from .utils.dynamic import instantiate_class_type
+
+_THREAD_LOCAL = threading.local()
+
+
+def get_request_provider_data() -> Any:
+    """Return the provider data stored on the current thread, or None if unset."""
+    try:
+        return _THREAD_LOCAL.provider_data
+    except AttributeError:
+        # Nothing has been stored on this thread yet.
+        return None
+
+
+def set_request_provider_data(headers: Dict[str, str], validator_class: Optional[str]):
+    """Parse the provider-data request header and store the result for this thread.
+
+    The header value is decoded as JSON and passed as keyword arguments to the
+    object returned by ``instantiate_class_type(validator_class)``. On success the
+    resulting object becomes available through ``get_request_provider_data()``.
+
+    Args:
+        headers: Request headers; the provider-data header is looked up under
+            both its canonical and its all-lowercase spelling.
+        validator_class: Name handed to ``instantiate_class_type``. When falsy,
+            no provider data is parsed.
+
+    Returns:
+        None. If the validator is missing, the header is absent or empty, the
+        value is not valid JSON, or validation raises, the stored provider data
+        is left as ``None``.
+    """
+    # Always start from a clean slate: without this reset, a request that carries
+    # no (or invalid) provider data would still see whatever an earlier request
+    # handled on the same thread had stored, e.g. someone else's credentials.
+    # NOTE(review): the callers are `async def` endpoints, so concurrent requests
+    # may share a thread; `contextvars` may be a better fit than a thread-local
+    # here -- confirm.
+    _THREAD_LOCAL.provider_data = None
+
+    if not validator_class:
+        return
+
+    # A plain dict is case-sensitive, so probe both spellings of the header.
+    keys = [
+        "X-LlamaStack-ProviderData",
+        "x-llamastack-providerdata",
+    ]
+    val = None
+    for key in keys:
+        val = headers.get(key, None)
+        if val:
+            break
+
+    if not val:
+        return
+
+    try:
+        val = json.loads(val)
+    except json.JSONDecodeError:
+        print("Provider data not encoded as a JSON object!", val)
+        return
+
+    validator = instantiate_class_type(validator_class)
+    try:
+        # Also covers JSON that is not an object (e.g. a list), which makes the
+        # `**val` expansion raise TypeError.
+        provider_data = validator(**val)
+    except Exception as e:
+        print("Error parsing provider data", e)
+        return
+
+    _THREAD_LOCAL.provider_data = provider_data
diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py
index 86036798f..645e5ed34 100644
--- a/llama_stack/distribution/server/server.py
+++ b/llama_stack/distribution/server/server.py
@@ -49,6 +49,7 @@ from typing_extensions import Annotated
from llama_stack.distribution.datatypes import * # noqa: F403
from llama_stack.distribution.distribution import api_endpoints, api_providers
+from llama_stack.distribution.request_headers import set_request_provider_data
from llama_stack.distribution.utils.dynamic import instantiate_provider
@@ -177,9 +178,9 @@ def create_dynamic_passthrough(
return endpoint
-def create_dynamic_typed_route(func: Any, method: str):
- cprint(f"> create_dynamic_typed_route func={func}", "red")
- cprint(f"> create_dynamic_typed_route method={method}", "red")
+def create_dynamic_typed_route(
+ func: Any, method: str, provider_data_validator: Optional[str]
+):
hints = get_type_hints(func)
response_model = hints.get("return")
@@ -191,9 +192,11 @@ def create_dynamic_typed_route(func: Any, method: str):
if is_streaming:
- async def endpoint(**kwargs):
+ async def endpoint(request: Request, **kwargs):
await start_trace(func.__name__)
+ set_request_provider_data(request.headers, provider_data_validator)
+
async def sse_generator(event_gen):
try:
async for item in event_gen:
@@ -220,8 +223,11 @@ def create_dynamic_typed_route(func: Any, method: str):
else:
- async def endpoint(**kwargs):
+ async def endpoint(request: Request, **kwargs):
await start_trace(func.__name__)
+
+ set_request_provider_data(request.headers, provider_data_validator)
+
try:
return (
await func(**kwargs)
@@ -235,20 +241,23 @@ def create_dynamic_typed_route(func: Any, method: str):
await end_trace()
sig = inspect.signature(func)
+ new_params = [
+ inspect.Parameter(
+ "request", inspect.Parameter.POSITIONAL_OR_KEYWORD, annotation=Request
+ )
+ ]
+ new_params.extend(sig.parameters.values())
+
if method == "post":
# make sure every parameter is annotated with Body() so FASTAPI doesn't
# do anything too intelligent and ask for some parameters in the query
# and some in the body
- endpoint.__signature__ = sig.replace(
- parameters=[
- param.replace(
- annotation=Annotated[param.annotation, Body(..., embed=True)]
- )
- for param in sig.parameters.values()
- ]
- )
- else:
- endpoint.__signature__ = sig
+ new_params = [new_params[0]] + [
+ param.replace(annotation=Annotated[param.annotation, Body(..., embed=True)])
+ for param in new_params[1:]
+ ]
+
+ endpoint.__signature__ = sig.replace(parameters=new_params)
return endpoint
@@ -420,7 +429,11 @@ def run_main_DEPRECATED(
impl_method = getattr(impl, endpoint.name)
getattr(app, endpoint.method)(endpoint.route, response_model=None)(
- create_dynamic_typed_route(impl_method, endpoint.method)
+ create_dynamic_typed_route(
+ impl_method,
+ endpoint.method,
+ provider_spec.provider_data_validator,
+ )
)
for route in app.routes:
diff --git a/llama_stack/providers/adapters/inference/together/__init__.py b/llama_stack/providers/adapters/inference/together/__init__.py
index 05ea91e58..c964ddffb 100644
--- a/llama_stack/providers/adapters/inference/together/__init__.py
+++ b/llama_stack/providers/adapters/inference/together/__init__.py
@@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-from .config import TogetherImplConfig
+from .config import TogetherImplConfig, TogetherHeaderExtractor
async def get_adapter_impl(config: TogetherImplConfig, _deps):
diff --git a/llama_stack/providers/adapters/inference/together/config.py b/llama_stack/providers/adapters/inference/together/config.py
index 03ee047d2..c58f722bc 100644
--- a/llama_stack/providers/adapters/inference/together/config.py
+++ b/llama_stack/providers/adapters/inference/together/config.py
@@ -4,9 +4,18 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-from llama_models.schema_utils import json_schema_type
from pydantic import BaseModel, Field
+from llama_models.schema_utils import json_schema_type
+
+from llama_stack.distribution.request_headers import annotate_header
+
+
+# Pydantic model with a single `api_key` field whose annotation is built by
+# `annotate_header("X-LlamaStack-Together-ApiKey", str, ...)`.
+# NOTE(review): the `request_headers` module added in this same patch defines only
+# `get_request_provider_data` and `set_request_provider_data`; confirm that
+# `annotate_header` really exists there, otherwise the import above fails.
+class TogetherHeaderExtractor(BaseModel):
+ api_key: annotate_header(
+ "X-LlamaStack-Together-ApiKey", str, "The API Key for the request"
+ )
+
@json_schema_type
class TogetherImplConfig(BaseModel):
diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py
index 2fa8c98dc..10b3d6ccc 100644
--- a/llama_stack/providers/registry/inference.py
+++ b/llama_stack/providers/registry/inference.py
@@ -63,6 +63,7 @@ def available_providers() -> List[ProviderSpec]:
],
module="llama_stack.providers.adapters.inference.together",
config_class="llama_stack.providers.adapters.inference.together.TogetherImplConfig",
+ header_extractor_class="llama_stack.providers.adapters.inference.together.TogetherHeaderExtractor",
),
),
]
diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py
index 1e7a01b12..929c91bda 100644
--- a/llama_stack/providers/utils/memory/vector_store.py
+++ b/llama_stack/providers/utils/memory/vector_store.py
@@ -16,6 +16,7 @@ import httpx
import numpy as np
from numpy.typing import NDArray
from pypdf import PdfReader
+from termcolor import cprint
from llama_models.llama3.api.datatypes import * # noqa: F403
from llama_models.llama3.api.tokenizer import Tokenizer
@@ -160,6 +161,8 @@ class BankWithIndex:
self.bank.config.overlap_size_in_tokens
or (self.bank.config.chunk_size_in_tokens // 4),
)
+ if not chunks:
+ continue
embeddings = model.encode([x.content for x in chunks]).astype(np.float32)
await self.index.add_chunks(chunks, embeddings)