From 32beecb20d48d4efc446216e8db83404e8c4607f Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Wed, 18 Sep 2024 09:17:59 -0700 Subject: [PATCH] Add a special header per-client call to parser provider data --- docs/llama-stack-spec.html | 527 ++++++++++++++++-- docs/llama-stack-spec.yaml | 395 +++++++++++-- docs/openapi_generator/pyopenapi/generator.py | 9 + .../run_openapi_generator.sh | 5 +- llama_stack/distribution/datatypes.py | 12 + llama_stack/distribution/request_headers.py | 49 ++ llama_stack/distribution/server/server.py | 45 +- .../adapters/inference/together/__init__.py | 2 +- .../adapters/inference/together/config.py | 11 +- llama_stack/providers/registry/inference.py | 1 + .../providers/utils/memory/vector_store.py | 3 + 11 files changed, 955 insertions(+), 104 deletions(-) create mode 100644 llama_stack/distribution/request_headers.py diff --git a/docs/llama-stack-spec.html b/docs/llama-stack-spec.html index a7ab57343..c4b9790ae 100644 --- a/docs/llama-stack-spec.html +++ b/docs/llama-stack-spec.html @@ -21,7 +21,7 @@ "info": { "title": "[DRAFT] Llama Stack Specification", "version": "0.0.1", - "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-18 19:27:39.955190" + "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-20 13:32:23.841908" }, "servers": [ { @@ -46,7 +46,17 @@ "tags": [ "BatchInference" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -76,7 +86,17 @@ "tags": [ "BatchInference" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -99,7 +119,17 @@ "tags": [ "Evaluations" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -122,7 +152,17 @@ "tags": [ "PostTraining" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -159,7 +199,17 @@ "tags": [ "Inference" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -196,7 +246,17 @@ "tags": [ "Inference" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -226,7 +286,17 @@ "tags": [ "Agents" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -256,7 +326,17 @@ "tags": [ "Agents" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -286,7 +366,17 @@ "tags": [ "Agents" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -309,7 +399,17 @@ "tags": [ "Datasets" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -339,7 +439,17 @@ "tags": [ "Memory" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -362,7 +472,17 @@ "tags": [ "Agents" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -385,7 +505,17 @@ "tags": [ "Agents" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -408,7 +538,17 @@ "tags": [ "Datasets" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -431,7 +571,17 @@ "tags": [ "Memory" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -461,7 +611,17 @@ "tags": [ "Memory" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -491,7 +651,17 @@ "tags": [ "Inference" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -521,7 +691,17 @@ "tags": [ "Evaluations" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -551,7 +731,17 @@ "tags": [ "Evaluations" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -581,7 +771,17 @@ "tags": [ "Evaluations" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -627,6 +827,15 @@ "schema": { "type": "string" } + }, + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } } ], "requestBody": { @@ -682,6 +891,15 @@ "schema": { "type": "string" } + }, + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } } ] } @@ -719,6 +937,15 @@ "schema": { "type": "string" } + }, + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } } ] } @@ -748,6 +975,15 @@ "schema": { "type": "string" } + }, + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } } ] } @@ -777,6 +1013,15 @@ "schema": { "type": "string" } + }, + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } } ], "requestBody": { @@ -816,6 +1061,15 @@ "schema": { "type": "string" } + }, + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } } ] } @@ -845,6 +1099,15 @@ "schema": { "type": "string" } + }, + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } } ] } @@ -874,6 +1137,15 @@ "schema": { "type": "string" } + }, + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } } ] } @@ -895,7 +1167,17 @@ "tags": [ "Evaluations" ], - "parameters": [] + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ] } }, "/memory_banks/get": { @@ -930,6 +1212,15 @@ "schema": { "type": "string" } + }, + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } } ] } @@ -959,6 +1250,15 @@ "schema": { "type": "string" } + }, + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } } ] } @@ -988,6 +1288,15 @@ "schema": { "type": "string" } + }, + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } } ] } @@ -1017,6 +1326,15 @@ "schema": { "type": "string" } + }, + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } } ] } @@ -1046,6 +1364,15 @@ "schema": { "type": "string" } + }, + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } } ] } @@ -1067,7 +1394,17 @@ "tags": [ "PostTraining" ], - "parameters": [] + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ] } }, "/memory_bank/insert": { @@ -1080,7 +1417,17 @@ "tags": [ "Memory" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -1110,7 +1457,17 @@ "tags": [ "Memory" ], - "parameters": [] + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ] } }, "/telemetry/log_event": { @@ -1123,7 +1480,17 @@ "tags": [ "Telemetry" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -1153,7 +1520,17 @@ "tags": [ "PostTraining" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -1183,7 +1560,17 @@ "tags": [ "Memory" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -1213,7 +1600,17 @@ "tags": [ "RewardScoring" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -1243,7 +1640,17 @@ "tags": [ "Safety" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -1273,7 +1680,17 @@ "tags": [ "PostTraining" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -1303,7 +1720,17 @@ "tags": [ "SyntheticDataGeneration" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -1326,7 +1753,17 @@ "tags": [ "Memory" ], - "parameters": [], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -5155,31 +5592,31 @@ "name": "PostTraining" }, { - "name": "Inference" - }, - { - "name": "Safety" - }, - { - "name": "RewardScoring" - }, - { - "name": "Telemetry" + "name": "Memory" }, { "name": "Evaluations" }, + { + "name": "Datasets" + }, { "name": "SyntheticDataGeneration" }, { - "name": "Memory" + "name": "RewardScoring" }, { "name": "Agents" }, { - "name": "Datasets" + "name": "Safety" + }, + { + "name": "Inference" + }, + { + "name": "Telemetry" }, { "name": "BuiltinTool", diff --git a/docs/llama-stack-spec.yaml b/docs/llama-stack-spec.yaml index 33d7d9a3a..171d7780c 100644 --- a/docs/llama-stack-spec.yaml +++ b/docs/llama-stack-spec.yaml @@ -2362,7 +2362,7 @@ info: description: "This is the specification of the llama stack that provides\n \ \ a set of endpoints and their corresponding interfaces that are tailored\ \ to\n best leverage Llama Models. The specification is still in\ - \ draft and subject to change.\n Generated at 2024-09-18 19:27:39.955190" + \ draft and subject to change.\n Generated at 2024-09-20 13:32:23.841908" title: '[DRAFT] Llama Stack Specification' version: 0.0.1 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema @@ -2370,7 +2370,14 @@ openapi: 3.1.0 paths: /agents/create: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2388,7 +2395,14 @@ paths: - Agents /agents/delete: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2402,7 +2416,14 @@ paths: - Agents /agents/session/create: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2420,7 +2441,14 @@ paths: - Agents /agents/session/delete: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2445,6 +2473,13 @@ paths: required: true schema: type: string + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2478,6 +2513,13 @@ paths: required: true schema: type: string + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string responses: '200': content: @@ -2489,7 +2531,14 @@ paths: - Agents /agents/turn/create: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2518,6 +2567,13 @@ paths: required: true schema: type: string + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string responses: '200': content: @@ -2529,7 +2585,14 @@ paths: - Agents /batch_inference/chat_completion: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2547,7 +2610,14 @@ paths: - BatchInference /batch_inference/completion: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2565,7 +2635,14 @@ paths: - BatchInference /datasets/create: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2579,7 +2656,14 @@ paths: - Datasets /datasets/delete: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2599,6 +2683,13 @@ paths: required: true schema: type: string + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string responses: '200': content: @@ -2616,6 +2707,13 @@ paths: required: true schema: type: string + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string responses: '200': content: @@ -2627,7 +2725,14 @@ paths: - Evaluations /evaluate/job/cancel: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2647,6 +2752,13 @@ paths: required: true schema: type: string + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string responses: '200': content: @@ -2664,6 +2776,13 @@ paths: required: true schema: type: string + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string responses: '200': content: @@ -2675,7 +2794,14 @@ paths: - Evaluations /evaluate/jobs: get: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string responses: '200': content: @@ -2687,7 +2813,14 @@ paths: - Evaluations /evaluate/question_answering/: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2705,7 +2838,14 @@ paths: - Evaluations /evaluate/summarization/: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2723,7 +2863,14 @@ paths: - Evaluations /evaluate/text_generation/: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2741,7 +2888,14 @@ paths: - Evaluations /inference/chat_completion: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2761,7 +2915,14 @@ paths: - Inference /inference/completion: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2781,7 +2942,14 @@ paths: - Inference /inference/embeddings: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2799,7 +2967,14 @@ paths: - Inference /memory_bank/documents/delete: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2819,6 +2994,13 @@ paths: required: true schema: type: string + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2836,7 +3018,14 @@ paths: - Memory /memory_bank/insert: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2850,7 +3039,14 @@ paths: - Memory /memory_bank/query: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2868,7 +3064,14 @@ paths: - Memory /memory_bank/update: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2882,7 +3085,14 @@ paths: - Memory /memory_banks/create: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2900,7 +3110,14 @@ paths: - Memory /memory_banks/drop: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2924,6 +3141,13 @@ paths: required: true schema: type: string + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string responses: '200': content: @@ -2937,7 +3161,14 @@ paths: - Memory /memory_banks/list: get: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string responses: '200': content: @@ -2955,6 +3186,13 @@ paths: required: true schema: type: string + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string responses: '200': content: @@ -2966,7 +3204,14 @@ paths: - PostTraining /post_training/job/cancel: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -2986,6 +3231,13 @@ paths: required: true schema: type: string + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string responses: '200': content: @@ -3003,6 +3255,13 @@ paths: required: true schema: type: string + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string responses: '200': content: @@ -3014,7 +3273,14 @@ paths: - PostTraining /post_training/jobs: get: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string responses: '200': content: @@ -3026,7 +3292,14 @@ paths: - PostTraining /post_training/preference_optimize: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -3044,7 +3317,14 @@ paths: - PostTraining /post_training/supervised_fine_tune: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -3062,7 +3342,14 @@ paths: - PostTraining /reward_scoring/score: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -3080,7 +3367,14 @@ paths: - RewardScoring /safety/run_shields: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -3098,7 +3392,14 @@ paths: - Safety /synthetic_data_generation/generate: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -3122,6 +3423,13 @@ paths: required: true schema: type: string + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string responses: '200': content: @@ -3133,7 +3441,14 @@ paths: - Telemetry /telemetry/log_event: post: - parameters: [] + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string requestBody: content: application/json: @@ -3152,15 +3467,15 @@ servers: tags: - name: BatchInference - name: PostTraining -- name: Inference -- name: Safety -- name: RewardScoring -- name: Telemetry -- name: Evaluations -- name: SyntheticDataGeneration - name: Memory -- name: Agents +- name: Evaluations - name: Datasets +- name: SyntheticDataGeneration +- name: RewardScoring +- name: Agents +- name: Safety +- name: Inference +- name: Telemetry - description: name: BuiltinTool - description: Optional[str]: + if self.adapter: + return self.adapter.provider_data_validator + return None + # Can avoid this by using Pydantic computed_field def remote_provider_spec( diff --git a/llama_stack/distribution/request_headers.py b/llama_stack/distribution/request_headers.py new file mode 100644 index 000000000..5a4fb19a0 --- /dev/null +++ b/llama_stack/distribution/request_headers.py @@ -0,0 +1,49 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import json +import threading +from typing import Any, Dict, Optional + +from .utils.dynamic import instantiate_class_type + +_THREAD_LOCAL = threading.local() + + +def get_request_provider_data() -> Any: + return getattr(_THREAD_LOCAL, "provider_data", None) + + +def set_request_provider_data(headers: Dict[str, str], validator_class: Optional[str]): + if not validator_class: + return + + keys = [ + "X-LlamaStack-ProviderData", + "x-llamastack-providerdata", + ] + for key in keys: + val = headers.get(key, None) + if val: + break + + if not val: + return + + try: + val = json.loads(val) + except json.JSONDecodeError: + print("Provider data not encoded as a JSON object!", val) + return + + validator = instantiate_class_type(validator_class) + try: + provider_data = validator(**val) + except Exception as e: + print("Error parsing provider data", e) + return + + _THREAD_LOCAL.provider_data = provider_data diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index 86036798f..645e5ed34 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -49,6 +49,7 @@ from typing_extensions import Annotated from llama_stack.distribution.datatypes import * # noqa: F403 from llama_stack.distribution.distribution import api_endpoints, api_providers +from llama_stack.distribution.request_headers import set_request_provider_data from llama_stack.distribution.utils.dynamic import instantiate_provider @@ -177,9 +178,9 @@ def create_dynamic_passthrough( return endpoint -def create_dynamic_typed_route(func: Any, method: str): - cprint(f"> create_dynamic_typed_route func={func}", "red") - cprint(f"> create_dynamic_typed_route method={method}", "red") +def create_dynamic_typed_route( + func: Any, method: str, provider_data_validator: Optional[str] +): hints = get_type_hints(func) response_model = hints.get("return") @@ -191,9 +192,11 @@ def create_dynamic_typed_route(func: Any, method: str): if is_streaming: - async def endpoint(**kwargs): + async def endpoint(request: Request, **kwargs): await start_trace(func.__name__) + set_request_provider_data(request.headers, provider_data_validator) + async def sse_generator(event_gen): try: async for item in event_gen: @@ -220,8 +223,11 @@ def create_dynamic_typed_route(func: Any, method: str): else: - async def endpoint(**kwargs): + async def endpoint(request: Request, **kwargs): await start_trace(func.__name__) + + set_request_provider_data(request.headers, provider_data_validator) + try: return ( await func(**kwargs) @@ -235,20 +241,23 @@ def create_dynamic_typed_route(func: Any, method: str): await end_trace() sig = inspect.signature(func) + new_params = [ + inspect.Parameter( + "request", inspect.Parameter.POSITIONAL_OR_KEYWORD, annotation=Request + ) + ] + new_params.extend(sig.parameters.values()) + if method == "post": # make sure every parameter is annotated with Body() so FASTAPI doesn't # do anything too intelligent and ask for some parameters in the query # and some in the body - endpoint.__signature__ = sig.replace( - parameters=[ - param.replace( - annotation=Annotated[param.annotation, Body(..., embed=True)] - ) - for param in sig.parameters.values() - ] - ) - else: - endpoint.__signature__ = sig + new_params = [new_params[0]] + [ + param.replace(annotation=Annotated[param.annotation, Body(..., embed=True)]) + for param in new_params[1:] + ] + + endpoint.__signature__ = sig.replace(parameters=new_params) return endpoint @@ -420,7 +429,11 @@ def run_main_DEPRECATED( impl_method = getattr(impl, endpoint.name) getattr(app, endpoint.method)(endpoint.route, response_model=None)( - create_dynamic_typed_route(impl_method, endpoint.method) + create_dynamic_typed_route( + impl_method, + endpoint.method, + provider_spec.provider_data_validator, + ) ) for route in app.routes: diff --git a/llama_stack/providers/adapters/inference/together/__init__.py b/llama_stack/providers/adapters/inference/together/__init__.py index 05ea91e58..c964ddffb 100644 --- a/llama_stack/providers/adapters/inference/together/__init__.py +++ b/llama_stack/providers/adapters/inference/together/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .config import TogetherImplConfig +from .config import TogetherImplConfig, TogetherHeaderExtractor async def get_adapter_impl(config: TogetherImplConfig, _deps): diff --git a/llama_stack/providers/adapters/inference/together/config.py b/llama_stack/providers/adapters/inference/together/config.py index 03ee047d2..c58f722bc 100644 --- a/llama_stack/providers/adapters/inference/together/config.py +++ b/llama_stack/providers/adapters/inference/together/config.py @@ -4,9 +4,18 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_models.schema_utils import json_schema_type from pydantic import BaseModel, Field +from llama_models.schema_utils import json_schema_type + +from llama_stack.distribution.request_headers import annotate_header + + +class TogetherHeaderExtractor(BaseModel): + api_key: annotate_header( + "X-LlamaStack-Together-ApiKey", str, "The API Key for the request" + ) + @json_schema_type class TogetherImplConfig(BaseModel): diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index 2fa8c98dc..10b3d6ccc 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -63,6 +63,7 @@ def available_providers() -> List[ProviderSpec]: ], module="llama_stack.providers.adapters.inference.together", config_class="llama_stack.providers.adapters.inference.together.TogetherImplConfig", + header_extractor_class="llama_stack.providers.adapters.inference.together.TogetherHeaderExtractor", ), ), ] diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py index 1e7a01b12..929c91bda 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -16,6 +16,7 @@ import httpx import numpy as np from numpy.typing import NDArray from pypdf import PdfReader +from termcolor import cprint from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_models.llama3.api.tokenizer import Tokenizer @@ -160,6 +161,8 @@ class BankWithIndex: self.bank.config.overlap_size_in_tokens or (self.bank.config.chunk_size_in_tokens // 4), ) + if not chunks: + continue embeddings = model.encode([x.content for x in chunks]).astype(np.float32) await self.index.add_chunks(chunks, embeddings)