mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-29 15:23:51 +00:00
[api_updates_3] fix CLI for routing_table, bug fixes for memory & safety (#90)
* fix llama stack build * fix configure * fix configure for simple case * configure w/ routing * move examples config * fix memory router naming * issue w/ safety * fix config w/ safety * update memory endpoints * allow providers in api_providers * configure script works * all endpoints w/ build->configure->run simple local works * new example run.yaml * run openapi generator
This commit is contained in:
parent
8cf634e615
commit
ddebf9b6e7
18 changed files with 725 additions and 605 deletions
|
@ -21,7 +21,7 @@
|
||||||
"info": {
|
"info": {
|
||||||
"title": "[DRAFT] Llama Stack Specification",
|
"title": "[DRAFT] Llama Stack Specification",
|
||||||
"version": "0.0.1",
|
"version": "0.0.1",
|
||||||
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-20 14:53:17.090953"
|
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-23 01:08:55.758597"
|
||||||
},
|
},
|
||||||
"servers": [
|
"servers": [
|
||||||
{
|
{
|
||||||
|
@ -422,7 +422,7 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/memory_banks/create": {
|
"/memory/create": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -561,7 +561,7 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/memory_bank/documents/delete": {
|
"/memory/documents/delete": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -594,7 +594,7 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/memory_banks/drop": {
|
"/memory/drop": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -988,7 +988,7 @@
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/memory_bank/documents/get": {
|
"/memory/documents/get": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -1180,7 +1180,7 @@
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/memory_banks/get": {
|
"/memory/get": {
|
||||||
"get": {
|
"get": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -1407,7 +1407,7 @@
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/memory_bank/insert": {
|
"/memory/insert": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -1440,7 +1440,7 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/memory_banks/list": {
|
"/memory/list": {
|
||||||
"get": {
|
"get": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -1543,7 +1543,7 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/memory_bank/query": {
|
"/memory/query": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -1743,7 +1743,7 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/memory_bank/update": {
|
"/memory/update": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -2584,183 +2584,7 @@
|
||||||
"$ref": "#/components/schemas/FunctionCallToolDefinition"
|
"$ref": "#/components/schemas/FunctionCallToolDefinition"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "object",
|
"$ref": "#/components/schemas/MemoryToolDefinition"
|
||||||
"properties": {
|
|
||||||
"input_shields": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "string"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"output_shields": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "string"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"type": {
|
|
||||||
"type": "string",
|
|
||||||
"const": "memory"
|
|
||||||
},
|
|
||||||
"memory_bank_configs": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"bank_id": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"type": {
|
|
||||||
"type": "string",
|
|
||||||
"const": "vector"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"bank_id",
|
|
||||||
"type"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"bank_id": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"type": {
|
|
||||||
"type": "string",
|
|
||||||
"const": "keyvalue"
|
|
||||||
},
|
|
||||||
"keys": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "string"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"bank_id",
|
|
||||||
"type",
|
|
||||||
"keys"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"bank_id": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"type": {
|
|
||||||
"type": "string",
|
|
||||||
"const": "keyword"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"bank_id",
|
|
||||||
"type"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"bank_id": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"type": {
|
|
||||||
"type": "string",
|
|
||||||
"const": "graph"
|
|
||||||
},
|
|
||||||
"entities": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "string"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"bank_id",
|
|
||||||
"type",
|
|
||||||
"entities"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"query_generator_config": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"type": {
|
|
||||||
"type": "string",
|
|
||||||
"const": "default"
|
|
||||||
},
|
|
||||||
"sep": {
|
|
||||||
"type": "string"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"type",
|
|
||||||
"sep"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"type": {
|
|
||||||
"type": "string",
|
|
||||||
"const": "llm"
|
|
||||||
},
|
|
||||||
"model": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"template": {
|
|
||||||
"type": "string"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"type",
|
|
||||||
"model",
|
|
||||||
"template"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"type": {
|
|
||||||
"type": "string",
|
|
||||||
"const": "custom"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"type"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"max_tokens_in_context": {
|
|
||||||
"type": "integer"
|
|
||||||
},
|
|
||||||
"max_chunks": {
|
|
||||||
"type": "integer"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"type",
|
|
||||||
"memory_bank_configs",
|
|
||||||
"query_generator_config",
|
|
||||||
"max_tokens_in_context",
|
|
||||||
"max_chunks"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
@ -2771,17 +2595,25 @@
|
||||||
"tool_prompt_format": {
|
"tool_prompt_format": {
|
||||||
"$ref": "#/components/schemas/ToolPromptFormat"
|
"$ref": "#/components/schemas/ToolPromptFormat"
|
||||||
},
|
},
|
||||||
|
"max_infer_iters": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
"model": {
|
"model": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
"instructions": {
|
"instructions": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
},
|
||||||
|
"enable_session_persistence": {
|
||||||
|
"type": "boolean"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
|
"max_infer_iters",
|
||||||
"model",
|
"model",
|
||||||
"instructions"
|
"instructions",
|
||||||
|
"enable_session_persistence"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"CodeInterpreterToolDefinition": {
|
"CodeInterpreterToolDefinition": {
|
||||||
|
@ -2859,6 +2691,185 @@
|
||||||
"parameters"
|
"parameters"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"MemoryToolDefinition": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"input_shields": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"output_shields": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"const": "memory"
|
||||||
|
},
|
||||||
|
"memory_bank_configs": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"bank_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"const": "vector"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"bank_id",
|
||||||
|
"type"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"bank_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"const": "keyvalue"
|
||||||
|
},
|
||||||
|
"keys": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"bank_id",
|
||||||
|
"type",
|
||||||
|
"keys"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"bank_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"const": "keyword"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"bank_id",
|
||||||
|
"type"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"bank_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"const": "graph"
|
||||||
|
},
|
||||||
|
"entities": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"bank_id",
|
||||||
|
"type",
|
||||||
|
"entities"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"query_generator_config": {
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"const": "default"
|
||||||
|
},
|
||||||
|
"sep": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"type",
|
||||||
|
"sep"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"const": "llm"
|
||||||
|
},
|
||||||
|
"model": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"template": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"type",
|
||||||
|
"model",
|
||||||
|
"template"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"const": "custom"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"type"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"max_tokens_in_context": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"max_chunks": {
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"type",
|
||||||
|
"memory_bank_configs",
|
||||||
|
"query_generator_config",
|
||||||
|
"max_tokens_in_context",
|
||||||
|
"max_chunks"
|
||||||
|
]
|
||||||
|
},
|
||||||
"PhotogenToolDefinition": {
|
"PhotogenToolDefinition": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
@ -5569,31 +5580,28 @@
|
||||||
],
|
],
|
||||||
"tags": [
|
"tags": [
|
||||||
{
|
{
|
||||||
"name": "Agents"
|
"name": "PostTraining"
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "RewardScoring"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Evaluations"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Safety"
|
"name": "Safety"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Telemetry"
|
"name": "SyntheticDataGeneration"
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "PostTraining"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Datasets"
|
"name": "Datasets"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Inference"
|
"name": "Telemetry"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "SyntheticDataGeneration"
|
"name": "Evaluations"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "RewardScoring"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Agents"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Memory"
|
"name": "Memory"
|
||||||
|
@ -5601,6 +5609,9 @@
|
||||||
{
|
{
|
||||||
"name": "BatchInference"
|
"name": "BatchInference"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "Inference"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "BuiltinTool",
|
"name": "BuiltinTool",
|
||||||
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/BuiltinTool\" />"
|
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/BuiltinTool\" />"
|
||||||
|
@ -5733,6 +5744,10 @@
|
||||||
"name": "FunctionCallToolDefinition",
|
"name": "FunctionCallToolDefinition",
|
||||||
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/FunctionCallToolDefinition\" />"
|
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/FunctionCallToolDefinition\" />"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "MemoryToolDefinition",
|
||||||
|
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/MemoryToolDefinition\" />"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "PhotogenToolDefinition",
|
"name": "PhotogenToolDefinition",
|
||||||
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/PhotogenToolDefinition\" />"
|
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/PhotogenToolDefinition\" />"
|
||||||
|
@ -6174,6 +6189,7 @@
|
||||||
"MemoryBank",
|
"MemoryBank",
|
||||||
"MemoryBankDocument",
|
"MemoryBankDocument",
|
||||||
"MemoryRetrievalStep",
|
"MemoryRetrievalStep",
|
||||||
|
"MemoryToolDefinition",
|
||||||
"MetricEvent",
|
"MetricEvent",
|
||||||
"OptimizerConfig",
|
"OptimizerConfig",
|
||||||
"PhotogenToolDefinition",
|
"PhotogenToolDefinition",
|
||||||
|
|
|
@ -4,12 +4,16 @@ components:
|
||||||
AgentConfig:
|
AgentConfig:
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
properties:
|
properties:
|
||||||
|
enable_session_persistence:
|
||||||
|
type: boolean
|
||||||
input_shields:
|
input_shields:
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
type: array
|
type: array
|
||||||
instructions:
|
instructions:
|
||||||
type: string
|
type: string
|
||||||
|
max_infer_iters:
|
||||||
|
type: integer
|
||||||
model:
|
model:
|
||||||
type: string
|
type: string
|
||||||
output_shields:
|
output_shields:
|
||||||
|
@ -30,127 +34,13 @@ components:
|
||||||
- $ref: '#/components/schemas/PhotogenToolDefinition'
|
- $ref: '#/components/schemas/PhotogenToolDefinition'
|
||||||
- $ref: '#/components/schemas/CodeInterpreterToolDefinition'
|
- $ref: '#/components/schemas/CodeInterpreterToolDefinition'
|
||||||
- $ref: '#/components/schemas/FunctionCallToolDefinition'
|
- $ref: '#/components/schemas/FunctionCallToolDefinition'
|
||||||
- additionalProperties: false
|
- $ref: '#/components/schemas/MemoryToolDefinition'
|
||||||
properties:
|
|
||||||
input_shields:
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
max_chunks:
|
|
||||||
type: integer
|
|
||||||
max_tokens_in_context:
|
|
||||||
type: integer
|
|
||||||
memory_bank_configs:
|
|
||||||
items:
|
|
||||||
oneOf:
|
|
||||||
- additionalProperties: false
|
|
||||||
properties:
|
|
||||||
bank_id:
|
|
||||||
type: string
|
|
||||||
type:
|
|
||||||
const: vector
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- bank_id
|
|
||||||
- type
|
|
||||||
type: object
|
|
||||||
- additionalProperties: false
|
|
||||||
properties:
|
|
||||||
bank_id:
|
|
||||||
type: string
|
|
||||||
keys:
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
type:
|
|
||||||
const: keyvalue
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- bank_id
|
|
||||||
- type
|
|
||||||
- keys
|
|
||||||
type: object
|
|
||||||
- additionalProperties: false
|
|
||||||
properties:
|
|
||||||
bank_id:
|
|
||||||
type: string
|
|
||||||
type:
|
|
||||||
const: keyword
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- bank_id
|
|
||||||
- type
|
|
||||||
type: object
|
|
||||||
- additionalProperties: false
|
|
||||||
properties:
|
|
||||||
bank_id:
|
|
||||||
type: string
|
|
||||||
entities:
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
type:
|
|
||||||
const: graph
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- bank_id
|
|
||||||
- type
|
|
||||||
- entities
|
|
||||||
type: object
|
|
||||||
type: array
|
|
||||||
output_shields:
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
query_generator_config:
|
|
||||||
oneOf:
|
|
||||||
- additionalProperties: false
|
|
||||||
properties:
|
|
||||||
sep:
|
|
||||||
type: string
|
|
||||||
type:
|
|
||||||
const: default
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- type
|
|
||||||
- sep
|
|
||||||
type: object
|
|
||||||
- additionalProperties: false
|
|
||||||
properties:
|
|
||||||
model:
|
|
||||||
type: string
|
|
||||||
template:
|
|
||||||
type: string
|
|
||||||
type:
|
|
||||||
const: llm
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- type
|
|
||||||
- model
|
|
||||||
- template
|
|
||||||
type: object
|
|
||||||
- additionalProperties: false
|
|
||||||
properties:
|
|
||||||
type:
|
|
||||||
const: custom
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- type
|
|
||||||
type: object
|
|
||||||
type:
|
|
||||||
const: memory
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- type
|
|
||||||
- memory_bank_configs
|
|
||||||
- query_generator_config
|
|
||||||
- max_tokens_in_context
|
|
||||||
- max_chunks
|
|
||||||
type: object
|
|
||||||
type: array
|
type: array
|
||||||
required:
|
required:
|
||||||
|
- max_infer_iters
|
||||||
- model
|
- model
|
||||||
- instructions
|
- instructions
|
||||||
|
- enable_session_persistence
|
||||||
type: object
|
type: object
|
||||||
AgentCreateResponse:
|
AgentCreateResponse:
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
|
@ -1182,6 +1072,124 @@ components:
|
||||||
- memory_bank_ids
|
- memory_bank_ids
|
||||||
- inserted_context
|
- inserted_context
|
||||||
type: object
|
type: object
|
||||||
|
MemoryToolDefinition:
|
||||||
|
additionalProperties: false
|
||||||
|
properties:
|
||||||
|
input_shields:
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
max_chunks:
|
||||||
|
type: integer
|
||||||
|
max_tokens_in_context:
|
||||||
|
type: integer
|
||||||
|
memory_bank_configs:
|
||||||
|
items:
|
||||||
|
oneOf:
|
||||||
|
- additionalProperties: false
|
||||||
|
properties:
|
||||||
|
bank_id:
|
||||||
|
type: string
|
||||||
|
type:
|
||||||
|
const: vector
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- bank_id
|
||||||
|
- type
|
||||||
|
type: object
|
||||||
|
- additionalProperties: false
|
||||||
|
properties:
|
||||||
|
bank_id:
|
||||||
|
type: string
|
||||||
|
keys:
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
type:
|
||||||
|
const: keyvalue
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- bank_id
|
||||||
|
- type
|
||||||
|
- keys
|
||||||
|
type: object
|
||||||
|
- additionalProperties: false
|
||||||
|
properties:
|
||||||
|
bank_id:
|
||||||
|
type: string
|
||||||
|
type:
|
||||||
|
const: keyword
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- bank_id
|
||||||
|
- type
|
||||||
|
type: object
|
||||||
|
- additionalProperties: false
|
||||||
|
properties:
|
||||||
|
bank_id:
|
||||||
|
type: string
|
||||||
|
entities:
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
type:
|
||||||
|
const: graph
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- bank_id
|
||||||
|
- type
|
||||||
|
- entities
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
output_shields:
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
query_generator_config:
|
||||||
|
oneOf:
|
||||||
|
- additionalProperties: false
|
||||||
|
properties:
|
||||||
|
sep:
|
||||||
|
type: string
|
||||||
|
type:
|
||||||
|
const: default
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- type
|
||||||
|
- sep
|
||||||
|
type: object
|
||||||
|
- additionalProperties: false
|
||||||
|
properties:
|
||||||
|
model:
|
||||||
|
type: string
|
||||||
|
template:
|
||||||
|
type: string
|
||||||
|
type:
|
||||||
|
const: llm
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- type
|
||||||
|
- model
|
||||||
|
- template
|
||||||
|
type: object
|
||||||
|
- additionalProperties: false
|
||||||
|
properties:
|
||||||
|
type:
|
||||||
|
const: custom
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- type
|
||||||
|
type: object
|
||||||
|
type:
|
||||||
|
const: memory
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- type
|
||||||
|
- memory_bank_configs
|
||||||
|
- query_generator_config
|
||||||
|
- max_tokens_in_context
|
||||||
|
- max_chunks
|
||||||
|
type: object
|
||||||
MetricEvent:
|
MetricEvent:
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
properties:
|
properties:
|
||||||
|
@ -2341,7 +2349,7 @@ info:
|
||||||
description: "This is the specification of the llama stack that provides\n \
|
description: "This is the specification of the llama stack that provides\n \
|
||||||
\ a set of endpoints and their corresponding interfaces that are tailored\
|
\ a set of endpoints and their corresponding interfaces that are tailored\
|
||||||
\ to\n best leverage Llama Models. The specification is still in\
|
\ to\n best leverage Llama Models. The specification is still in\
|
||||||
\ draft and subject to change.\n Generated at 2024-09-20 14:53:17.090953"
|
\ draft and subject to change.\n Generated at 2024-09-23 01:08:55.758597"
|
||||||
title: '[DRAFT] Llama Stack Specification'
|
title: '[DRAFT] Llama Stack Specification'
|
||||||
version: 0.0.1
|
version: 0.0.1
|
||||||
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
|
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
|
||||||
|
@ -2944,7 +2952,32 @@ paths:
|
||||||
description: OK
|
description: OK
|
||||||
tags:
|
tags:
|
||||||
- Inference
|
- Inference
|
||||||
/memory_bank/documents/delete:
|
/memory/create:
|
||||||
|
post:
|
||||||
|
parameters:
|
||||||
|
- description: JSON-encoded provider data which will be made available to the
|
||||||
|
adapter servicing the API
|
||||||
|
in: header
|
||||||
|
name: X-LlamaStack-ProviderData
|
||||||
|
required: false
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/CreateMemoryBankRequest'
|
||||||
|
required: true
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/MemoryBank'
|
||||||
|
description: OK
|
||||||
|
tags:
|
||||||
|
- Memory
|
||||||
|
/memory/documents/delete:
|
||||||
post:
|
post:
|
||||||
parameters:
|
parameters:
|
||||||
- description: JSON-encoded provider data which will be made available to the
|
- description: JSON-encoded provider data which will be made available to the
|
||||||
|
@ -2965,7 +2998,7 @@ paths:
|
||||||
description: OK
|
description: OK
|
||||||
tags:
|
tags:
|
||||||
- Memory
|
- Memory
|
||||||
/memory_bank/documents/get:
|
/memory/documents/get:
|
||||||
post:
|
post:
|
||||||
parameters:
|
parameters:
|
||||||
- in: query
|
- in: query
|
||||||
|
@ -2995,99 +3028,7 @@ paths:
|
||||||
description: OK
|
description: OK
|
||||||
tags:
|
tags:
|
||||||
- Memory
|
- Memory
|
||||||
/memory_bank/insert:
|
/memory/drop:
|
||||||
post:
|
|
||||||
parameters:
|
|
||||||
- description: JSON-encoded provider data which will be made available to the
|
|
||||||
adapter servicing the API
|
|
||||||
in: header
|
|
||||||
name: X-LlamaStack-ProviderData
|
|
||||||
required: false
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/InsertDocumentsRequest'
|
|
||||||
required: true
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
tags:
|
|
||||||
- Memory
|
|
||||||
/memory_bank/query:
|
|
||||||
post:
|
|
||||||
parameters:
|
|
||||||
- description: JSON-encoded provider data which will be made available to the
|
|
||||||
adapter servicing the API
|
|
||||||
in: header
|
|
||||||
name: X-LlamaStack-ProviderData
|
|
||||||
required: false
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/QueryDocumentsRequest'
|
|
||||||
required: true
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/QueryDocumentsResponse'
|
|
||||||
description: OK
|
|
||||||
tags:
|
|
||||||
- Memory
|
|
||||||
/memory_bank/update:
|
|
||||||
post:
|
|
||||||
parameters:
|
|
||||||
- description: JSON-encoded provider data which will be made available to the
|
|
||||||
adapter servicing the API
|
|
||||||
in: header
|
|
||||||
name: X-LlamaStack-ProviderData
|
|
||||||
required: false
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/UpdateDocumentsRequest'
|
|
||||||
required: true
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
tags:
|
|
||||||
- Memory
|
|
||||||
/memory_banks/create:
|
|
||||||
post:
|
|
||||||
parameters:
|
|
||||||
- description: JSON-encoded provider data which will be made available to the
|
|
||||||
adapter servicing the API
|
|
||||||
in: header
|
|
||||||
name: X-LlamaStack-ProviderData
|
|
||||||
required: false
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/CreateMemoryBankRequest'
|
|
||||||
required: true
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/MemoryBank'
|
|
||||||
description: OK
|
|
||||||
tags:
|
|
||||||
- Memory
|
|
||||||
/memory_banks/drop:
|
|
||||||
post:
|
post:
|
||||||
parameters:
|
parameters:
|
||||||
- description: JSON-encoded provider data which will be made available to the
|
- description: JSON-encoded provider data which will be made available to the
|
||||||
|
@ -3112,7 +3053,7 @@ paths:
|
||||||
description: OK
|
description: OK
|
||||||
tags:
|
tags:
|
||||||
- Memory
|
- Memory
|
||||||
/memory_banks/get:
|
/memory/get:
|
||||||
get:
|
get:
|
||||||
parameters:
|
parameters:
|
||||||
- in: query
|
- in: query
|
||||||
|
@ -3138,7 +3079,28 @@ paths:
|
||||||
description: OK
|
description: OK
|
||||||
tags:
|
tags:
|
||||||
- Memory
|
- Memory
|
||||||
/memory_banks/list:
|
/memory/insert:
|
||||||
|
post:
|
||||||
|
parameters:
|
||||||
|
- description: JSON-encoded provider data which will be made available to the
|
||||||
|
adapter servicing the API
|
||||||
|
in: header
|
||||||
|
name: X-LlamaStack-ProviderData
|
||||||
|
required: false
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/InsertDocumentsRequest'
|
||||||
|
required: true
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: OK
|
||||||
|
tags:
|
||||||
|
- Memory
|
||||||
|
/memory/list:
|
||||||
get:
|
get:
|
||||||
parameters:
|
parameters:
|
||||||
- description: JSON-encoded provider data which will be made available to the
|
- description: JSON-encoded provider data which will be made available to the
|
||||||
|
@ -3157,6 +3119,52 @@ paths:
|
||||||
description: OK
|
description: OK
|
||||||
tags:
|
tags:
|
||||||
- Memory
|
- Memory
|
||||||
|
/memory/query:
|
||||||
|
post:
|
||||||
|
parameters:
|
||||||
|
- description: JSON-encoded provider data which will be made available to the
|
||||||
|
adapter servicing the API
|
||||||
|
in: header
|
||||||
|
name: X-LlamaStack-ProviderData
|
||||||
|
required: false
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/QueryDocumentsRequest'
|
||||||
|
required: true
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/QueryDocumentsResponse'
|
||||||
|
description: OK
|
||||||
|
tags:
|
||||||
|
- Memory
|
||||||
|
/memory/update:
|
||||||
|
post:
|
||||||
|
parameters:
|
||||||
|
- description: JSON-encoded provider data which will be made available to the
|
||||||
|
adapter servicing the API
|
||||||
|
in: header
|
||||||
|
name: X-LlamaStack-ProviderData
|
||||||
|
required: false
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/UpdateDocumentsRequest'
|
||||||
|
required: true
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: OK
|
||||||
|
tags:
|
||||||
|
- Memory
|
||||||
/post_training/job/artifacts:
|
/post_training/job/artifacts:
|
||||||
get:
|
get:
|
||||||
parameters:
|
parameters:
|
||||||
|
@ -3444,17 +3452,17 @@ security:
|
||||||
servers:
|
servers:
|
||||||
- url: http://any-hosted-llama-stack.com
|
- url: http://any-hosted-llama-stack.com
|
||||||
tags:
|
tags:
|
||||||
- name: Agents
|
|
||||||
- name: RewardScoring
|
|
||||||
- name: Evaluations
|
|
||||||
- name: Safety
|
|
||||||
- name: Telemetry
|
|
||||||
- name: PostTraining
|
- name: PostTraining
|
||||||
- name: Datasets
|
- name: Safety
|
||||||
- name: Inference
|
|
||||||
- name: SyntheticDataGeneration
|
- name: SyntheticDataGeneration
|
||||||
|
- name: Datasets
|
||||||
|
- name: Telemetry
|
||||||
|
- name: Evaluations
|
||||||
|
- name: RewardScoring
|
||||||
|
- name: Agents
|
||||||
- name: Memory
|
- name: Memory
|
||||||
- name: BatchInference
|
- name: BatchInference
|
||||||
|
- name: Inference
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
|
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
|
||||||
name: BuiltinTool
|
name: BuiltinTool
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
|
||||||
|
@ -3564,6 +3572,9 @@ tags:
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/FunctionCallToolDefinition"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/FunctionCallToolDefinition"
|
||||||
/>
|
/>
|
||||||
name: FunctionCallToolDefinition
|
name: FunctionCallToolDefinition
|
||||||
|
- description: <SchemaDefinition schemaRef="#/components/schemas/MemoryToolDefinition"
|
||||||
|
/>
|
||||||
|
name: MemoryToolDefinition
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/PhotogenToolDefinition"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/PhotogenToolDefinition"
|
||||||
/>
|
/>
|
||||||
name: PhotogenToolDefinition
|
name: PhotogenToolDefinition
|
||||||
|
@ -3922,6 +3933,7 @@ x-tagGroups:
|
||||||
- MemoryBank
|
- MemoryBank
|
||||||
- MemoryBankDocument
|
- MemoryBankDocument
|
||||||
- MemoryRetrievalStep
|
- MemoryRetrievalStep
|
||||||
|
- MemoryToolDefinition
|
||||||
- MetricEvent
|
- MetricEvent
|
||||||
- OptimizerConfig
|
- OptimizerConfig
|
||||||
- PhotogenToolDefinition
|
- PhotogenToolDefinition
|
||||||
|
|
|
@ -38,7 +38,7 @@ class MemoryClient(Memory):
|
||||||
async def get_memory_bank(self, bank_id: str) -> Optional[MemoryBank]:
|
async def get_memory_bank(self, bank_id: str) -> Optional[MemoryBank]:
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient() as client:
|
||||||
r = await client.get(
|
r = await client.get(
|
||||||
f"{self.base_url}/memory_banks/get",
|
f"{self.base_url}/memory/get",
|
||||||
params={
|
params={
|
||||||
"bank_id": bank_id,
|
"bank_id": bank_id,
|
||||||
},
|
},
|
||||||
|
@ -59,7 +59,7 @@ class MemoryClient(Memory):
|
||||||
) -> MemoryBank:
|
) -> MemoryBank:
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient() as client:
|
||||||
r = await client.post(
|
r = await client.post(
|
||||||
f"{self.base_url}/memory_banks/create",
|
f"{self.base_url}/memory/create",
|
||||||
json={
|
json={
|
||||||
"name": name,
|
"name": name,
|
||||||
"config": config.dict(),
|
"config": config.dict(),
|
||||||
|
@ -81,7 +81,7 @@ class MemoryClient(Memory):
|
||||||
) -> None:
|
) -> None:
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient() as client:
|
||||||
r = await client.post(
|
r = await client.post(
|
||||||
f"{self.base_url}/memory_bank/insert",
|
f"{self.base_url}/memory/insert",
|
||||||
json={
|
json={
|
||||||
"bank_id": bank_id,
|
"bank_id": bank_id,
|
||||||
"documents": [d.dict() for d in documents],
|
"documents": [d.dict() for d in documents],
|
||||||
|
@ -99,7 +99,7 @@ class MemoryClient(Memory):
|
||||||
) -> QueryDocumentsResponse:
|
) -> QueryDocumentsResponse:
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient() as client:
|
||||||
r = await client.post(
|
r = await client.post(
|
||||||
f"{self.base_url}/memory_bank/query",
|
f"{self.base_url}/memory/query",
|
||||||
json={
|
json={
|
||||||
"bank_id": bank_id,
|
"bank_id": bank_id,
|
||||||
"query": query,
|
"query": query,
|
||||||
|
|
|
@ -96,7 +96,7 @@ class MemoryBank(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
class Memory(Protocol):
|
class Memory(Protocol):
|
||||||
@webmethod(route="/memory_banks/create")
|
@webmethod(route="/memory/create")
|
||||||
async def create_memory_bank(
|
async def create_memory_bank(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
|
@ -104,13 +104,13 @@ class Memory(Protocol):
|
||||||
url: Optional[URL] = None,
|
url: Optional[URL] = None,
|
||||||
) -> MemoryBank: ...
|
) -> MemoryBank: ...
|
||||||
|
|
||||||
@webmethod(route="/memory_banks/list", method="GET")
|
@webmethod(route="/memory/list", method="GET")
|
||||||
async def list_memory_banks(self) -> List[MemoryBank]: ...
|
async def list_memory_banks(self) -> List[MemoryBank]: ...
|
||||||
|
|
||||||
@webmethod(route="/memory_banks/get", method="GET")
|
@webmethod(route="/memory/get", method="GET")
|
||||||
async def get_memory_bank(self, bank_id: str) -> Optional[MemoryBank]: ...
|
async def get_memory_bank(self, bank_id: str) -> Optional[MemoryBank]: ...
|
||||||
|
|
||||||
@webmethod(route="/memory_banks/drop", method="DELETE")
|
@webmethod(route="/memory/drop", method="DELETE")
|
||||||
async def drop_memory_bank(
|
async def drop_memory_bank(
|
||||||
self,
|
self,
|
||||||
bank_id: str,
|
bank_id: str,
|
||||||
|
@ -118,7 +118,7 @@ class Memory(Protocol):
|
||||||
|
|
||||||
# this will just block now until documents are inserted, but it should
|
# this will just block now until documents are inserted, but it should
|
||||||
# probably return a Job instance which can be polled for completion
|
# probably return a Job instance which can be polled for completion
|
||||||
@webmethod(route="/memory_bank/insert")
|
@webmethod(route="/memory/insert")
|
||||||
async def insert_documents(
|
async def insert_documents(
|
||||||
self,
|
self,
|
||||||
bank_id: str,
|
bank_id: str,
|
||||||
|
@ -126,14 +126,14 @@ class Memory(Protocol):
|
||||||
ttl_seconds: Optional[int] = None,
|
ttl_seconds: Optional[int] = None,
|
||||||
) -> None: ...
|
) -> None: ...
|
||||||
|
|
||||||
@webmethod(route="/memory_bank/update")
|
@webmethod(route="/memory/update")
|
||||||
async def update_documents(
|
async def update_documents(
|
||||||
self,
|
self,
|
||||||
bank_id: str,
|
bank_id: str,
|
||||||
documents: List[MemoryBankDocument],
|
documents: List[MemoryBankDocument],
|
||||||
) -> None: ...
|
) -> None: ...
|
||||||
|
|
||||||
@webmethod(route="/memory_bank/query")
|
@webmethod(route="/memory/query")
|
||||||
async def query_documents(
|
async def query_documents(
|
||||||
self,
|
self,
|
||||||
bank_id: str,
|
bank_id: str,
|
||||||
|
@ -141,14 +141,14 @@ class Memory(Protocol):
|
||||||
params: Optional[Dict[str, Any]] = None,
|
params: Optional[Dict[str, Any]] = None,
|
||||||
) -> QueryDocumentsResponse: ...
|
) -> QueryDocumentsResponse: ...
|
||||||
|
|
||||||
@webmethod(route="/memory_bank/documents/get", method="GET")
|
@webmethod(route="/memory/documents/get", method="GET")
|
||||||
async def get_documents(
|
async def get_documents(
|
||||||
self,
|
self,
|
||||||
bank_id: str,
|
bank_id: str,
|
||||||
document_ids: List[str],
|
document_ids: List[str],
|
||||||
) -> List[MemoryBankDocument]: ...
|
) -> List[MemoryBankDocument]: ...
|
||||||
|
|
||||||
@webmethod(route="/memory_bank/documents/delete", method="DELETE")
|
@webmethod(route="/memory/documents/delete", method="DELETE")
|
||||||
async def delete_documents(
|
async def delete_documents(
|
||||||
self,
|
self,
|
||||||
bank_id: str,
|
bank_id: str,
|
||||||
|
|
|
@ -7,11 +7,11 @@
|
||||||
from typing import List, Optional, Protocol
|
from typing import List, Optional, Protocol
|
||||||
|
|
||||||
from llama_models.schema_utils import json_schema_type, webmethod
|
from llama_models.schema_utils import json_schema_type, webmethod
|
||||||
from pydantic import BaseModel, Field
|
|
||||||
|
|
||||||
from llama_stack.apis.memory import MemoryBankType
|
from llama_stack.apis.memory import MemoryBankType
|
||||||
|
|
||||||
from llama_stack.distribution.datatypes import GenericProviderConfig
|
from llama_stack.distribution.datatypes import GenericProviderConfig
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
|
|
|
@ -160,7 +160,11 @@ class StackBuild(Subcommand):
|
||||||
|
|
||||||
def _run_stack_build_command(self, args: argparse.Namespace) -> None:
|
def _run_stack_build_command(self, args: argparse.Namespace) -> None:
|
||||||
import yaml
|
import yaml
|
||||||
from llama_stack.distribution.distribution import Api, api_providers
|
from llama_stack.distribution.distribution import (
|
||||||
|
Api,
|
||||||
|
api_providers,
|
||||||
|
builtin_automatically_routed_apis,
|
||||||
|
)
|
||||||
from llama_stack.distribution.utils.dynamic import instantiate_class_type
|
from llama_stack.distribution.utils.dynamic import instantiate_class_type
|
||||||
from prompt_toolkit import prompt
|
from prompt_toolkit import prompt
|
||||||
from prompt_toolkit.validation import Validator
|
from prompt_toolkit.validation import Validator
|
||||||
|
@ -213,8 +217,15 @@ class StackBuild(Subcommand):
|
||||||
)
|
)
|
||||||
|
|
||||||
providers = dict()
|
providers = dict()
|
||||||
|
all_providers = api_providers()
|
||||||
|
routing_table_apis = set(
|
||||||
|
x.routing_table_api for x in builtin_automatically_routed_apis()
|
||||||
|
)
|
||||||
|
|
||||||
for api in Api:
|
for api in Api:
|
||||||
all_providers = api_providers()
|
if api in routing_table_apis:
|
||||||
|
continue
|
||||||
|
|
||||||
providers_for_api = all_providers[api]
|
providers_for_api = all_providers[api]
|
||||||
|
|
||||||
api_provider = prompt(
|
api_provider = prompt(
|
||||||
|
|
|
@ -145,7 +145,7 @@ class StackConfigure(Subcommand):
|
||||||
built_at=datetime.now(),
|
built_at=datetime.now(),
|
||||||
image_name=image_name,
|
image_name=image_name,
|
||||||
apis_to_serve=[],
|
apis_to_serve=[],
|
||||||
provider_map={},
|
api_providers={},
|
||||||
)
|
)
|
||||||
|
|
||||||
config = configure_api_providers(config, build_config.distribution_spec)
|
config = configure_api_providers(config, build_config.distribution_spec)
|
||||||
|
|
|
@ -9,12 +9,21 @@ from typing import Any
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from llama_stack.distribution.datatypes import * # noqa: F403
|
from llama_stack.distribution.datatypes import * # noqa: F403
|
||||||
from termcolor import cprint
|
from llama_stack.apis.memory.memory import MemoryBankType
|
||||||
|
from llama_stack.distribution.distribution import (
|
||||||
from llama_stack.distribution.distribution import api_providers, stack_apis
|
api_providers,
|
||||||
|
builtin_automatically_routed_apis,
|
||||||
|
stack_apis,
|
||||||
|
)
|
||||||
from llama_stack.distribution.utils.dynamic import instantiate_class_type
|
from llama_stack.distribution.utils.dynamic import instantiate_class_type
|
||||||
|
|
||||||
from llama_stack.distribution.utils.prompt_for_config import prompt_for_config
|
from llama_stack.distribution.utils.prompt_for_config import prompt_for_config
|
||||||
|
from llama_stack.providers.impls.meta_reference.safety.config import (
|
||||||
|
MetaReferenceShieldType,
|
||||||
|
)
|
||||||
|
from prompt_toolkit import prompt
|
||||||
|
from prompt_toolkit.validation import Validator
|
||||||
|
from termcolor import cprint
|
||||||
|
|
||||||
|
|
||||||
def make_routing_entry_type(config_class: Any):
|
def make_routing_entry_type(config_class: Any):
|
||||||
|
@ -25,71 +34,139 @@ def make_routing_entry_type(config_class: Any):
|
||||||
return BaseModelWithConfig
|
return BaseModelWithConfig
|
||||||
|
|
||||||
|
|
||||||
|
def get_builtin_apis(provider_backed_apis: List[str]) -> List[str]:
|
||||||
|
"""Get corresponding builtin APIs given provider backed APIs"""
|
||||||
|
res = []
|
||||||
|
for inf in builtin_automatically_routed_apis():
|
||||||
|
if inf.router_api.value in provider_backed_apis:
|
||||||
|
res.append(inf.routing_table_api.value)
|
||||||
|
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
# TODO: make sure we can deal with existing configuration values correctly
|
# TODO: make sure we can deal with existing configuration values correctly
|
||||||
# instead of just overwriting them
|
# instead of just overwriting them
|
||||||
def configure_api_providers(
|
def configure_api_providers(
|
||||||
config: StackRunConfig, spec: DistributionSpec
|
config: StackRunConfig, spec: DistributionSpec
|
||||||
) -> StackRunConfig:
|
) -> StackRunConfig:
|
||||||
apis = config.apis_to_serve or list(spec.providers.keys())
|
apis = config.apis_to_serve or list(spec.providers.keys())
|
||||||
config.apis_to_serve = [a for a in apis if a != "telemetry"]
|
# append the builtin routing APIs
|
||||||
|
apis += get_builtin_apis(apis)
|
||||||
|
|
||||||
|
router_api2builtin_api = {
|
||||||
|
inf.router_api.value: inf.routing_table_api.value
|
||||||
|
for inf in builtin_automatically_routed_apis()
|
||||||
|
}
|
||||||
|
|
||||||
|
config.apis_to_serve = list(set([a for a in apis if a != "telemetry"]))
|
||||||
|
|
||||||
apis = [v.value for v in stack_apis()]
|
apis = [v.value for v in stack_apis()]
|
||||||
all_providers = api_providers()
|
all_providers = api_providers()
|
||||||
|
|
||||||
|
# configure the simple case: non-routing providers are written to api_providers
|
||||||
for api_str in spec.providers.keys():
|
for api_str in spec.providers.keys():
|
||||||
if api_str not in apis:
|
if api_str not in apis:
|
||||||
raise ValueError(f"Unknown API `{api_str}`")
|
raise ValueError(f"Unknown API `{api_str}`")
|
||||||
|
|
||||||
cprint(f"Configuring API `{api_str}`...\n", "white", attrs=["bold"])
|
cprint(f"Configuring API `{api_str}`...", "green", attrs=["bold"])
|
||||||
api = Api(api_str)
|
api = Api(api_str)
|
||||||
|
|
||||||
provider_or_providers = spec.providers[api_str]
|
p = spec.providers[api_str]
|
||||||
if isinstance(provider_or_providers, list) and len(provider_or_providers) > 1:
|
cprint(f"=== Configuring provider `{p}` for API {api_str}...", "green")
|
||||||
print(
|
|
||||||
"You have specified multiple providers for this API. We will configure a routing table now. For each provider, provide a routing key followed by provider configuration.\n"
|
if isinstance(p, list):
|
||||||
|
cprint(
|
||||||
|
f"[WARN] Interactive configuration of multiple providers {p} is not supported, configuring {p[0]} only, please manually configure {p[1:]} in routing_table of run.yaml",
|
||||||
|
"yellow",
|
||||||
)
|
)
|
||||||
|
p = p[0]
|
||||||
|
|
||||||
|
provider_spec = all_providers[api][p]
|
||||||
|
config_type = instantiate_class_type(provider_spec.config_class)
|
||||||
|
try:
|
||||||
|
provider_config = config.api_providers.get(api_str)
|
||||||
|
if provider_config:
|
||||||
|
existing = config_type(**provider_config.config)
|
||||||
|
else:
|
||||||
|
existing = None
|
||||||
|
except Exception:
|
||||||
|
existing = None
|
||||||
|
cfg = prompt_for_config(config_type, existing)
|
||||||
|
|
||||||
|
if api_str in router_api2builtin_api:
|
||||||
|
# a routing api, we need to infer and assign it a routing_key and put it in the routing_table
|
||||||
|
routing_key = "<PLEASE_FILL_ROUTING_KEY>"
|
||||||
routing_entries = []
|
routing_entries = []
|
||||||
for p in provider_or_providers:
|
if api_str == "inference":
|
||||||
print(f"Configuring provider `{p}`...")
|
if hasattr(cfg, "model"):
|
||||||
provider_spec = all_providers[api][p]
|
routing_key = cfg.model
|
||||||
config_type = instantiate_class_type(provider_spec.config_class)
|
else:
|
||||||
|
routing_key = prompt(
|
||||||
# TODO: we need to validate the routing keys, and
|
"> Please enter the supported model your provider has for inference: ",
|
||||||
# perhaps it is better if we break this out into asking
|
default="Meta-Llama3.1-8B-Instruct",
|
||||||
# for a routing key separately from the associated config
|
)
|
||||||
wrapper_type = make_routing_entry_type(config_type)
|
|
||||||
rt_entry = prompt_for_config(wrapper_type, None)
|
|
||||||
|
|
||||||
routing_entries.append(
|
routing_entries.append(
|
||||||
ProviderRoutingEntry(
|
RoutableProviderConfig(
|
||||||
|
routing_key=routing_key,
|
||||||
provider_id=p,
|
provider_id=p,
|
||||||
routing_key=rt_entry.routing_key,
|
config=cfg.dict(),
|
||||||
config=rt_entry.config.dict(),
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
config.provider_map[api_str] = routing_entries
|
|
||||||
else:
|
if api_str == "safety":
|
||||||
p = (
|
# TODO: add support for other safety providers, and simplify safety provider config
|
||||||
provider_or_providers[0]
|
if p == "meta-reference":
|
||||||
if isinstance(provider_or_providers, list)
|
for shield_type in MetaReferenceShieldType:
|
||||||
else provider_or_providers
|
routing_entries.append(
|
||||||
)
|
RoutableProviderConfig(
|
||||||
print(f"Configuring provider `{p}`...")
|
routing_key=shield_type.value,
|
||||||
provider_spec = all_providers[api][p]
|
provider_id=p,
|
||||||
config_type = instantiate_class_type(provider_spec.config_class)
|
config=cfg.dict(),
|
||||||
try:
|
)
|
||||||
provider_config = config.provider_map.get(api_str)
|
)
|
||||||
if provider_config:
|
|
||||||
existing = config_type(**provider_config.config)
|
|
||||||
else:
|
else:
|
||||||
existing = None
|
cprint(
|
||||||
except Exception:
|
f"[WARN] Interactive configuration of safety provider {p} is not supported, please manually configure safety shields types in routing_table of run.yaml",
|
||||||
existing = None
|
"yellow",
|
||||||
cfg = prompt_for_config(config_type, existing)
|
)
|
||||||
config.provider_map[api_str] = GenericProviderConfig(
|
routing_entries.append(
|
||||||
|
RoutableProviderConfig(
|
||||||
|
routing_key=routing_key,
|
||||||
|
provider_id=p,
|
||||||
|
config=cfg.dict(),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
if api_str == "memory":
|
||||||
|
bank_types = list([x.value for x in MemoryBankType])
|
||||||
|
routing_key = prompt(
|
||||||
|
"> Please enter the supported memory bank type your provider has for memory: ",
|
||||||
|
default="vector",
|
||||||
|
validator=Validator.from_callable(
|
||||||
|
lambda x: x in bank_types,
|
||||||
|
error_message="Invalid provider, please enter one of the following: {}".format(
|
||||||
|
bank_types
|
||||||
|
),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
routing_entries.append(
|
||||||
|
RoutableProviderConfig(
|
||||||
|
routing_key=routing_key,
|
||||||
|
provider_id=p,
|
||||||
|
config=cfg.dict(),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
config.routing_table[api_str] = routing_entries
|
||||||
|
config.api_providers[api_str] = PlaceholderProviderConfig(
|
||||||
|
providers=p if isinstance(p, list) else [p]
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
config.api_providers[api_str] = GenericProviderConfig(
|
||||||
provider_id=p,
|
provider_id=p,
|
||||||
config=cfg.dict(),
|
config=cfg.dict(),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
print("")
|
||||||
|
|
||||||
return config
|
return config
|
||||||
|
|
|
@ -59,17 +59,16 @@ class GenericProviderConfig(BaseModel):
|
||||||
config: Dict[str, Any]
|
config: Dict[str, Any]
|
||||||
|
|
||||||
|
|
||||||
|
class PlaceholderProviderConfig(BaseModel):
|
||||||
|
"""Placeholder provider config for APIs whose providers are defined in routing_table"""
|
||||||
|
|
||||||
|
providers: List[str]
|
||||||
|
|
||||||
|
|
||||||
class RoutableProviderConfig(GenericProviderConfig):
|
class RoutableProviderConfig(GenericProviderConfig):
|
||||||
routing_key: str
|
routing_key: str
|
||||||
|
|
||||||
|
|
||||||
class RoutingTableConfig(BaseModel):
|
|
||||||
entries: List[RoutableProviderConfig] = Field(...)
|
|
||||||
keys: Optional[List[str]] = Field(
|
|
||||||
default=None,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# Example: /inference, /safety
|
# Example: /inference, /safety
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class AutoRoutedProviderSpec(ProviderSpec):
|
class AutoRoutedProviderSpec(ProviderSpec):
|
||||||
|
@ -270,12 +269,14 @@ this could be just a hash
|
||||||
The list of APIs to serve. If not specified, all APIs specified in the provider_map will be served""",
|
The list of APIs to serve. If not specified, all APIs specified in the provider_map will be served""",
|
||||||
)
|
)
|
||||||
|
|
||||||
api_providers: Dict[str, GenericProviderConfig] = Field(
|
api_providers: Dict[
|
||||||
|
str, Union[GenericProviderConfig, PlaceholderProviderConfig]
|
||||||
|
] = Field(
|
||||||
description="""
|
description="""
|
||||||
Provider configurations for each of the APIs provided by this package.
|
Provider configurations for each of the APIs provided by this package.
|
||||||
""",
|
""",
|
||||||
)
|
)
|
||||||
routing_tables: Dict[str, RoutingTableConfig] = Field(
|
routing_table: Dict[str, List[RoutableProviderConfig]] = Field(
|
||||||
default_factory=dict,
|
default_factory=dict,
|
||||||
description="""
|
description="""
|
||||||
|
|
||||||
|
|
|
@ -8,8 +8,6 @@ import importlib
|
||||||
import inspect
|
import inspect
|
||||||
from typing import Dict, List
|
from typing import Dict, List
|
||||||
|
|
||||||
from pydantic import BaseModel
|
|
||||||
|
|
||||||
from llama_stack.apis.agents import Agents
|
from llama_stack.apis.agents import Agents
|
||||||
from llama_stack.apis.inference import Inference
|
from llama_stack.apis.inference import Inference
|
||||||
from llama_stack.apis.memory import Memory
|
from llama_stack.apis.memory import Memory
|
||||||
|
@ -19,6 +17,8 @@ from llama_stack.apis.safety import Safety
|
||||||
from llama_stack.apis.shields import Shields
|
from llama_stack.apis.shields import Shields
|
||||||
from llama_stack.apis.telemetry import Telemetry
|
from llama_stack.apis.telemetry import Telemetry
|
||||||
|
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from .datatypes import Api, ApiEndpoint, ProviderSpec, remote_provider_spec
|
from .datatypes import Api, ApiEndpoint, ProviderSpec, remote_provider_spec
|
||||||
|
|
||||||
# These are the dependencies needed by the distribution server.
|
# These are the dependencies needed by the distribution server.
|
||||||
|
|
|
@ -12,7 +12,7 @@ from llama_stack.distribution.datatypes import * # noqa: F403
|
||||||
async def get_routing_table_impl(
|
async def get_routing_table_impl(
|
||||||
api: Api,
|
api: Api,
|
||||||
inner_impls: List[Tuple[str, Any]],
|
inner_impls: List[Tuple[str, Any]],
|
||||||
routing_table_config: RoutingTableConfig,
|
routing_table_config: Dict[str, List[RoutableProviderConfig]],
|
||||||
_deps,
|
_deps,
|
||||||
) -> Any:
|
) -> Any:
|
||||||
from .routing_tables import (
|
from .routing_tables import (
|
||||||
|
|
|
@ -46,9 +46,9 @@ class MemoryRouter(Memory):
|
||||||
url: Optional[URL] = None,
|
url: Optional[URL] = None,
|
||||||
) -> MemoryBank:
|
) -> MemoryBank:
|
||||||
bank_type = config.type
|
bank_type = config.type
|
||||||
provider = await self.routing_table.get_provider_impl(
|
bank = await self.routing_table.get_provider_impl(bank_type).create_memory_bank(
|
||||||
bank_type
|
name, config, url
|
||||||
).create_memory_bank(name, config, url)
|
)
|
||||||
self.bank_id_to_type[bank.bank_id] = bank_type
|
self.bank_id_to_type[bank.bank_id] = bank_type
|
||||||
return bank
|
return bank
|
||||||
|
|
||||||
|
@ -162,6 +162,7 @@ class SafetyRouter(Safety):
|
||||||
messages: List[Message],
|
messages: List[Message],
|
||||||
params: Dict[str, Any] = None,
|
params: Dict[str, Any] = None,
|
||||||
) -> RunShieldResponse:
|
) -> RunShieldResponse:
|
||||||
|
print(f"Running shield {shield_type}")
|
||||||
return await self.routing_table.get_provider_impl(shield_type).run_shield(
|
return await self.routing_table.get_provider_impl(shield_type).run_shield(
|
||||||
shield_type=shield_type,
|
shield_type=shield_type,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
|
|
|
@ -20,7 +20,7 @@ class CommonRoutingTableImpl(RoutingTable):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
inner_impls: List[Tuple[str, Any]],
|
inner_impls: List[Tuple[str, Any]],
|
||||||
routing_table_config: RoutingTableConfig,
|
routing_table_config: Dict[str, List[RoutableProviderConfig]],
|
||||||
) -> None:
|
) -> None:
|
||||||
self.providers = {k: v for k, v in inner_impls}
|
self.providers = {k: v for k, v in inner_impls}
|
||||||
self.routing_keys = list(self.providers.keys())
|
self.routing_keys = list(self.providers.keys())
|
||||||
|
@ -40,7 +40,7 @@ class CommonRoutingTableImpl(RoutingTable):
|
||||||
return self.routing_keys
|
return self.routing_keys
|
||||||
|
|
||||||
def get_provider_config(self, routing_key: str) -> Optional[GenericProviderConfig]:
|
def get_provider_config(self, routing_key: str) -> Optional[GenericProviderConfig]:
|
||||||
for entry in self.routing_table_config.entries:
|
for entry in self.routing_table_config:
|
||||||
if entry.routing_key == routing_key:
|
if entry.routing_key == routing_key:
|
||||||
return entry
|
return entry
|
||||||
return None
|
return None
|
||||||
|
@ -50,7 +50,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
|
||||||
|
|
||||||
async def list_models(self) -> List[ModelServingSpec]:
|
async def list_models(self) -> List[ModelServingSpec]:
|
||||||
specs = []
|
specs = []
|
||||||
for entry in self.routing_table_config.entries:
|
for entry in self.routing_table_config:
|
||||||
model_id = entry.routing_key
|
model_id = entry.routing_key
|
||||||
specs.append(
|
specs.append(
|
||||||
ModelServingSpec(
|
ModelServingSpec(
|
||||||
|
@ -61,7 +61,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
|
||||||
return specs
|
return specs
|
||||||
|
|
||||||
async def get_model(self, core_model_id: str) -> Optional[ModelServingSpec]:
|
async def get_model(self, core_model_id: str) -> Optional[ModelServingSpec]:
|
||||||
for entry in self.routing_table_config.entries:
|
for entry in self.routing_table_config:
|
||||||
if entry.routing_key == core_model_id:
|
if entry.routing_key == core_model_id:
|
||||||
return ModelServingSpec(
|
return ModelServingSpec(
|
||||||
llama_model=resolve_model(core_model_id),
|
llama_model=resolve_model(core_model_id),
|
||||||
|
@ -74,7 +74,7 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
|
||||||
|
|
||||||
async def list_shields(self) -> List[ShieldSpec]:
|
async def list_shields(self) -> List[ShieldSpec]:
|
||||||
specs = []
|
specs = []
|
||||||
for entry in self.routing_table_config.entries:
|
for entry in self.routing_table_config:
|
||||||
specs.append(
|
specs.append(
|
||||||
ShieldSpec(
|
ShieldSpec(
|
||||||
shield_type=entry.routing_key,
|
shield_type=entry.routing_key,
|
||||||
|
@ -84,7 +84,7 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
|
||||||
return specs
|
return specs
|
||||||
|
|
||||||
async def get_shield(self, shield_type: str) -> Optional[ShieldSpec]:
|
async def get_shield(self, shield_type: str) -> Optional[ShieldSpec]:
|
||||||
for entry in self.routing_table_config.entries:
|
for entry in self.routing_table_config:
|
||||||
if entry.routing_key == shield_type:
|
if entry.routing_key == shield_type:
|
||||||
return ShieldSpec(
|
return ShieldSpec(
|
||||||
shield_type=entry.routing_key,
|
shield_type=entry.routing_key,
|
||||||
|
@ -97,7 +97,7 @@ class MemoryBanksRoutingTable(CommonRoutingTableImpl, MemoryBanks):
|
||||||
|
|
||||||
async def list_memory_banks(self) -> List[MemoryBankSpec]:
|
async def list_memory_banks(self) -> List[MemoryBankSpec]:
|
||||||
specs = []
|
specs = []
|
||||||
for entry in self.routing_table_config.entries:
|
for entry in self.routing_table_config:
|
||||||
specs.append(
|
specs.append(
|
||||||
MemoryBankSpec(
|
MemoryBankSpec(
|
||||||
bank_type=entry.routing_key,
|
bank_type=entry.routing_key,
|
||||||
|
@ -107,7 +107,7 @@ class MemoryBanksRoutingTable(CommonRoutingTableImpl, MemoryBanks):
|
||||||
return specs
|
return specs
|
||||||
|
|
||||||
async def get_memory_bank(self, bank_type: str) -> Optional[MemoryBankSpec]:
|
async def get_memory_bank(self, bank_type: str) -> Optional[MemoryBankSpec]:
|
||||||
for entry in self.routing_table_config.entries:
|
for entry in self.routing_table_config:
|
||||||
if entry.routing_key == bank_type:
|
if entry.routing_key == bank_type:
|
||||||
return MemoryBankSpec(
|
return MemoryBankSpec(
|
||||||
bank_type=entry.routing_key,
|
bank_type=entry.routing_key,
|
||||||
|
|
|
@ -35,9 +35,6 @@ from fastapi import Body, FastAPI, HTTPException, Request, Response
|
||||||
from fastapi.exceptions import RequestValidationError
|
from fastapi.exceptions import RequestValidationError
|
||||||
from fastapi.responses import JSONResponse, StreamingResponse
|
from fastapi.responses import JSONResponse, StreamingResponse
|
||||||
from fastapi.routing import APIRoute
|
from fastapi.routing import APIRoute
|
||||||
from pydantic import BaseModel, ValidationError
|
|
||||||
from termcolor import cprint
|
|
||||||
from typing_extensions import Annotated
|
|
||||||
|
|
||||||
from llama_stack.providers.utils.telemetry.tracing import (
|
from llama_stack.providers.utils.telemetry.tracing import (
|
||||||
end_trace,
|
end_trace,
|
||||||
|
@ -45,6 +42,9 @@ from llama_stack.providers.utils.telemetry.tracing import (
|
||||||
SpanStatus,
|
SpanStatus,
|
||||||
start_trace,
|
start_trace,
|
||||||
)
|
)
|
||||||
|
from pydantic import BaseModel, ValidationError
|
||||||
|
from termcolor import cprint
|
||||||
|
from typing_extensions import Annotated
|
||||||
from llama_stack.distribution.datatypes import * # noqa: F403
|
from llama_stack.distribution.datatypes import * # noqa: F403
|
||||||
|
|
||||||
from llama_stack.distribution.distribution import (
|
from llama_stack.distribution.distribution import (
|
||||||
|
@ -307,6 +307,10 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An
|
||||||
# TODO: check that these APIs are not in the routing table part of the config
|
# TODO: check that these APIs are not in the routing table part of the config
|
||||||
providers = all_providers[api]
|
providers = all_providers[api]
|
||||||
|
|
||||||
|
# skip checks for API whose provider config is specified in routing_table
|
||||||
|
if isinstance(config, PlaceholderProviderConfig):
|
||||||
|
continue
|
||||||
|
|
||||||
if config.provider_id not in providers:
|
if config.provider_id not in providers:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Unknown provider `{config.provider_id}` is not available for API `{api}`"
|
f"Unknown provider `{config.provider_id}` is not available for API `{api}`"
|
||||||
|
@ -315,9 +319,8 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An
|
||||||
configs[api] = config
|
configs[api] = config
|
||||||
|
|
||||||
apis_to_serve = run_config.apis_to_serve or set(
|
apis_to_serve = run_config.apis_to_serve or set(
|
||||||
list(specs.keys()) + list(run_config.routing_tables.keys())
|
list(specs.keys()) + list(run_config.routing_table.keys())
|
||||||
)
|
)
|
||||||
print("apis_to_serve", apis_to_serve)
|
|
||||||
for info in builtin_automatically_routed_apis():
|
for info in builtin_automatically_routed_apis():
|
||||||
source_api = info.routing_table_api
|
source_api = info.routing_table_api
|
||||||
|
|
||||||
|
@ -331,15 +334,16 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An
|
||||||
if info.router_api.value not in apis_to_serve:
|
if info.router_api.value not in apis_to_serve:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if source_api.value not in run_config.routing_tables:
|
print("router_api", info.router_api)
|
||||||
|
if info.router_api.value not in run_config.routing_table:
|
||||||
raise ValueError(f"Routing table for `{source_api.value}` is not provided?")
|
raise ValueError(f"Routing table for `{source_api.value}` is not provided?")
|
||||||
|
|
||||||
routing_table = run_config.routing_tables[source_api.value]
|
routing_table = run_config.routing_table[info.router_api.value]
|
||||||
|
|
||||||
providers = all_providers[info.router_api]
|
providers = all_providers[info.router_api]
|
||||||
|
|
||||||
inner_specs = []
|
inner_specs = []
|
||||||
for rt_entry in routing_table.entries:
|
for rt_entry in routing_table:
|
||||||
if rt_entry.provider_id not in providers:
|
if rt_entry.provider_id not in providers:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Unknown provider `{rt_entry.provider_id}` is not available for API `{api}`"
|
f"Unknown provider `{rt_entry.provider_id}` is not available for API `{api}`"
|
||||||
|
|
|
@ -8,6 +8,7 @@ import importlib
|
||||||
from typing import Any, Dict
|
from typing import Any, Dict
|
||||||
|
|
||||||
from llama_stack.distribution.datatypes import * # noqa: F403
|
from llama_stack.distribution.datatypes import * # noqa: F403
|
||||||
|
from termcolor import cprint
|
||||||
|
|
||||||
|
|
||||||
def instantiate_class_type(fully_qualified_name):
|
def instantiate_class_type(fully_qualified_name):
|
||||||
|
@ -43,12 +44,12 @@ async def instantiate_provider(
|
||||||
elif isinstance(provider_spec, RoutingTableProviderSpec):
|
elif isinstance(provider_spec, RoutingTableProviderSpec):
|
||||||
method = "get_routing_table_impl"
|
method = "get_routing_table_impl"
|
||||||
|
|
||||||
assert isinstance(provider_config, RoutingTableConfig)
|
assert isinstance(provider_config, List)
|
||||||
routing_table = provider_config
|
routing_table = provider_config
|
||||||
|
|
||||||
inner_specs = {x.provider_id: x for x in provider_spec.inner_specs}
|
inner_specs = {x.provider_id: x for x in provider_spec.inner_specs}
|
||||||
inner_impls = []
|
inner_impls = []
|
||||||
for routing_entry in routing_table.entries:
|
for routing_entry in routing_table:
|
||||||
impl = await instantiate_provider(
|
impl = await instantiate_provider(
|
||||||
inner_specs[routing_entry.provider_id],
|
inner_specs[routing_entry.provider_id],
|
||||||
deps,
|
deps,
|
||||||
|
|
87
tests/examples/local-run.yaml
Normal file
87
tests/examples/local-run.yaml
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
built_at: '2024-09-23T00:54:40.551416'
|
||||||
|
image_name: test-2
|
||||||
|
docker_image: null
|
||||||
|
conda_env: test-2
|
||||||
|
apis_to_serve:
|
||||||
|
- shields
|
||||||
|
- agents
|
||||||
|
- models
|
||||||
|
- memory
|
||||||
|
- memory_banks
|
||||||
|
- inference
|
||||||
|
- safety
|
||||||
|
api_providers:
|
||||||
|
inference:
|
||||||
|
providers:
|
||||||
|
- meta-reference
|
||||||
|
safety:
|
||||||
|
providers:
|
||||||
|
- meta-reference
|
||||||
|
agents:
|
||||||
|
provider_id: meta-reference
|
||||||
|
config:
|
||||||
|
persistence_store:
|
||||||
|
namespace: null
|
||||||
|
type: sqlite
|
||||||
|
db_path: /home/xiyan/.llama/runtime/kvstore.db
|
||||||
|
memory:
|
||||||
|
providers:
|
||||||
|
- meta-reference
|
||||||
|
telemetry:
|
||||||
|
provider_id: meta-reference
|
||||||
|
config: {}
|
||||||
|
routing_table:
|
||||||
|
inference:
|
||||||
|
- provider_id: meta-reference
|
||||||
|
config:
|
||||||
|
model: Meta-Llama3.1-8B-Instruct
|
||||||
|
quantization: null
|
||||||
|
torch_seed: null
|
||||||
|
max_seq_len: 4096
|
||||||
|
max_batch_size: 1
|
||||||
|
routing_key: Meta-Llama3.1-8B-Instruct
|
||||||
|
safety:
|
||||||
|
- provider_id: meta-reference
|
||||||
|
config:
|
||||||
|
llama_guard_shield:
|
||||||
|
model: Llama-Guard-3-8B
|
||||||
|
excluded_categories: []
|
||||||
|
disable_input_check: false
|
||||||
|
disable_output_check: false
|
||||||
|
prompt_guard_shield:
|
||||||
|
model: Prompt-Guard-86M
|
||||||
|
routing_key: llama_guard
|
||||||
|
- provider_id: meta-reference
|
||||||
|
config:
|
||||||
|
llama_guard_shield:
|
||||||
|
model: Llama-Guard-3-8B
|
||||||
|
excluded_categories: []
|
||||||
|
disable_input_check: false
|
||||||
|
disable_output_check: false
|
||||||
|
prompt_guard_shield:
|
||||||
|
model: Prompt-Guard-86M
|
||||||
|
routing_key: code_scanner_guard
|
||||||
|
- provider_id: meta-reference
|
||||||
|
config:
|
||||||
|
llama_guard_shield:
|
||||||
|
model: Llama-Guard-3-8B
|
||||||
|
excluded_categories: []
|
||||||
|
disable_input_check: false
|
||||||
|
disable_output_check: false
|
||||||
|
prompt_guard_shield:
|
||||||
|
model: Prompt-Guard-86M
|
||||||
|
routing_key: injection_shield
|
||||||
|
- provider_id: meta-reference
|
||||||
|
config:
|
||||||
|
llama_guard_shield:
|
||||||
|
model: Llama-Guard-3-8B
|
||||||
|
excluded_categories: []
|
||||||
|
disable_input_check: false
|
||||||
|
disable_output_check: false
|
||||||
|
prompt_guard_shield:
|
||||||
|
model: Prompt-Guard-86M
|
||||||
|
routing_key: jailbreak_shield
|
||||||
|
memory:
|
||||||
|
- provider_id: meta-reference
|
||||||
|
config: {}
|
||||||
|
routing_key: vector
|
|
@ -1,50 +0,0 @@
|
||||||
built_at: '2024-09-18T13:41:17.656743'
|
|
||||||
image_name: local
|
|
||||||
docker_image: null
|
|
||||||
conda_env: local
|
|
||||||
apis_to_serve:
|
|
||||||
- inference
|
|
||||||
- memory
|
|
||||||
- telemetry
|
|
||||||
- agents
|
|
||||||
- safety
|
|
||||||
- models
|
|
||||||
provider_map:
|
|
||||||
telemetry:
|
|
||||||
provider_id: meta-reference
|
|
||||||
config: {}
|
|
||||||
safety:
|
|
||||||
provider_id: meta-reference
|
|
||||||
config:
|
|
||||||
llama_guard_shield:
|
|
||||||
model: Llama-Guard-3-8B
|
|
||||||
excluded_categories: []
|
|
||||||
disable_input_check: false
|
|
||||||
disable_output_check: false
|
|
||||||
prompt_guard_shield:
|
|
||||||
model: Prompt-Guard-86M
|
|
||||||
agents:
|
|
||||||
provider_id: meta-reference
|
|
||||||
config: {}
|
|
||||||
provider_routing_table:
|
|
||||||
inference:
|
|
||||||
- routing_key: Meta-Llama3.1-8B-Instruct
|
|
||||||
provider_id: meta-reference
|
|
||||||
config:
|
|
||||||
model: Meta-Llama3.1-8B-Instruct
|
|
||||||
quantization: null
|
|
||||||
torch_seed: null
|
|
||||||
max_seq_len: 4096
|
|
||||||
max_batch_size: 1
|
|
||||||
- routing_key: Meta-Llama3.1-8B
|
|
||||||
provider_id: meta-reference
|
|
||||||
config:
|
|
||||||
model: Meta-Llama3.1-8B
|
|
||||||
quantization: null
|
|
||||||
torch_seed: null
|
|
||||||
max_seq_len: 4096
|
|
||||||
max_batch_size: 1
|
|
||||||
memory:
|
|
||||||
- routing_key: vector
|
|
||||||
provider_id: meta-reference
|
|
||||||
config: {}
|
|
|
@ -1,40 +0,0 @@
|
||||||
built_at: '2024-09-19T22:50:36.239761'
|
|
||||||
image_name: simple-local
|
|
||||||
docker_image: null
|
|
||||||
conda_env: simple-local
|
|
||||||
apis_to_serve:
|
|
||||||
- inference
|
|
||||||
- safety
|
|
||||||
- agents
|
|
||||||
- memory
|
|
||||||
- models
|
|
||||||
- telemetry
|
|
||||||
provider_map:
|
|
||||||
inference:
|
|
||||||
provider_id: meta-reference
|
|
||||||
config:
|
|
||||||
model: Meta-Llama3.1-8B-Instruct
|
|
||||||
quantization: null
|
|
||||||
torch_seed: null
|
|
||||||
max_seq_len: 4096
|
|
||||||
max_batch_size: 1
|
|
||||||
safety:
|
|
||||||
provider_id: meta-reference
|
|
||||||
config:
|
|
||||||
llama_guard_shield:
|
|
||||||
model: Llama-Guard-3-8B
|
|
||||||
excluded_categories: []
|
|
||||||
disable_input_check: false
|
|
||||||
disable_output_check: false
|
|
||||||
prompt_guard_shield:
|
|
||||||
model: Prompt-Guard-86M
|
|
||||||
agents:
|
|
||||||
provider_id: meta-reference
|
|
||||||
config: {}
|
|
||||||
memory:
|
|
||||||
provider_id: meta-reference
|
|
||||||
config: {}
|
|
||||||
telemetry:
|
|
||||||
provider_id: meta-reference
|
|
||||||
config: {}
|
|
||||||
provider_routing_table: {}
|
|
Loading…
Add table
Add a link
Reference in a new issue