[api_updates_3] fix CLI for routing_table, bug fixes for memory & safety (#90)

* fix llama stack build

* fix configure

* fix configure for simple case

* configure w/ routing

* move examples config

* fix memory router naming

* issue w/ safety

* fix config w/ safety

* update memory endpoints

* allow providers in api_providers

* configure script works

* all endpoints w/ build->configure->run simple local works

* new example run.yaml

* run openapi generator
This commit is contained in:
Xi Yan 2024-09-23 08:46:33 -07:00 committed by GitHub
parent 8cf634e615
commit ddebf9b6e7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 725 additions and 605 deletions

View file

@ -21,7 +21,7 @@
"info": { "info": {
"title": "[DRAFT] Llama Stack Specification", "title": "[DRAFT] Llama Stack Specification",
"version": "0.0.1", "version": "0.0.1",
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-20 14:53:17.090953" "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-23 01:08:55.758597"
}, },
"servers": [ "servers": [
{ {
@ -422,7 +422,7 @@
} }
} }
}, },
"/memory_banks/create": { "/memory/create": {
"post": { "post": {
"responses": { "responses": {
"200": { "200": {
@ -561,7 +561,7 @@
} }
} }
}, },
"/memory_bank/documents/delete": { "/memory/documents/delete": {
"post": { "post": {
"responses": { "responses": {
"200": { "200": {
@ -594,7 +594,7 @@
} }
} }
}, },
"/memory_banks/drop": { "/memory/drop": {
"post": { "post": {
"responses": { "responses": {
"200": { "200": {
@ -988,7 +988,7 @@
] ]
} }
}, },
"/memory_bank/documents/get": { "/memory/documents/get": {
"post": { "post": {
"responses": { "responses": {
"200": { "200": {
@ -1180,7 +1180,7 @@
] ]
} }
}, },
"/memory_banks/get": { "/memory/get": {
"get": { "get": {
"responses": { "responses": {
"200": { "200": {
@ -1407,7 +1407,7 @@
] ]
} }
}, },
"/memory_bank/insert": { "/memory/insert": {
"post": { "post": {
"responses": { "responses": {
"200": { "200": {
@ -1440,7 +1440,7 @@
} }
} }
}, },
"/memory_banks/list": { "/memory/list": {
"get": { "get": {
"responses": { "responses": {
"200": { "200": {
@ -1543,7 +1543,7 @@
} }
} }
}, },
"/memory_bank/query": { "/memory/query": {
"post": { "post": {
"responses": { "responses": {
"200": { "200": {
@ -1743,7 +1743,7 @@
} }
} }
}, },
"/memory_bank/update": { "/memory/update": {
"post": { "post": {
"responses": { "responses": {
"200": { "200": {
@ -2584,183 +2584,7 @@
"$ref": "#/components/schemas/FunctionCallToolDefinition" "$ref": "#/components/schemas/FunctionCallToolDefinition"
}, },
{ {
"type": "object", "$ref": "#/components/schemas/MemoryToolDefinition"
"properties": {
"input_shields": {
"type": "array",
"items": {
"type": "string"
}
},
"output_shields": {
"type": "array",
"items": {
"type": "string"
}
},
"type": {
"type": "string",
"const": "memory"
},
"memory_bank_configs": {
"type": "array",
"items": {
"oneOf": [
{
"type": "object",
"properties": {
"bank_id": {
"type": "string"
},
"type": {
"type": "string",
"const": "vector"
}
},
"additionalProperties": false,
"required": [
"bank_id",
"type"
]
},
{
"type": "object",
"properties": {
"bank_id": {
"type": "string"
},
"type": {
"type": "string",
"const": "keyvalue"
},
"keys": {
"type": "array",
"items": {
"type": "string"
}
}
},
"additionalProperties": false,
"required": [
"bank_id",
"type",
"keys"
]
},
{
"type": "object",
"properties": {
"bank_id": {
"type": "string"
},
"type": {
"type": "string",
"const": "keyword"
}
},
"additionalProperties": false,
"required": [
"bank_id",
"type"
]
},
{
"type": "object",
"properties": {
"bank_id": {
"type": "string"
},
"type": {
"type": "string",
"const": "graph"
},
"entities": {
"type": "array",
"items": {
"type": "string"
}
}
},
"additionalProperties": false,
"required": [
"bank_id",
"type",
"entities"
]
}
]
}
},
"query_generator_config": {
"oneOf": [
{
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "default"
},
"sep": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"type",
"sep"
]
},
{
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "llm"
},
"model": {
"type": "string"
},
"template": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"type",
"model",
"template"
]
},
{
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "custom"
}
},
"additionalProperties": false,
"required": [
"type"
]
}
]
},
"max_tokens_in_context": {
"type": "integer"
},
"max_chunks": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"type",
"memory_bank_configs",
"query_generator_config",
"max_tokens_in_context",
"max_chunks"
]
} }
] ]
} }
@ -2771,17 +2595,25 @@
"tool_prompt_format": { "tool_prompt_format": {
"$ref": "#/components/schemas/ToolPromptFormat" "$ref": "#/components/schemas/ToolPromptFormat"
}, },
"max_infer_iters": {
"type": "integer"
},
"model": { "model": {
"type": "string" "type": "string"
}, },
"instructions": { "instructions": {
"type": "string" "type": "string"
},
"enable_session_persistence": {
"type": "boolean"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"max_infer_iters",
"model", "model",
"instructions" "instructions",
"enable_session_persistence"
] ]
}, },
"CodeInterpreterToolDefinition": { "CodeInterpreterToolDefinition": {
@ -2859,6 +2691,185 @@
"parameters" "parameters"
] ]
}, },
"MemoryToolDefinition": {
"type": "object",
"properties": {
"input_shields": {
"type": "array",
"items": {
"type": "string"
}
},
"output_shields": {
"type": "array",
"items": {
"type": "string"
}
},
"type": {
"type": "string",
"const": "memory"
},
"memory_bank_configs": {
"type": "array",
"items": {
"oneOf": [
{
"type": "object",
"properties": {
"bank_id": {
"type": "string"
},
"type": {
"type": "string",
"const": "vector"
}
},
"additionalProperties": false,
"required": [
"bank_id",
"type"
]
},
{
"type": "object",
"properties": {
"bank_id": {
"type": "string"
},
"type": {
"type": "string",
"const": "keyvalue"
},
"keys": {
"type": "array",
"items": {
"type": "string"
}
}
},
"additionalProperties": false,
"required": [
"bank_id",
"type",
"keys"
]
},
{
"type": "object",
"properties": {
"bank_id": {
"type": "string"
},
"type": {
"type": "string",
"const": "keyword"
}
},
"additionalProperties": false,
"required": [
"bank_id",
"type"
]
},
{
"type": "object",
"properties": {
"bank_id": {
"type": "string"
},
"type": {
"type": "string",
"const": "graph"
},
"entities": {
"type": "array",
"items": {
"type": "string"
}
}
},
"additionalProperties": false,
"required": [
"bank_id",
"type",
"entities"
]
}
]
}
},
"query_generator_config": {
"oneOf": [
{
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "default"
},
"sep": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"type",
"sep"
]
},
{
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "llm"
},
"model": {
"type": "string"
},
"template": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"type",
"model",
"template"
]
},
{
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "custom"
}
},
"additionalProperties": false,
"required": [
"type"
]
}
]
},
"max_tokens_in_context": {
"type": "integer"
},
"max_chunks": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"type",
"memory_bank_configs",
"query_generator_config",
"max_tokens_in_context",
"max_chunks"
]
},
"PhotogenToolDefinition": { "PhotogenToolDefinition": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -5569,31 +5580,28 @@
], ],
"tags": [ "tags": [
{ {
"name": "Agents" "name": "PostTraining"
},
{
"name": "RewardScoring"
},
{
"name": "Evaluations"
}, },
{ {
"name": "Safety" "name": "Safety"
}, },
{ {
"name": "Telemetry" "name": "SyntheticDataGeneration"
},
{
"name": "PostTraining"
}, },
{ {
"name": "Datasets" "name": "Datasets"
}, },
{ {
"name": "Inference" "name": "Telemetry"
}, },
{ {
"name": "SyntheticDataGeneration" "name": "Evaluations"
},
{
"name": "RewardScoring"
},
{
"name": "Agents"
}, },
{ {
"name": "Memory" "name": "Memory"
@ -5601,6 +5609,9 @@
{ {
"name": "BatchInference" "name": "BatchInference"
}, },
{
"name": "Inference"
},
{ {
"name": "BuiltinTool", "name": "BuiltinTool",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/BuiltinTool\" />" "description": "<SchemaDefinition schemaRef=\"#/components/schemas/BuiltinTool\" />"
@ -5733,6 +5744,10 @@
"name": "FunctionCallToolDefinition", "name": "FunctionCallToolDefinition",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/FunctionCallToolDefinition\" />" "description": "<SchemaDefinition schemaRef=\"#/components/schemas/FunctionCallToolDefinition\" />"
}, },
{
"name": "MemoryToolDefinition",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/MemoryToolDefinition\" />"
},
{ {
"name": "PhotogenToolDefinition", "name": "PhotogenToolDefinition",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/PhotogenToolDefinition\" />" "description": "<SchemaDefinition schemaRef=\"#/components/schemas/PhotogenToolDefinition\" />"
@ -6174,6 +6189,7 @@
"MemoryBank", "MemoryBank",
"MemoryBankDocument", "MemoryBankDocument",
"MemoryRetrievalStep", "MemoryRetrievalStep",
"MemoryToolDefinition",
"MetricEvent", "MetricEvent",
"OptimizerConfig", "OptimizerConfig",
"PhotogenToolDefinition", "PhotogenToolDefinition",

View file

@ -4,12 +4,16 @@ components:
AgentConfig: AgentConfig:
additionalProperties: false additionalProperties: false
properties: properties:
enable_session_persistence:
type: boolean
input_shields: input_shields:
items: items:
type: string type: string
type: array type: array
instructions: instructions:
type: string type: string
max_infer_iters:
type: integer
model: model:
type: string type: string
output_shields: output_shields:
@ -30,127 +34,13 @@ components:
- $ref: '#/components/schemas/PhotogenToolDefinition' - $ref: '#/components/schemas/PhotogenToolDefinition'
- $ref: '#/components/schemas/CodeInterpreterToolDefinition' - $ref: '#/components/schemas/CodeInterpreterToolDefinition'
- $ref: '#/components/schemas/FunctionCallToolDefinition' - $ref: '#/components/schemas/FunctionCallToolDefinition'
- additionalProperties: false - $ref: '#/components/schemas/MemoryToolDefinition'
properties:
input_shields:
items:
type: string
type: array
max_chunks:
type: integer
max_tokens_in_context:
type: integer
memory_bank_configs:
items:
oneOf:
- additionalProperties: false
properties:
bank_id:
type: string
type:
const: vector
type: string
required:
- bank_id
- type
type: object
- additionalProperties: false
properties:
bank_id:
type: string
keys:
items:
type: string
type: array
type:
const: keyvalue
type: string
required:
- bank_id
- type
- keys
type: object
- additionalProperties: false
properties:
bank_id:
type: string
type:
const: keyword
type: string
required:
- bank_id
- type
type: object
- additionalProperties: false
properties:
bank_id:
type: string
entities:
items:
type: string
type: array
type:
const: graph
type: string
required:
- bank_id
- type
- entities
type: object
type: array
output_shields:
items:
type: string
type: array
query_generator_config:
oneOf:
- additionalProperties: false
properties:
sep:
type: string
type:
const: default
type: string
required:
- type
- sep
type: object
- additionalProperties: false
properties:
model:
type: string
template:
type: string
type:
const: llm
type: string
required:
- type
- model
- template
type: object
- additionalProperties: false
properties:
type:
const: custom
type: string
required:
- type
type: object
type:
const: memory
type: string
required:
- type
- memory_bank_configs
- query_generator_config
- max_tokens_in_context
- max_chunks
type: object
type: array type: array
required: required:
- max_infer_iters
- model - model
- instructions - instructions
- enable_session_persistence
type: object type: object
AgentCreateResponse: AgentCreateResponse:
additionalProperties: false additionalProperties: false
@ -1182,6 +1072,124 @@ components:
- memory_bank_ids - memory_bank_ids
- inserted_context - inserted_context
type: object type: object
MemoryToolDefinition:
additionalProperties: false
properties:
input_shields:
items:
type: string
type: array
max_chunks:
type: integer
max_tokens_in_context:
type: integer
memory_bank_configs:
items:
oneOf:
- additionalProperties: false
properties:
bank_id:
type: string
type:
const: vector
type: string
required:
- bank_id
- type
type: object
- additionalProperties: false
properties:
bank_id:
type: string
keys:
items:
type: string
type: array
type:
const: keyvalue
type: string
required:
- bank_id
- type
- keys
type: object
- additionalProperties: false
properties:
bank_id:
type: string
type:
const: keyword
type: string
required:
- bank_id
- type
type: object
- additionalProperties: false
properties:
bank_id:
type: string
entities:
items:
type: string
type: array
type:
const: graph
type: string
required:
- bank_id
- type
- entities
type: object
type: array
output_shields:
items:
type: string
type: array
query_generator_config:
oneOf:
- additionalProperties: false
properties:
sep:
type: string
type:
const: default
type: string
required:
- type
- sep
type: object
- additionalProperties: false
properties:
model:
type: string
template:
type: string
type:
const: llm
type: string
required:
- type
- model
- template
type: object
- additionalProperties: false
properties:
type:
const: custom
type: string
required:
- type
type: object
type:
const: memory
type: string
required:
- type
- memory_bank_configs
- query_generator_config
- max_tokens_in_context
- max_chunks
type: object
MetricEvent: MetricEvent:
additionalProperties: false additionalProperties: false
properties: properties:
@ -2341,7 +2349,7 @@ info:
description: "This is the specification of the llama stack that provides\n \ description: "This is the specification of the llama stack that provides\n \
\ a set of endpoints and their corresponding interfaces that are tailored\ \ a set of endpoints and their corresponding interfaces that are tailored\
\ to\n best leverage Llama Models. The specification is still in\ \ to\n best leverage Llama Models. The specification is still in\
\ draft and subject to change.\n Generated at 2024-09-20 14:53:17.090953" \ draft and subject to change.\n Generated at 2024-09-23 01:08:55.758597"
title: '[DRAFT] Llama Stack Specification' title: '[DRAFT] Llama Stack Specification'
version: 0.0.1 version: 0.0.1
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@ -2944,7 +2952,32 @@ paths:
description: OK description: OK
tags: tags:
- Inference - Inference
/memory_bank/documents/delete: /memory/create:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
adapter servicing the API
in: header
name: X-LlamaStack-ProviderData
required: false
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateMemoryBankRequest'
required: true
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/MemoryBank'
description: OK
tags:
- Memory
/memory/documents/delete:
post: post:
parameters: parameters:
- description: JSON-encoded provider data which will be made available to the - description: JSON-encoded provider data which will be made available to the
@ -2965,7 +2998,7 @@ paths:
description: OK description: OK
tags: tags:
- Memory - Memory
/memory_bank/documents/get: /memory/documents/get:
post: post:
parameters: parameters:
- in: query - in: query
@ -2995,99 +3028,7 @@ paths:
description: OK description: OK
tags: tags:
- Memory - Memory
/memory_bank/insert: /memory/drop:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
adapter servicing the API
in: header
name: X-LlamaStack-ProviderData
required: false
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/InsertDocumentsRequest'
required: true
responses:
'200':
description: OK
tags:
- Memory
/memory_bank/query:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
adapter servicing the API
in: header
name: X-LlamaStack-ProviderData
required: false
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryDocumentsRequest'
required: true
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/QueryDocumentsResponse'
description: OK
tags:
- Memory
/memory_bank/update:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
adapter servicing the API
in: header
name: X-LlamaStack-ProviderData
required: false
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/UpdateDocumentsRequest'
required: true
responses:
'200':
description: OK
tags:
- Memory
/memory_banks/create:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
adapter servicing the API
in: header
name: X-LlamaStack-ProviderData
required: false
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateMemoryBankRequest'
required: true
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/MemoryBank'
description: OK
tags:
- Memory
/memory_banks/drop:
post: post:
parameters: parameters:
- description: JSON-encoded provider data which will be made available to the - description: JSON-encoded provider data which will be made available to the
@ -3112,7 +3053,7 @@ paths:
description: OK description: OK
tags: tags:
- Memory - Memory
/memory_banks/get: /memory/get:
get: get:
parameters: parameters:
- in: query - in: query
@ -3138,7 +3079,28 @@ paths:
description: OK description: OK
tags: tags:
- Memory - Memory
/memory_banks/list: /memory/insert:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
adapter servicing the API
in: header
name: X-LlamaStack-ProviderData
required: false
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/InsertDocumentsRequest'
required: true
responses:
'200':
description: OK
tags:
- Memory
/memory/list:
get: get:
parameters: parameters:
- description: JSON-encoded provider data which will be made available to the - description: JSON-encoded provider data which will be made available to the
@ -3157,6 +3119,52 @@ paths:
description: OK description: OK
tags: tags:
- Memory - Memory
/memory/query:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
adapter servicing the API
in: header
name: X-LlamaStack-ProviderData
required: false
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryDocumentsRequest'
required: true
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/QueryDocumentsResponse'
description: OK
tags:
- Memory
/memory/update:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
adapter servicing the API
in: header
name: X-LlamaStack-ProviderData
required: false
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/UpdateDocumentsRequest'
required: true
responses:
'200':
description: OK
tags:
- Memory
/post_training/job/artifacts: /post_training/job/artifacts:
get: get:
parameters: parameters:
@ -3444,17 +3452,17 @@ security:
servers: servers:
- url: http://any-hosted-llama-stack.com - url: http://any-hosted-llama-stack.com
tags: tags:
- name: Agents
- name: RewardScoring
- name: Evaluations
- name: Safety
- name: Telemetry
- name: PostTraining - name: PostTraining
- name: Datasets - name: Safety
- name: Inference
- name: SyntheticDataGeneration - name: SyntheticDataGeneration
- name: Datasets
- name: Telemetry
- name: Evaluations
- name: RewardScoring
- name: Agents
- name: Memory - name: Memory
- name: BatchInference - name: BatchInference
- name: Inference
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" /> - description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
name: BuiltinTool name: BuiltinTool
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage" - description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
@ -3564,6 +3572,9 @@ tags:
- description: <SchemaDefinition schemaRef="#/components/schemas/FunctionCallToolDefinition" - description: <SchemaDefinition schemaRef="#/components/schemas/FunctionCallToolDefinition"
/> />
name: FunctionCallToolDefinition name: FunctionCallToolDefinition
- description: <SchemaDefinition schemaRef="#/components/schemas/MemoryToolDefinition"
/>
name: MemoryToolDefinition
- description: <SchemaDefinition schemaRef="#/components/schemas/PhotogenToolDefinition" - description: <SchemaDefinition schemaRef="#/components/schemas/PhotogenToolDefinition"
/> />
name: PhotogenToolDefinition name: PhotogenToolDefinition
@ -3922,6 +3933,7 @@ x-tagGroups:
- MemoryBank - MemoryBank
- MemoryBankDocument - MemoryBankDocument
- MemoryRetrievalStep - MemoryRetrievalStep
- MemoryToolDefinition
- MetricEvent - MetricEvent
- OptimizerConfig - OptimizerConfig
- PhotogenToolDefinition - PhotogenToolDefinition

View file

@ -38,7 +38,7 @@ class MemoryClient(Memory):
async def get_memory_bank(self, bank_id: str) -> Optional[MemoryBank]: async def get_memory_bank(self, bank_id: str) -> Optional[MemoryBank]:
async with httpx.AsyncClient() as client: async with httpx.AsyncClient() as client:
r = await client.get( r = await client.get(
f"{self.base_url}/memory_banks/get", f"{self.base_url}/memory/get",
params={ params={
"bank_id": bank_id, "bank_id": bank_id,
}, },
@ -59,7 +59,7 @@ class MemoryClient(Memory):
) -> MemoryBank: ) -> MemoryBank:
async with httpx.AsyncClient() as client: async with httpx.AsyncClient() as client:
r = await client.post( r = await client.post(
f"{self.base_url}/memory_banks/create", f"{self.base_url}/memory/create",
json={ json={
"name": name, "name": name,
"config": config.dict(), "config": config.dict(),
@ -81,7 +81,7 @@ class MemoryClient(Memory):
) -> None: ) -> None:
async with httpx.AsyncClient() as client: async with httpx.AsyncClient() as client:
r = await client.post( r = await client.post(
f"{self.base_url}/memory_bank/insert", f"{self.base_url}/memory/insert",
json={ json={
"bank_id": bank_id, "bank_id": bank_id,
"documents": [d.dict() for d in documents], "documents": [d.dict() for d in documents],
@ -99,7 +99,7 @@ class MemoryClient(Memory):
) -> QueryDocumentsResponse: ) -> QueryDocumentsResponse:
async with httpx.AsyncClient() as client: async with httpx.AsyncClient() as client:
r = await client.post( r = await client.post(
f"{self.base_url}/memory_bank/query", f"{self.base_url}/memory/query",
json={ json={
"bank_id": bank_id, "bank_id": bank_id,
"query": query, "query": query,

View file

@ -96,7 +96,7 @@ class MemoryBank(BaseModel):
class Memory(Protocol): class Memory(Protocol):
@webmethod(route="/memory_banks/create") @webmethod(route="/memory/create")
async def create_memory_bank( async def create_memory_bank(
self, self,
name: str, name: str,
@ -104,13 +104,13 @@ class Memory(Protocol):
url: Optional[URL] = None, url: Optional[URL] = None,
) -> MemoryBank: ... ) -> MemoryBank: ...
@webmethod(route="/memory_banks/list", method="GET") @webmethod(route="/memory/list", method="GET")
async def list_memory_banks(self) -> List[MemoryBank]: ... async def list_memory_banks(self) -> List[MemoryBank]: ...
@webmethod(route="/memory_banks/get", method="GET") @webmethod(route="/memory/get", method="GET")
async def get_memory_bank(self, bank_id: str) -> Optional[MemoryBank]: ... async def get_memory_bank(self, bank_id: str) -> Optional[MemoryBank]: ...
@webmethod(route="/memory_banks/drop", method="DELETE") @webmethod(route="/memory/drop", method="DELETE")
async def drop_memory_bank( async def drop_memory_bank(
self, self,
bank_id: str, bank_id: str,
@ -118,7 +118,7 @@ class Memory(Protocol):
# this will just block now until documents are inserted, but it should # this will just block now until documents are inserted, but it should
# probably return a Job instance which can be polled for completion # probably return a Job instance which can be polled for completion
@webmethod(route="/memory_bank/insert") @webmethod(route="/memory/insert")
async def insert_documents( async def insert_documents(
self, self,
bank_id: str, bank_id: str,
@ -126,14 +126,14 @@ class Memory(Protocol):
ttl_seconds: Optional[int] = None, ttl_seconds: Optional[int] = None,
) -> None: ... ) -> None: ...
@webmethod(route="/memory_bank/update") @webmethod(route="/memory/update")
async def update_documents( async def update_documents(
self, self,
bank_id: str, bank_id: str,
documents: List[MemoryBankDocument], documents: List[MemoryBankDocument],
) -> None: ... ) -> None: ...
@webmethod(route="/memory_bank/query") @webmethod(route="/memory/query")
async def query_documents( async def query_documents(
self, self,
bank_id: str, bank_id: str,
@ -141,14 +141,14 @@ class Memory(Protocol):
params: Optional[Dict[str, Any]] = None, params: Optional[Dict[str, Any]] = None,
) -> QueryDocumentsResponse: ... ) -> QueryDocumentsResponse: ...
@webmethod(route="/memory_bank/documents/get", method="GET") @webmethod(route="/memory/documents/get", method="GET")
async def get_documents( async def get_documents(
self, self,
bank_id: str, bank_id: str,
document_ids: List[str], document_ids: List[str],
) -> List[MemoryBankDocument]: ... ) -> List[MemoryBankDocument]: ...
@webmethod(route="/memory_bank/documents/delete", method="DELETE") @webmethod(route="/memory/documents/delete", method="DELETE")
async def delete_documents( async def delete_documents(
self, self,
bank_id: str, bank_id: str,

View file

@ -7,11 +7,11 @@
from typing import List, Optional, Protocol from typing import List, Optional, Protocol
from llama_models.schema_utils import json_schema_type, webmethod from llama_models.schema_utils import json_schema_type, webmethod
from pydantic import BaseModel, Field
from llama_stack.apis.memory import MemoryBankType from llama_stack.apis.memory import MemoryBankType
from llama_stack.distribution.datatypes import GenericProviderConfig from llama_stack.distribution.datatypes import GenericProviderConfig
from pydantic import BaseModel, Field
@json_schema_type @json_schema_type

View file

@ -160,7 +160,11 @@ class StackBuild(Subcommand):
def _run_stack_build_command(self, args: argparse.Namespace) -> None: def _run_stack_build_command(self, args: argparse.Namespace) -> None:
import yaml import yaml
from llama_stack.distribution.distribution import Api, api_providers from llama_stack.distribution.distribution import (
Api,
api_providers,
builtin_automatically_routed_apis,
)
from llama_stack.distribution.utils.dynamic import instantiate_class_type from llama_stack.distribution.utils.dynamic import instantiate_class_type
from prompt_toolkit import prompt from prompt_toolkit import prompt
from prompt_toolkit.validation import Validator from prompt_toolkit.validation import Validator
@ -213,8 +217,15 @@ class StackBuild(Subcommand):
) )
providers = dict() providers = dict()
all_providers = api_providers()
routing_table_apis = set(
x.routing_table_api for x in builtin_automatically_routed_apis()
)
for api in Api: for api in Api:
all_providers = api_providers() if api in routing_table_apis:
continue
providers_for_api = all_providers[api] providers_for_api = all_providers[api]
api_provider = prompt( api_provider = prompt(

View file

@ -145,7 +145,7 @@ class StackConfigure(Subcommand):
built_at=datetime.now(), built_at=datetime.now(),
image_name=image_name, image_name=image_name,
apis_to_serve=[], apis_to_serve=[],
provider_map={}, api_providers={},
) )
config = configure_api_providers(config, build_config.distribution_spec) config = configure_api_providers(config, build_config.distribution_spec)

View file

@ -9,12 +9,21 @@ from typing import Any
from pydantic import BaseModel from pydantic import BaseModel
from llama_stack.distribution.datatypes import * # noqa: F403 from llama_stack.distribution.datatypes import * # noqa: F403
from termcolor import cprint from llama_stack.apis.memory.memory import MemoryBankType
from llama_stack.distribution.distribution import (
from llama_stack.distribution.distribution import api_providers, stack_apis api_providers,
builtin_automatically_routed_apis,
stack_apis,
)
from llama_stack.distribution.utils.dynamic import instantiate_class_type from llama_stack.distribution.utils.dynamic import instantiate_class_type
from llama_stack.distribution.utils.prompt_for_config import prompt_for_config from llama_stack.distribution.utils.prompt_for_config import prompt_for_config
from llama_stack.providers.impls.meta_reference.safety.config import (
MetaReferenceShieldType,
)
from prompt_toolkit import prompt
from prompt_toolkit.validation import Validator
from termcolor import cprint
def make_routing_entry_type(config_class: Any): def make_routing_entry_type(config_class: Any):
@ -25,71 +34,139 @@ def make_routing_entry_type(config_class: Any):
return BaseModelWithConfig return BaseModelWithConfig
def get_builtin_apis(provider_backed_apis: List[str]) -> List[str]:
"""Get corresponding builtin APIs given provider backed APIs"""
res = []
for inf in builtin_automatically_routed_apis():
if inf.router_api.value in provider_backed_apis:
res.append(inf.routing_table_api.value)
return res
# TODO: make sure we can deal with existing configuration values correctly # TODO: make sure we can deal with existing configuration values correctly
# instead of just overwriting them # instead of just overwriting them
def configure_api_providers( def configure_api_providers(
config: StackRunConfig, spec: DistributionSpec config: StackRunConfig, spec: DistributionSpec
) -> StackRunConfig: ) -> StackRunConfig:
apis = config.apis_to_serve or list(spec.providers.keys()) apis = config.apis_to_serve or list(spec.providers.keys())
config.apis_to_serve = [a for a in apis if a != "telemetry"] # append the bulitin routing APIs
apis += get_builtin_apis(apis)
router_api2builtin_api = {
inf.router_api.value: inf.routing_table_api.value
for inf in builtin_automatically_routed_apis()
}
config.apis_to_serve = list(set([a for a in apis if a != "telemetry"]))
apis = [v.value for v in stack_apis()] apis = [v.value for v in stack_apis()]
all_providers = api_providers() all_providers = api_providers()
# configure simple case for with non-routing providers to api_providers
for api_str in spec.providers.keys(): for api_str in spec.providers.keys():
if api_str not in apis: if api_str not in apis:
raise ValueError(f"Unknown API `{api_str}`") raise ValueError(f"Unknown API `{api_str}`")
cprint(f"Configuring API `{api_str}`...\n", "white", attrs=["bold"]) cprint(f"Configuring API `{api_str}`...", "green", attrs=["bold"])
api = Api(api_str) api = Api(api_str)
provider_or_providers = spec.providers[api_str] p = spec.providers[api_str]
if isinstance(provider_or_providers, list) and len(provider_or_providers) > 1: cprint(f"=== Configuring provider `{p}` for API {api_str}...", "green")
print(
"You have specified multiple providers for this API. We will configure a routing table now. For each provider, provide a routing key followed by provider configuration.\n" if isinstance(p, list):
cprint(
f"[WARN] Interactive configuration of multiple providers {p} is not supported, configuring {p[0]} only, please manually configure {p[1:]} in routing_table of run.yaml",
"yellow",
) )
p = p[0]
provider_spec = all_providers[api][p]
config_type = instantiate_class_type(provider_spec.config_class)
try:
provider_config = config.api_providers.get(api_str)
if provider_config:
existing = config_type(**provider_config.config)
else:
existing = None
except Exception:
existing = None
cfg = prompt_for_config(config_type, existing)
if api_str in router_api2builtin_api:
# a routing api, we need to infer and assign it a routing_key and put it in the routing_table
routing_key = "<PLEASE_FILL_ROUTING_KEY>"
routing_entries = [] routing_entries = []
for p in provider_or_providers: if api_str == "inference":
print(f"Configuring provider `{p}`...") if hasattr(cfg, "model"):
provider_spec = all_providers[api][p] routing_key = cfg.model
config_type = instantiate_class_type(provider_spec.config_class) else:
routing_key = prompt(
# TODO: we need to validate the routing keys, and "> Please enter the supported model your provider has for inference: ",
# perhaps it is better if we break this out into asking default="Meta-Llama3.1-8B-Instruct",
# for a routing key separately from the associated config )
wrapper_type = make_routing_entry_type(config_type)
rt_entry = prompt_for_config(wrapper_type, None)
routing_entries.append( routing_entries.append(
ProviderRoutingEntry( RoutableProviderConfig(
routing_key=routing_key,
provider_id=p, provider_id=p,
routing_key=rt_entry.routing_key, config=cfg.dict(),
config=rt_entry.config.dict(),
) )
) )
config.provider_map[api_str] = routing_entries
else: if api_str == "safety":
p = ( # TODO: add support for other safety providers, and simplify safety provider config
provider_or_providers[0] if p == "meta-reference":
if isinstance(provider_or_providers, list) for shield_type in MetaReferenceShieldType:
else provider_or_providers routing_entries.append(
) RoutableProviderConfig(
print(f"Configuring provider `{p}`...") routing_key=shield_type.value,
provider_spec = all_providers[api][p] provider_id=p,
config_type = instantiate_class_type(provider_spec.config_class) config=cfg.dict(),
try: )
provider_config = config.provider_map.get(api_str) )
if provider_config:
existing = config_type(**provider_config.config)
else: else:
existing = None cprint(
except Exception: f"[WARN] Interactive configuration of safety provider {p} is not supported, please manually configure safety shields types in routing_table of run.yaml",
existing = None "yellow",
cfg = prompt_for_config(config_type, existing) )
config.provider_map[api_str] = GenericProviderConfig( routing_entries.append(
RoutableProviderConfig(
routing_key=routing_key,
provider_id=p,
config=cfg.dict(),
)
)
if api_str == "memory":
bank_types = list([x.value for x in MemoryBankType])
routing_key = prompt(
"> Please enter the supported memory bank type your provider has for memory: ",
default="vector",
validator=Validator.from_callable(
lambda x: x in bank_types,
error_message="Invalid provider, please enter one of the following: {}".format(
bank_types
),
),
)
routing_entries.append(
RoutableProviderConfig(
routing_key=routing_key,
provider_id=p,
config=cfg.dict(),
)
)
config.routing_table[api_str] = routing_entries
config.api_providers[api_str] = PlaceholderProviderConfig(
providers=p if isinstance(p, list) else [p]
)
else:
config.api_providers[api_str] = GenericProviderConfig(
provider_id=p, provider_id=p,
config=cfg.dict(), config=cfg.dict(),
) )
print("")
return config return config

View file

@ -59,17 +59,16 @@ class GenericProviderConfig(BaseModel):
config: Dict[str, Any] config: Dict[str, Any]
class PlaceholderProviderConfig(BaseModel):
"""Placeholder provider config for API whose provider are defined in routing_table"""
providers: List[str]
class RoutableProviderConfig(GenericProviderConfig): class RoutableProviderConfig(GenericProviderConfig):
routing_key: str routing_key: str
class RoutingTableConfig(BaseModel):
entries: List[RoutableProviderConfig] = Field(...)
keys: Optional[List[str]] = Field(
default=None,
)
# Example: /inference, /safety # Example: /inference, /safety
@json_schema_type @json_schema_type
class AutoRoutedProviderSpec(ProviderSpec): class AutoRoutedProviderSpec(ProviderSpec):
@ -270,12 +269,14 @@ this could be just a hash
The list of APIs to serve. If not specified, all APIs specified in the provider_map will be served""", The list of APIs to serve. If not specified, all APIs specified in the provider_map will be served""",
) )
api_providers: Dict[str, GenericProviderConfig] = Field( api_providers: Dict[
str, Union[GenericProviderConfig, PlaceholderProviderConfig]
] = Field(
description=""" description="""
Provider configurations for each of the APIs provided by this package. Provider configurations for each of the APIs provided by this package.
""", """,
) )
routing_tables: Dict[str, RoutingTableConfig] = Field( routing_table: Dict[str, List[RoutableProviderConfig]] = Field(
default_factory=dict, default_factory=dict,
description=""" description="""

View file

@ -8,8 +8,6 @@ import importlib
import inspect import inspect
from typing import Dict, List from typing import Dict, List
from pydantic import BaseModel
from llama_stack.apis.agents import Agents from llama_stack.apis.agents import Agents
from llama_stack.apis.inference import Inference from llama_stack.apis.inference import Inference
from llama_stack.apis.memory import Memory from llama_stack.apis.memory import Memory
@ -19,6 +17,8 @@ from llama_stack.apis.safety import Safety
from llama_stack.apis.shields import Shields from llama_stack.apis.shields import Shields
from llama_stack.apis.telemetry import Telemetry from llama_stack.apis.telemetry import Telemetry
from pydantic import BaseModel
from .datatypes import Api, ApiEndpoint, ProviderSpec, remote_provider_spec from .datatypes import Api, ApiEndpoint, ProviderSpec, remote_provider_spec
# These are the dependencies needed by the distribution server. # These are the dependencies needed by the distribution server.

View file

@ -12,7 +12,7 @@ from llama_stack.distribution.datatypes import * # noqa: F403
async def get_routing_table_impl( async def get_routing_table_impl(
api: Api, api: Api,
inner_impls: List[Tuple[str, Any]], inner_impls: List[Tuple[str, Any]],
routing_table_config: RoutingTableConfig, routing_table_config: Dict[str, List[RoutableProviderConfig]],
_deps, _deps,
) -> Any: ) -> Any:
from .routing_tables import ( from .routing_tables import (

View file

@ -46,9 +46,9 @@ class MemoryRouter(Memory):
url: Optional[URL] = None, url: Optional[URL] = None,
) -> MemoryBank: ) -> MemoryBank:
bank_type = config.type bank_type = config.type
provider = await self.routing_table.get_provider_impl( bank = await self.routing_table.get_provider_impl(bank_type).create_memory_bank(
bank_type name, config, url
).create_memory_bank(name, config, url) )
self.bank_id_to_type[bank.bank_id] = bank_type self.bank_id_to_type[bank.bank_id] = bank_type
return bank return bank
@ -162,6 +162,7 @@ class SafetyRouter(Safety):
messages: List[Message], messages: List[Message],
params: Dict[str, Any] = None, params: Dict[str, Any] = None,
) -> RunShieldResponse: ) -> RunShieldResponse:
print(f"Running shield {shield_type}")
return await self.routing_table.get_provider_impl(shield_type).run_shield( return await self.routing_table.get_provider_impl(shield_type).run_shield(
shield_type=shield_type, shield_type=shield_type,
messages=messages, messages=messages,

View file

@ -20,7 +20,7 @@ class CommonRoutingTableImpl(RoutingTable):
def __init__( def __init__(
self, self,
inner_impls: List[Tuple[str, Any]], inner_impls: List[Tuple[str, Any]],
routing_table_config: RoutingTableConfig, routing_table_config: Dict[str, List[RoutableProviderConfig]],
) -> None: ) -> None:
self.providers = {k: v for k, v in inner_impls} self.providers = {k: v for k, v in inner_impls}
self.routing_keys = list(self.providers.keys()) self.routing_keys = list(self.providers.keys())
@ -40,7 +40,7 @@ class CommonRoutingTableImpl(RoutingTable):
return self.routing_keys return self.routing_keys
def get_provider_config(self, routing_key: str) -> Optional[GenericProviderConfig]: def get_provider_config(self, routing_key: str) -> Optional[GenericProviderConfig]:
for entry in self.routing_table_config.entries: for entry in self.routing_table_config:
if entry.routing_key == routing_key: if entry.routing_key == routing_key:
return entry return entry
return None return None
@ -50,7 +50,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
async def list_models(self) -> List[ModelServingSpec]: async def list_models(self) -> List[ModelServingSpec]:
specs = [] specs = []
for entry in self.routing_table_config.entries: for entry in self.routing_table_config:
model_id = entry.routing_key model_id = entry.routing_key
specs.append( specs.append(
ModelServingSpec( ModelServingSpec(
@ -61,7 +61,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
return specs return specs
async def get_model(self, core_model_id: str) -> Optional[ModelServingSpec]: async def get_model(self, core_model_id: str) -> Optional[ModelServingSpec]:
for entry in self.routing_table_config.entries: for entry in self.routing_table_config:
if entry.routing_key == core_model_id: if entry.routing_key == core_model_id:
return ModelServingSpec( return ModelServingSpec(
llama_model=resolve_model(core_model_id), llama_model=resolve_model(core_model_id),
@ -74,7 +74,7 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
async def list_shields(self) -> List[ShieldSpec]: async def list_shields(self) -> List[ShieldSpec]:
specs = [] specs = []
for entry in self.routing_table_config.entries: for entry in self.routing_table_config:
specs.append( specs.append(
ShieldSpec( ShieldSpec(
shield_type=entry.routing_key, shield_type=entry.routing_key,
@ -84,7 +84,7 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
return specs return specs
async def get_shield(self, shield_type: str) -> Optional[ShieldSpec]: async def get_shield(self, shield_type: str) -> Optional[ShieldSpec]:
for entry in self.routing_table_config.entries: for entry in self.routing_table_config:
if entry.routing_key == shield_type: if entry.routing_key == shield_type:
return ShieldSpec( return ShieldSpec(
shield_type=entry.routing_key, shield_type=entry.routing_key,
@ -97,7 +97,7 @@ class MemoryBanksRoutingTable(CommonRoutingTableImpl, MemoryBanks):
async def list_memory_banks(self) -> List[MemoryBankSpec]: async def list_memory_banks(self) -> List[MemoryBankSpec]:
specs = [] specs = []
for entry in self.routing_table_config.entries: for entry in self.routing_table_config:
specs.append( specs.append(
MemoryBankSpec( MemoryBankSpec(
bank_type=entry.routing_key, bank_type=entry.routing_key,
@ -107,7 +107,7 @@ class MemoryBanksRoutingTable(CommonRoutingTableImpl, MemoryBanks):
return specs return specs
async def get_memory_bank(self, bank_type: str) -> Optional[MemoryBankSpec]: async def get_memory_bank(self, bank_type: str) -> Optional[MemoryBankSpec]:
for entry in self.routing_table_config.entries: for entry in self.routing_table_config:
if entry.routing_key == bank_type: if entry.routing_key == bank_type:
return MemoryBankSpec( return MemoryBankSpec(
bank_type=entry.routing_key, bank_type=entry.routing_key,

View file

@ -35,9 +35,6 @@ from fastapi import Body, FastAPI, HTTPException, Request, Response
from fastapi.exceptions import RequestValidationError from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse, StreamingResponse from fastapi.responses import JSONResponse, StreamingResponse
from fastapi.routing import APIRoute from fastapi.routing import APIRoute
from pydantic import BaseModel, ValidationError
from termcolor import cprint
from typing_extensions import Annotated
from llama_stack.providers.utils.telemetry.tracing import ( from llama_stack.providers.utils.telemetry.tracing import (
end_trace, end_trace,
@ -45,6 +42,9 @@ from llama_stack.providers.utils.telemetry.tracing import (
SpanStatus, SpanStatus,
start_trace, start_trace,
) )
from pydantic import BaseModel, ValidationError
from termcolor import cprint
from typing_extensions import Annotated
from llama_stack.distribution.datatypes import * # noqa: F403 from llama_stack.distribution.datatypes import * # noqa: F403
from llama_stack.distribution.distribution import ( from llama_stack.distribution.distribution import (
@ -307,6 +307,10 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An
# TODO: check that these APIs are not in the routing table part of the config # TODO: check that these APIs are not in the routing table part of the config
providers = all_providers[api] providers = all_providers[api]
# skip checks for API whose provider config is specified in routing_table
if isinstance(config, PlaceholderProviderConfig):
continue
if config.provider_id not in providers: if config.provider_id not in providers:
raise ValueError( raise ValueError(
f"Unknown provider `{config.provider_id}` is not available for API `{api}`" f"Unknown provider `{config.provider_id}` is not available for API `{api}`"
@ -315,9 +319,8 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An
configs[api] = config configs[api] = config
apis_to_serve = run_config.apis_to_serve or set( apis_to_serve = run_config.apis_to_serve or set(
list(specs.keys()) + list(run_config.routing_tables.keys()) list(specs.keys()) + list(run_config.routing_table.keys())
) )
print("apis_to_serve", apis_to_serve)
for info in builtin_automatically_routed_apis(): for info in builtin_automatically_routed_apis():
source_api = info.routing_table_api source_api = info.routing_table_api
@ -331,15 +334,16 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An
if info.router_api.value not in apis_to_serve: if info.router_api.value not in apis_to_serve:
continue continue
if source_api.value not in run_config.routing_tables: print("router_api", info.router_api)
if info.router_api.value not in run_config.routing_table:
raise ValueError(f"Routing table for `{source_api.value}` is not provided?") raise ValueError(f"Routing table for `{source_api.value}` is not provided?")
routing_table = run_config.routing_tables[source_api.value] routing_table = run_config.routing_table[info.router_api.value]
providers = all_providers[info.router_api] providers = all_providers[info.router_api]
inner_specs = [] inner_specs = []
for rt_entry in routing_table.entries: for rt_entry in routing_table:
if rt_entry.provider_id not in providers: if rt_entry.provider_id not in providers:
raise ValueError( raise ValueError(
f"Unknown provider `{rt_entry.provider_id}` is not available for API `{api}`" f"Unknown provider `{rt_entry.provider_id}` is not available for API `{api}`"

View file

@ -8,6 +8,7 @@ import importlib
from typing import Any, Dict from typing import Any, Dict
from llama_stack.distribution.datatypes import * # noqa: F403 from llama_stack.distribution.datatypes import * # noqa: F403
from termcolor import cprint
def instantiate_class_type(fully_qualified_name): def instantiate_class_type(fully_qualified_name):
@ -43,12 +44,12 @@ async def instantiate_provider(
elif isinstance(provider_spec, RoutingTableProviderSpec): elif isinstance(provider_spec, RoutingTableProviderSpec):
method = "get_routing_table_impl" method = "get_routing_table_impl"
assert isinstance(provider_config, RoutingTableConfig) assert isinstance(provider_config, List)
routing_table = provider_config routing_table = provider_config
inner_specs = {x.provider_id: x for x in provider_spec.inner_specs} inner_specs = {x.provider_id: x for x in provider_spec.inner_specs}
inner_impls = [] inner_impls = []
for routing_entry in routing_table.entries: for routing_entry in routing_table:
impl = await instantiate_provider( impl = await instantiate_provider(
inner_specs[routing_entry.provider_id], inner_specs[routing_entry.provider_id],
deps, deps,

View file

@ -0,0 +1,87 @@
built_at: '2024-09-23T00:54:40.551416'
image_name: test-2
docker_image: null
conda_env: test-2
apis_to_serve:
- shields
- agents
- models
- memory
- memory_banks
- inference
- safety
api_providers:
inference:
providers:
- meta-reference
safety:
providers:
- meta-reference
agents:
provider_id: meta-reference
config:
persistence_store:
namespace: null
type: sqlite
db_path: /home/xiyan/.llama/runtime/kvstore.db
memory:
providers:
- meta-reference
telemetry:
provider_id: meta-reference
config: {}
routing_table:
inference:
- provider_id: meta-reference
config:
model: Meta-Llama3.1-8B-Instruct
quantization: null
torch_seed: null
max_seq_len: 4096
max_batch_size: 1
routing_key: Meta-Llama3.1-8B-Instruct
safety:
- provider_id: meta-reference
config:
llama_guard_shield:
model: Llama-Guard-3-8B
excluded_categories: []
disable_input_check: false
disable_output_check: false
prompt_guard_shield:
model: Prompt-Guard-86M
routing_key: llama_guard
- provider_id: meta-reference
config:
llama_guard_shield:
model: Llama-Guard-3-8B
excluded_categories: []
disable_input_check: false
disable_output_check: false
prompt_guard_shield:
model: Prompt-Guard-86M
routing_key: code_scanner_guard
- provider_id: meta-reference
config:
llama_guard_shield:
model: Llama-Guard-3-8B
excluded_categories: []
disable_input_check: false
disable_output_check: false
prompt_guard_shield:
model: Prompt-Guard-86M
routing_key: injection_shield
- provider_id: meta-reference
config:
llama_guard_shield:
model: Llama-Guard-3-8B
excluded_categories: []
disable_input_check: false
disable_output_check: false
prompt_guard_shield:
model: Prompt-Guard-86M
routing_key: jailbreak_shield
memory:
- provider_id: meta-reference
config: {}
routing_key: vector

View file

@ -1,50 +0,0 @@
built_at: '2024-09-18T13:41:17.656743'
image_name: local
docker_image: null
conda_env: local
apis_to_serve:
- inference
- memory
- telemetry
- agents
- safety
- models
provider_map:
telemetry:
provider_id: meta-reference
config: {}
safety:
provider_id: meta-reference
config:
llama_guard_shield:
model: Llama-Guard-3-8B
excluded_categories: []
disable_input_check: false
disable_output_check: false
prompt_guard_shield:
model: Prompt-Guard-86M
agents:
provider_id: meta-reference
config: {}
provider_routing_table:
inference:
- routing_key: Meta-Llama3.1-8B-Instruct
provider_id: meta-reference
config:
model: Meta-Llama3.1-8B-Instruct
quantization: null
torch_seed: null
max_seq_len: 4096
max_batch_size: 1
- routing_key: Meta-Llama3.1-8B
provider_id: meta-reference
config:
model: Meta-Llama3.1-8B
quantization: null
torch_seed: null
max_seq_len: 4096
max_batch_size: 1
memory:
- routing_key: vector
provider_id: meta-reference
config: {}

View file

@ -1,40 +0,0 @@
built_at: '2024-09-19T22:50:36.239761'
image_name: simple-local
docker_image: null
conda_env: simple-local
apis_to_serve:
- inference
- safety
- agents
- memory
- models
- telemetry
provider_map:
inference:
provider_id: meta-reference
config:
model: Meta-Llama3.1-8B-Instruct
quantization: null
torch_seed: null
max_seq_len: 4096
max_batch_size: 1
safety:
provider_id: meta-reference
config:
llama_guard_shield:
model: Llama-Guard-3-8B
excluded_categories: []
disable_input_check: false
disable_output_check: false
prompt_guard_shield:
model: Prompt-Guard-86M
agents:
provider_id: meta-reference
config: {}
memory:
provider_id: meta-reference
config: {}
telemetry:
provider_id: meta-reference
config: {}
provider_routing_table: {}