memory routers working

This commit is contained in:
Xi Yan 2024-09-21 16:40:23 -07:00
parent 04f480d70c
commit f0580251a3
4 changed files with 97 additions and 39 deletions

View file

@@ -3,40 +3,40 @@ image_name: local
docker_image: null
conda_env: local
apis_to_serve:
- inference
# - memory
# - inference
- memory
- telemetry
provider_map:
telemetry:
provider_id: meta-reference
config: {}
provider_routing_table:
inference:
- routing_key: Meta-Llama3.1-8B-Instruct
provider_id: meta-reference
config:
model: Meta-Llama3.1-8B-Instruct
quantization: null
torch_seed: null
max_seq_len: 4096
max_batch_size: 1
# - routing_key: Meta-Llama3.1-8B
# provider_id: meta-reference
# config:
# model: Meta-Llama3.1-8B
# quantization: null
# torch_seed: null
# max_seq_len: 4096
# max_batch_size: 1
# memory:
# - routing_key: keyvalue
# provider_id: remote::pgvector
# config:
# host: localhost
# port: 5432
# db: vectordb
# user: vectoruser
# password: xxxx
# - routing_key: vector
# inference:
# - routing_key: Meta-Llama3.1-8B-Instruct
# provider_id: meta-reference
# config: {}
# config:
# model: Meta-Llama3.1-8B-Instruct
# quantization: null
# torch_seed: null
# max_seq_len: 4096
# max_batch_size: 1
# - routing_key: Meta-Llama3.1-8B
# provider_id: meta-reference
# config:
# model: Meta-Llama3.1-8B
# quantization: null
# torch_seed: null
# max_seq_len: 4096
# max_batch_size: 1
memory:
- routing_key: keyvalue
provider_id: remote::pgvector
config:
host: localhost
port: 5432
db: vectordb
user: vectoruser
password: xxxx
- routing_key: vector
provider_id: meta-reference
config: {}

View file

@@ -0,0 +1,38 @@
# Run configuration for the `simple-local` image (conda env `simple-local`).
# Nesting below is restored with 2-space indentation; keys and values are
# unchanged.
built_at: '2024-09-19T22:50:36.239761'
image_name: simple-local
docker_image: null
conda_env: simple-local

# APIs listed for serving.
# NOTE(review): `telemetry` has a provider entry in provider_map but is not
# listed here - confirm whether that is intentional.
apis_to_serve:
- inference
- safety
- agents
- memory

# One provider (provider_id + provider-specific config) per API name.
provider_map:
  inference:
    provider_id: meta-reference
    config:
      model: Meta-Llama3.1-8B-Instruct
      quantization: null
      torch_seed: null
      max_seq_len: 4096
      max_batch_size: 1
  safety:
    provider_id: meta-reference
    config:
      # NOTE(review): the four keys under llama_guard_shield are assumed to
      # belong to that shield (indentation was lost in the source) - confirm
      # against the safety provider's config schema.
      llama_guard_shield:
        model: Llama-Guard-3-8B
        excluded_categories: []
        disable_input_check: false
        disable_output_check: false
      prompt_guard_shield:
        model: Prompt-Guard-86M
  agents:
    provider_id: meta-reference
    config: {}
  memory:
    provider_id: meta-reference
    config: {}
  telemetry:
    provider_id: meta-reference
    config: {}

# No routing-key based entries in this configuration.
provider_routing_table: {}