supported models wip

This commit is contained in:
Xi Yan 2024-09-21 18:37:22 -07:00
parent 20a4302877
commit c0199029e5
10 changed files with 215 additions and 34 deletions

View file

@@ -8,6 +8,7 @@ apis_to_serve:
- telemetry
- agents
- safety
- models
provider_map:
telemetry:
provider_id: meta-reference
@@ -22,27 +23,39 @@ provider_map:
disable_output_check: false
prompt_guard_shield:
model: Prompt-Guard-86M
# inference:
# provider_id: meta-reference
# config:
# model: Meta-Llama3.1-8B-Instruct
# quantization: null
# torch_seed: null
# max_seq_len: 4096
# max_batch_size: 1
inference:
provider_id: remote::ollama
config:
agents:
provider_id: meta-reference
config: {}
provider_routing_table:
inference:
- routing_key: Meta-Llama3.1-8B-Instruct
provider_id: meta-reference
config:
model: Meta-Llama3.1-8B-Instruct
quantization: null
torch_seed: null
max_seq_len: 4096
max_batch_size: 1
# - routing_key: Meta-Llama3.1-8B
# provider_id: meta-reference
# config:
# model: Meta-Llama3.1-8B
# quantization: null
# torch_seed: null
# max_seq_len: 4096
# max_batch_size: 1
# inference:
# - routing_key: Meta-Llama3.1-8B-Instruct
# provider_id: meta-reference
# config:
# model: Meta-Llama3.1-8B-Instruct
# quantization: null
# torch_seed: null
# max_seq_len: 4096
# max_batch_size: 1
# - routing_key: Meta-Llama3.1-8B-Instruct
# provider_id: meta-reference
# config:
# model: Meta-Llama3.1-8B
# quantization: null
# torch_seed: null
# max_seq_len: 4096
# max_batch_size: 1
memory:
# - routing_key: keyvalue
# provider_id: remote::pgvector