# llama-stack-mirror/llama_stack/configs/examples/local-router-run.yaml
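# Example run configuration for a local stack that routes inference
# through the models-router provider.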

built_at: '2024-09-18T13:41:17.656743'
image_name: local
docker_image: null
conda_env: local
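# APIs this stack server will expose.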
apis_to_serve:
- inference
- memory
- safety
- telemetry
- agents
- models
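# provider_map assigns each served API either a concrete provider config
# or a router; inference goes through the models-router, which dispatches
# requests according to the models_config entries declared below.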
provider_map:
  inference: models-router
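  # Safety runs two shields: Llama Guard for input/output content checks,
  # and Prompt Guard for detecting prompt-injection/jailbreak attempts.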
  safety:
    provider_id: meta-reference
    config:
      llama_guard_shield:
        model: Llama-Guard-3-8B
        excluded_categories: []
        disable_input_check: false
        disable_output_check: false
      prompt_guard_shield:
        model: Prompt-Guard-86M
  telemetry:
    provider_id: meta-reference
    config: {}
  agents:
    provider_id: meta-reference
    config: {}
  models:
    provider_id: builtin
    config:
      models_config:
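      # Each entry binds a core_model_id to the provider and API that serve
      # it, so the router knows where to send requests for that model.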
      - core_model_id: Meta-Llama3.1-8B-Instruct
        provider_id: meta-reference
        api: inference
        config:
          model: Meta-Llama3.1-8B-Instruct
          quantization: null
          torch_seed: null
          max_seq_len: 4096
          max_batch_size: 1
      - core_model_id: Meta-Llama3.1-8B
        provider_id: meta-reference
        api: inference
        config:
          model: Meta-Llama3.1-8B
          quantization: null
          torch_seed: null
          max_seq_len: 4096
          max_batch_size: 1
      - core_model_id: Llama-Guard-3-8B
        provider_id: meta-reference
        api: safety
        config:
          model: Llama-Guard-3-8B
          excluded_categories: []
          disable_input_check: false
          disable_output_check: false
      - core_model_id: Prompt-Guard-86M
        provider_id: meta-reference
        api: safety
        config:
          model: Prompt-Guard-86M
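
# A minimal usage sketch (assumption: the `llama stack run` CLI of this
# vintage accepts a path to a run config like this one):
#
#   llama stack run ./local-router-run.yaml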