# llama-stack-mirror/llama_stack/configs/examples/local-router-run.yaml
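# Example run configuration for a local stack that routes inference
# through the models-router provider.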

built_at: '2024-09-18T13:41:17.656743'
image_name: local
docker_image: null
conda_env: local
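# APIs this stack server will expose.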
apis_to_serve:
- inference
- memory
- safety
- telemetry
- agents
- models
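# provider_map assigns each served API either a concrete provider config
# or a router; inference goes through the models-router, which dispatches
# requests according to the models_config entries declared below.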
provider_map:
  inference: models-router
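  # Safety runs two shields: Llama Guard for input/output content checks,
  # and Prompt Guard for detecting prompt-injection/jailbreak attempts.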
  safety:
    provider_id: meta-reference
    config:
      llama_guard_shield:
        model: Llama-Guard-3-8B
        excluded_categories: []
        disable_input_check: false
        disable_output_check: false
      prompt_guard_shield:
        model: Prompt-Guard-86M
  telemetry:
    provider_id: meta-reference
    config: {}
  agents:
    provider_id: meta-reference
    config: {}
  models:
    provider_id: builtin
    config:
      models_config:
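      # Each entry binds a core_model_id to the provider and API that serve
      # it, so the router knows where to send requests for that model.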
      - core_model_id: Meta-Llama3.1-8B-Instruct
        provider_id: meta-reference
        api: inference
        config:
          model: Meta-Llama3.1-8B-Instruct
          quantization: null
          torch_seed: null
          max_seq_len: 4096
          max_batch_size: 1
      - core_model_id: Meta-Llama3.1-8B
        provider_id: meta-reference
        api: inference
        config:
          model: Meta-Llama3.1-8B
          quantization: null
          torch_seed: null
          max_seq_len: 4096
          max_batch_size: 1
      - core_model_id: Llama-Guard-3-8B
        provider_id: meta-reference
        api: safety
        config:
          model: Llama-Guard-3-8B
          excluded_categories: []
          disable_input_check: false
          disable_output_check: false
      - core_model_id: Prompt-Guard-86M
        provider_id: meta-reference
        api: safety
        config:
          model: Prompt-Guard-86M
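
# A minimal usage sketch (assumption: the `llama stack run` CLI of this
# vintage accepts a path to a run config like this one):
#
#   llama stack run ./local-router-run.yaml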