built_at: '2024-09-18T13:41:17.656743'
image_name: local
docker_image: null
conda_env: local
apis_to_serve:
# - inference
- memory
- telemetry
provider_map:
  telemetry:
    provider_id: meta-reference
    config: {}
provider_routing_table:
  # inference:
  #   - routing_key: Meta-Llama3.1-8B-Instruct
  #     provider_id: meta-reference
  #     config:
  #       model: Meta-Llama3.1-8B-Instruct
  #       quantization: null
  #       torch_seed: null
  #       max_seq_len: 4096
  #       max_batch_size: 1
  #   - routing_key: Meta-Llama3.1-8B
  #     provider_id: remote::ollama
  #     config:
  #       url: http://ollama-url-1.com
  memory:
    - routing_key: keyvalue
      provider_id: remote::pgvector
      config:
        host: localhost
        port: 5432
        db: vectordb
        user: vectoruser
        password: xxxx  # placeholder; supply the real pgvector password
    - routing_key: vector
      provider_id: meta-reference
      config: {}
# safety:
#   provider_id: meta-reference
#   config:
#     llama_guard_shield:
#       model: Llama-Guard-3-8B
#       excluded_categories: []
#       disable_input_check: false
#       disable_output_check: false
#     prompt_guard_shield:
#       model: Prompt-Guard-86M
# telemetry:
#   provider_id: meta-reference
#   config: {}
# agents:
#   provider_id: meta-reference
#   config: {}
# memory:
#   provider_id: meta-reference
#   config: {}
# models:
#   provider_id: builtin
#   config:
#     models_config:
#       - core_model_id: Meta-Llama3.1-8B-Instruct
#         provider_id: meta-reference
#         api: inference
#         config:
#           model: Meta-Llama3.1-8B-Instruct
#           quantization: null
#           torch_seed: null
#           max_seq_len: 4096
#           max_batch_size: 1
#       - core_model_id: Meta-Llama3.1-8B
#         provider_id: meta-reference
#         api: inference
#         config:
#           model: Meta-Llama3.1-8B
#           quantization: null
#           torch_seed: null
#           max_seq_len: 4096
#           max_batch_size: 1
#       - core_model_id: Llama-Guard-3-8B
#         provider_id: meta-reference
#         api: safety
#         config:
#           model: Llama-Guard-3-8B
#           excluded_categories: []
#           disable_input_check: false
#           disable_output_check: false
#       - core_model_id: Prompt-Guard-86M
#         provider_id: meta-reference
#         api: safety
#         config:
#           model: Prompt-Guard-86M
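# A minimal usage sketch, assuming this file is saved as run.yaml and the
# `llama` CLI from llama-stack is installed (the path and port below are
# illustrative, not taken from this file):
#
#   llama stack run ./run.yaml --port 5000
#
# The server then exposes only the APIs listed under apis_to_serve above
# (memory and telemetry here), routing memory requests by routing_key to
# the pgvector or meta-reference provider configured in the routing table.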