example inference router run config

This commit is contained in:
Xi Yan 2024-09-19 22:22:00 -07:00
parent bce79617bf
commit e2c7a3cea9
2 changed files with 75 additions and 10 deletions

View file

@@ -101,16 +101,17 @@ async def run_main(host: str, port: int, stream: bool):
async for log in EventLogger().log(iterator):
log.print()
cprint(f"User>{message.content}", "green")
iterator = client.chat_completion(
ChatCompletionRequest(
model="Meta-Llama3.1-8B",
messages=[message],
stream=stream,
)
)
async for log in EventLogger().log(iterator):
log.print()
# For testing models routing
# cprint(f"User>{message.content}", "green")
# iterator = client.chat_completion(
# ChatCompletionRequest(
# model="Meta-Llama3.1-8B",
# messages=[message],
# stream=stream,
# )
# )
# async for log in EventLogger().log(iterator):
# log.print()
def main(host: str, port: int, stream: bool = True):

View file

@@ -0,0 +1,64 @@
built_at: '2024-09-18T13:41:17.656743'
image_name: local
docker_image: null
conda_env: local
apis_to_serve:
- inference
- memory
- safety
- telemetry
- agents
- models
provider_map:
inference: models-router
safety:
provider_id: meta-reference
config:
llama_guard_shield:
model: Llama-Guard-3-8B
excluded_categories: []
disable_input_check: false
disable_output_check: false
prompt_guard_shield:
model: Prompt-Guard-86M
telemetry:
provider_id: meta-reference
config: {}
agents:
provider_id: meta-reference
config: {}
models:
provider_id: builtin
config:
models_config:
- core_model_id: Meta-Llama3.1-8B-Instruct
provider_id: meta-reference
api: inference
config:
model: Meta-Llama3.1-8B-Instruct
quantization: null
torch_seed: null
max_seq_len: 4096
max_batch_size: 1
- core_model_id: Meta-Llama3.1-8B
provider_id: meta-reference
api: inference
config:
model: Meta-Llama3.1-8B
quantization: null
torch_seed: null
max_seq_len: 4096
max_batch_size: 1
- core_model_id: Llama-Guard-3-8B
provider_id: meta-reference
api: safety
config:
model: Llama-Guard-3-8B
excluded_categories: []
disable_input_check: false
disable_output_check: false
- core_model_id: Prompt-Guard-86M
provider_id: meta-reference
api: safety
config:
model: Prompt-Guard-86M