From e2c7a3cea9414aa5cc849ba5bd100b17fcf5bf34 Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Thu, 19 Sep 2024 22:22:00 -0700
Subject: [PATCH] example inference router run config

---
 llama_stack/apis/inference/client.py            | 21 +++---
 .../configs/examples/local-router-run.yaml      | 64 +++++++++++++++++++
 2 files changed, 75 insertions(+), 10 deletions(-)
 create mode 100644 llama_stack/configs/examples/local-router-run.yaml

diff --git a/llama_stack/apis/inference/client.py b/llama_stack/apis/inference/client.py
index 7ebfa4e73..af4849581 100644
--- a/llama_stack/apis/inference/client.py
+++ b/llama_stack/apis/inference/client.py
@@ -101,16 +101,17 @@ async def run_main(host: str, port: int, stream: bool):
     async for log in EventLogger().log(iterator):
         log.print()
 
-    cprint(f"User>{message.content}", "green")
-    iterator = client.chat_completion(
-        ChatCompletionRequest(
-            model="Meta-Llama3.1-8B",
-            messages=[message],
-            stream=stream,
-        )
-    )
-    async for log in EventLogger().log(iterator):
-        log.print()
+    # For testing models routing
+    # cprint(f"User>{message.content}", "green")
+    # iterator = client.chat_completion(
+    #     ChatCompletionRequest(
+    #         model="Meta-Llama3.1-8B",
+    #         messages=[message],
+    #         stream=stream,
+    #     )
+    # )
+    # async for log in EventLogger().log(iterator):
+    #     log.print()
 
 
 def main(host: str, port: int, stream: bool = True):
diff --git a/llama_stack/configs/examples/local-router-run.yaml b/llama_stack/configs/examples/local-router-run.yaml
new file mode 100644
index 000000000..c655c1cfc
--- /dev/null
+++ b/llama_stack/configs/examples/local-router-run.yaml
@@ -0,0 +1,64 @@
+built_at: '2024-09-18T13:41:17.656743'
+image_name: local
+docker_image: null
+conda_env: local
+apis_to_serve:
+- inference
+- memory
+- safety
+- telemetry
+- agents
+- models
+provider_map:
+  inference: models-router
+  safety:
+    provider_id: meta-reference
+    config:
+      llama_guard_shield:
+        model: Llama-Guard-3-8B
+        excluded_categories: []
+        disable_input_check: false
+        disable_output_check: false
+      prompt_guard_shield:
+        model: Prompt-Guard-86M
+  telemetry:
+    provider_id: meta-reference
+    config: {}
+  agents:
+    provider_id: meta-reference
+    config: {}
+  models:
+    provider_id: builtin
+    config:
+      models_config:
+      - core_model_id: Meta-Llama3.1-8B-Instruct
+        provider_id: meta-reference
+        api: inference
+        config:
+          model: Meta-Llama3.1-8B-Instruct
+          quantization: null
+          torch_seed: null
+          max_seq_len: 4096
+          max_batch_size: 1
+      - core_model_id: Meta-Llama3.1-8B
+        provider_id: meta-reference
+        api: inference
+        config:
+          model: Meta-Llama3.1-8B
+          quantization: null
+          torch_seed: null
+          max_seq_len: 4096
+          max_batch_size: 1
+      - core_model_id: Llama-Guard-3-8B
+        provider_id: meta-reference
+        api: safety
+        config:
+          model: Llama-Guard-3-8B
+          excluded_categories: []
+          disable_input_check: false
+          disable_output_check: false
+      - core_model_id: Prompt-Guard-86M
+        provider_id: meta-reference
+        api: safety
+        config:
+          model: Prompt-Guard-86M
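Note (not part of the patch): the example config maps the inference API to a models-router provider and lists each routable model under models_config, keyed by core_model_id. Below is a minimal sketch of how the routing test that the patch comments out could be exercised against this config by sending the same prompt to both inference models. The names test_models_routing, client, and message are hypothetical placeholders; ChatCompletionRequest, EventLogger, and cprint are assumed to be the same objects already imported in llama_stack/apis/inference/client.py.

# Hypothetical sketch, not part of this patch: drive the models-router by sending
# the same prompt to both inference models declared in local-router-run.yaml.
# Assumes ChatCompletionRequest and EventLogger are already imported as in
# llama_stack/apis/inference/client.py, and that `client` and `message` are the
# InferenceClient and UserMessage constructed in run_main().
from termcolor import cprint


async def test_models_routing(client, message, stream: bool = True):
    # Each model name below has a matching models_config entry in the example
    # config, so the router should dispatch each request to that provider.
    for model in ("Meta-Llama3.1-8B-Instruct", "Meta-Llama3.1-8B"):
        cprint(f"User({model})> {message.content}", "green")
        iterator = client.chat_completion(
            ChatCompletionRequest(
                model=model,
                messages=[message],
                stream=stream,
            )
        )
        async for log in EventLogger().log(iterator):
            log.print()

Once a stack server is running with local-router-run.yaml, a helper like this could stand in for the block commented out above; the exact client wiring may differ from the snapshot in this patch.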