models routing work

2025-10-04 20:14:13 +00:00 · 2024-09-19 08:48:10 -07:00 · 2024-09-19 08:48:10 -07:00 · 9bdd4e3dd9
commit 9bdd4e3dd9
parent f3ff3a3001
3 changed files with 20 additions and 3 deletions
--- a/llama_stack/apis/inference/client.py
+++ b/llama_stack/apis/inference/client.py
@@ -90,7 +90,7 @@ async def run_main(host: str, port: int, stream: bool):
    cprint(f"User>{message.content}", "green")
    iterator = client.chat_completion(
        ChatCompletionRequest(
-            model="Meta-Llama3.1-8B-Instruct",
+            model="ollama-1",
            messages=[message],
            stream=stream,
        )