chore: make cprint write to stderr (#2250)

Also do sys.exit(1) in case of errors
2025-05-24 23:39:57 -07:00 · 2025-05-24 23:39:57 -07:00 · 5a422e236c
commit 5a422e236c
parent c25bd0ad58
11 changed files with 81 additions and 44 deletions
--- a/llama_stack/models/llama/llama3/generation.py
+++ b/llama_stack/models/llama/llama3/generation.py
@@ -174,6 +174,7 @@ class Llama3:
                cprint(
                    "Input to model:\n" + self.tokenizer.decode(tokens_to_print) + "\n",
                    "red",
+                    file=sys.stderr,
                )
        prompt_tokens = [inp.tokens for inp in llm_inputs]

@@ -184,7 +185,11 @@ class Llama3:
        max_prompt_len = max(len(t) for t in prompt_tokens)

        if max_prompt_len >= params.max_seq_len:
-            cprint(f"Out of token budget {max_prompt_len} vs {params.max_seq_len}", "red")
+            cprint(
+                f"Out of token budget {max_prompt_len} vs {params.max_seq_len}",
+                color="red",
+                file=sys.stderr,
+            )
            return

        total_len = min(max_gen_len + max_prompt_len, params.max_seq_len)