chore: make cprint write to stderr (#2250)

Also do sys.exit(1) in case of errors
This commit is contained in:
raghotham 2025-05-24 23:39:57 -07:00 committed by GitHub
parent c25bd0ad58
commit 5a422e236c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 81 additions and 44 deletions

View file

@@ -6,6 +6,7 @@
 import asyncio
 import os
+import sys
 from collections.abc import AsyncGenerator
 from pydantic import BaseModel
@@ -455,9 +456,9 @@ class MetaReferenceInferenceImpl(
 first = token_results[0]
 if not first.finished and not first.ignore_token:
 if os.environ.get("LLAMA_MODELS_DEBUG", "0") in ("1", "2"):
-cprint(first.text, "cyan", end="")
+cprint(first.text, color="cyan", end="", file=sys.stderr)
 if os.environ.get("LLAMA_MODELS_DEBUG", "0") == "2":
-cprint(f"<{first.token}>", "magenta", end="")
+cprint(f"<{first.token}>", color="magenta", end="", file=sys.stderr)
 for result in token_results:
 idx = result.batch_idx
@@ -519,9 +520,9 @@ class MetaReferenceInferenceImpl(
 for token_results in self.generator.chat_completion([request]):
 token_result = token_results[0]
 if os.environ.get("LLAMA_MODELS_DEBUG", "0") == "1":
-cprint(token_result.text, "cyan", end="")
+cprint(token_result.text, color="cyan", end="", file=sys.stderr)
 if os.environ.get("LLAMA_MODELS_DEBUG", "0") == "2":
-cprint(f"<{token_result.token}>", "magenta", end="")
+cprint(f"<{token_result.token}>", color="magenta", end="", file=sys.stderr)
 if token_result.token == tokenizer.eot_id:
 stop_reason = StopReason.end_of_turn