forked from phoenix-oss/llama-stack-mirror
chore: make cprint write to stderr (#2250)
Also do sys.exit(1) in case of errors
parent c25bd0ad58
commit 5a422e236c
11 changed files with 81 additions and 44 deletions
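termcolor's cprint forwards extra keyword arguments to the built-in print, which is the whole mechanism here: passing file=sys.stderr reroutes the colored debug output off stdout, so it cannot corrupt whatever the program actually emits. A minimal sketch of both halves of the commit message, assuming only a stock termcolor install (the error message below is illustrative, not taken from the diff):

import sys

from termcolor import cprint

# Debug stream: colored token text goes to stderr; stdout stays clean.
cprint("streamed token text", color="cyan", end="", file=sys.stderr)

# Error path: report on stderr, then exit non-zero so callers can detect failure.
cprint("error: inference failed", color="red", file=sys.stderr)
sys.exit(1)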
@@ -6,6 +6,7 @@
 import asyncio
 import os
+import sys
 from collections.abc import AsyncGenerator
 
 from pydantic import BaseModel
@@ -455,9 +456,9 @@ class MetaReferenceInferenceImpl(
                 first = token_results[0]
                 if not first.finished and not first.ignore_token:
                     if os.environ.get("LLAMA_MODELS_DEBUG", "0") in ("1", "2"):
-                        cprint(first.text, "cyan", end="")
+                        cprint(first.text, color="cyan", end="", file=sys.stderr)
                     if os.environ.get("LLAMA_MODELS_DEBUG", "0") == "2":
-                        cprint(f"<{first.token}>", "magenta", end="")
+                        cprint(f"<{first.token}>", color="magenta", end="", file=sys.stderr)
 
                 for result in token_results:
                     idx = result.batch_idx
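Since the prints now name their target stream explicitly, the routing is straightforward to verify. A self-contained check using the standard library's contextlib.redirect_stderr (the token ID is made up):

import io
import sys
from contextlib import redirect_stderr

from termcolor import cprint

buf = io.StringIO()
with redirect_stderr(buf):
    # Same call shape as the hunk above; sys.stderr resolves to the
    # redirected buffer at call time.
    cprint("<128009>", color="magenta", end="", file=sys.stderr)

assert "<128009>" in buf.getvalue()  # debug output landed on stderr, not stdout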
@@ -519,9 +520,9 @@ class MetaReferenceInferenceImpl(
             for token_results in self.generator.chat_completion([request]):
                 token_result = token_results[0]
                 if os.environ.get("LLAMA_MODELS_DEBUG", "0") == "1":
-                    cprint(token_result.text, "cyan", end="")
+                    cprint(token_result.text, color="cyan", end="", file=sys.stderr)
                 if os.environ.get("LLAMA_MODELS_DEBUG", "0") == "2":
-                    cprint(f"<{token_result.token}>", "magenta", end="")
+                    cprint(f"<{token_result.token}>", color="magenta", end="", file=sys.stderr)
 
                 if token_result.token == tokenizer.eot_id:
                     stop_reason = StopReason.end_of_turn
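End to end, the split is visible from a parent process: stdout carries the real payload, stderr carries the debug stream, and sys.exit(1) surfaces as the return code. A rough sketch, where serve.py stands in for whatever entry point runs this provider (both the script name and the env var value are illustrative):

import os
import subprocess
import sys

proc = subprocess.run(
    [sys.executable, "serve.py"],
    env={**os.environ, "LLAMA_MODELS_DEBUG": "2"},
    capture_output=True,
    text=True,
)
print("payload:", proc.stdout)        # untouched by the debug prints
print("debug:", proc.stderr)          # colored debug output ends up here
print("exit code:", proc.returncode)  # 1 if the error path ran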