final fixes

2026-01-01 19:30:01 +00:00 · 2025-04-06 18:01:43 -07:00 · 2025-04-06 18:01:43 -07:00 · d3ebc18559
commit d3ebc18559
parent 971566fd74
3 changed files with 11 additions and 15 deletions
--- a/llama_stack/providers/inline/inference/meta_reference/inference.py
+++ b/llama_stack/providers/inline/inference/meta_reference/inference.py
@@ -6,8 +6,11 @@

 import asyncio
 import logging
+import os
 from typing import AsyncGenerator, List, Optional, Union

+from termcolor import cprint
+
 from llama_stack.apis.common.content_types import (
    TextDelta,
    ToolCallDelta,
@@ -338,9 +341,8 @@ class MetaReferenceInferenceImpl(
            stop_reason = None

            for token_result in self.generator.chat_completion(request):
-                from termcolor import cprint
-
-                cprint(token_result.text, "cyan", end="")
+                if os.environ.get("LLAMA_MODELS_DEBUG", "0") == "1":
+                    cprint(token_result.text, "cyan", end="")

                tokens.append(token_result.token)

@@ -390,9 +392,8 @@ class MetaReferenceInferenceImpl(
            ipython = False

            for token_result in self.generator.chat_completion(request):
-                from termcolor import cprint
-
-                cprint(token_result.text, "cyan", end="")
+                if os.environ.get("LLAMA_MODELS_DEBUG", "0") == "1":
+                    cprint(token_result.text, "cyan", end="")

                tokens.append(token_result.token)