diff --git a/llama_stack/providers/impls/meta_reference/inference/generation.py b/llama_stack/providers/impls/meta_reference/inference/generation.py
index 96855b041..ea2ae016d 100644
--- a/llama_stack/providers/impls/meta_reference/inference/generation.py
+++ b/llama_stack/providers/impls/meta_reference/inference/generation.py
@@ -35,11 +35,12 @@ from termcolor import cprint
 
 from llama_stack.apis.inference import *  # noqa: F403
 
+from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData
+
 from llama_stack.distribution.utils.model_utils import model_local_dir
 from llama_stack.providers.utils.inference.prompt_adapter import (
     chat_completion_request_to_messages,
 )
-from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData
 
 from .config import (
     Fp8QuantizationConfig,