diff --git a/llama_stack/providers/inline/agents/meta_reference/config.py b/llama_stack/providers/inline/agents/meta_reference/config.py index 8ade558c3..2770ed13c 100644 --- a/llama_stack/providers/inline/agents/meta_reference/config.py +++ b/llama_stack/providers/inline/agents/meta_reference/config.py @@ -4,9 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from pydantic import BaseModel, Field + from llama_stack.providers.utils.kvstore import KVStoreConfig from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig -from pydantic import BaseModel, Field class MetaReferenceAgentsImplConfig(BaseModel): diff --git a/llama_stack/providers/inline/agents/meta_reference/persistence.py b/llama_stack/providers/inline/agents/meta_reference/persistence.py index 36ae9b367..37ac75d6a 100644 --- a/llama_stack/providers/inline/agents/meta_reference/persistence.py +++ b/llama_stack/providers/inline/agents/meta_reference/persistence.py @@ -11,9 +11,10 @@ from datetime import datetime from typing import List, Optional from llama_stack.apis.agents import * # noqa: F403 -from llama_stack.providers.utils.kvstore import KVStore from pydantic import BaseModel +from llama_stack.providers.utils.kvstore import KVStore + class AgentSessionInfo(BaseModel): session_id: str diff --git a/llama_stack/providers/inline/agents/meta_reference/rag/context_retriever.py b/llama_stack/providers/inline/agents/meta_reference/rag/context_retriever.py index 3b303f5bd..b668dc0d6 100644 --- a/llama_stack/providers/inline/agents/meta_reference/rag/context_retriever.py +++ b/llama_stack/providers/inline/agents/meta_reference/rag/context_retriever.py @@ -10,13 +10,14 @@ from jinja2 import Template from llama_models.llama3.api import * # noqa: F403 +from termcolor import cprint # noqa: F401 + from llama_stack.apis.agents import ( DefaultMemoryQueryGeneratorConfig, LLMMemoryQueryGeneratorConfig, MemoryQueryGenerator, MemoryQueryGeneratorConfig, ) -from termcolor import cprint # noqa: F401 from llama_stack.apis.inference import * # noqa: F403 diff --git a/llama_stack/providers/inline/inference/__init__.py b/llama_stack/providers/inline/inference/__init__.py index 6f3c1df03..756f351d8 100644 --- a/llama_stack/providers/inline/inference/__init__.py +++ b/llama_stack/providers/inline/inference/__init__.py @@ -2,4 +2,4 @@ # All rights reserved. # # This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. \ No newline at end of file +# the root directory of this source tree. diff --git a/llama_stack/providers/inline/inference/meta_reference/config.py b/llama_stack/providers/inline/inference/meta_reference/config.py index 6ecba22b0..48cba645b 100644 --- a/llama_stack/providers/inline/inference/meta_reference/config.py +++ b/llama_stack/providers/inline/inference/meta_reference/config.py @@ -10,9 +10,10 @@ from llama_models.datatypes import * # noqa: F403 from llama_models.sku_list import resolve_model from llama_stack.apis.inference import * # noqa: F401, F403 -from llama_stack.providers.utils.inference import supported_inference_models from pydantic import BaseModel, Field, field_validator +from llama_stack.providers.utils.inference import supported_inference_models + class MetaReferenceInferenceConfig(BaseModel): model: str = Field( diff --git a/llama_stack/providers/inline/inference/meta_reference/generation.py b/llama_stack/providers/inline/inference/meta_reference/generation.py index 8d6a14fc9..2f296c7c2 100644 --- a/llama_stack/providers/inline/inference/meta_reference/generation.py +++ b/llama_stack/providers/inline/inference/meta_reference/generation.py @@ -35,12 +35,13 @@ from termcolor import cprint from llama_stack.apis.inference import * # noqa: F403 +from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData + from llama_stack.distribution.utils.model_utils import model_local_dir from llama_stack.providers.utils.inference.prompt_adapter import ( augment_content_with_response_format_prompt, chat_completion_request_to_messages, ) -from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData from .config import ( Fp8QuantizationConfig, diff --git a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py b/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py index 470b6b1ca..62eeefaac 100644 --- a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +++ b/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py @@ -28,13 +28,13 @@ from fairscale.nn.model_parallel.initialize import ( get_model_parallel_src_rank, ) -from llama_stack.apis.inference import ChatCompletionRequest, CompletionRequest - from pydantic import BaseModel, Field from torch.distributed.launcher.api import elastic_launch, LaunchConfig from typing_extensions import Annotated +from llama_stack.apis.inference import ChatCompletionRequest, CompletionRequest + from .generation import TokenResult diff --git a/llama_stack/providers/inline/inference/meta_reference/quantization/loader.py b/llama_stack/providers/inline/inference/meta_reference/quantization/loader.py index 286224931..3eaac1e71 100644 --- a/llama_stack/providers/inline/inference/meta_reference/quantization/loader.py +++ b/llama_stack/providers/inline/inference/meta_reference/quantization/loader.py @@ -21,13 +21,13 @@ from llama_models.llama3.api.args import ModelArgs from llama_models.llama3.reference_impl.model import Transformer, TransformerBlock from llama_models.sku_list import resolve_model -from llama_stack.apis.inference import QuantizationType - from termcolor import cprint from torch import nn, Tensor from torchao.quantization.GPTQ import Int8DynActInt4WeightLinear +from llama_stack.apis.inference import QuantizationType + from ..config import MetaReferenceQuantizedInferenceConfig diff --git a/llama_stack/providers/inline/inference/vllm/config.py b/llama_stack/providers/inline/inference/vllm/config.py index 22b439f77..a7469ebde 100644 --- a/llama_stack/providers/inline/inference/vllm/config.py +++ b/llama_stack/providers/inline/inference/vllm/config.py @@ -5,9 +5,9 @@ # the root directory of this source tree. from llama_models.schema_utils import json_schema_type +from pydantic import BaseModel, Field, field_validator from llama_stack.providers.utils.inference import supported_inference_models -from pydantic import BaseModel, Field, field_validator @json_schema_type diff --git a/llama_stack/providers/inline/memory/faiss/config.py b/llama_stack/providers/inline/memory/faiss/config.py index fd26272ae..41970b05f 100644 --- a/llama_stack/providers/inline/memory/faiss/config.py +++ b/llama_stack/providers/inline/memory/faiss/config.py @@ -5,13 +5,13 @@ # the root directory of this source tree. from llama_models.schema_utils import json_schema_type +from pydantic import BaseModel from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR from llama_stack.providers.utils.kvstore.config import ( KVStoreConfig, SqliteKVStoreConfig, ) -from pydantic import BaseModel @json_schema_type diff --git a/llama_stack/providers/inline/memory/faiss/faiss.py b/llama_stack/providers/inline/memory/faiss/faiss.py index 5726d6f87..c362eeedb 100644 --- a/llama_stack/providers/inline/memory/faiss/faiss.py +++ b/llama_stack/providers/inline/memory/faiss/faiss.py @@ -8,11 +8,11 @@ import logging from typing import Any, Dict, List, Optional +import faiss + import numpy as np from numpy.typing import NDArray -import faiss - from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_stack.apis.memory import * # noqa: F403