From 694c142b89d71b2320454a9c795c461df8335ada Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Thu, 7 Nov 2024 13:04:53 -0800 Subject: [PATCH] Add provider deprecation support; change directory structure (#397) * Add provider deprecation support; change directory structure * fix a couple dangling imports * move the meta_reference safety dir also --- llama_stack/distribution/resolver.py | 8 ++ llama_stack/providers/datatypes.py | 4 + .../meta_reference}/__init__.py | 0 .../meta_reference}/agent_instance.py | 0 .../meta_reference}/agents.py | 0 .../meta_reference}/config.py | 3 +- .../meta_reference}/persistence.py | 3 +- .../meta_reference}/rag/__init__.py | 0 .../meta_reference}/rag/context_retriever.py | 3 +- .../meta_reference}/safety.py | 0 .../meta_reference}/tests/__init__.py | 0 .../meta_reference}/tests/code_execution.py | 0 .../meta_reference}/tests/test_chat_agent.py | 0 .../meta_reference}/tools/__init__.py | 0 .../meta_reference}/tools/base.py | 0 .../meta_reference}/tools/builtin.py | 0 .../tools/ipython_tool/__init__.py | 0 .../tools/ipython_tool/code_env_prefix.py | 0 .../tools/ipython_tool/code_execution.py | 0 .../ipython_tool/matplotlib_custom_backend.py | 0 .../tools/ipython_tool/utils.py | 0 .../meta_reference}/tools/safety.py | 3 +- .../meta_reference}/__init__.py | 0 .../meta_reference}/config.py | 3 +- .../meta_reference}/generation.py | 3 +- .../meta_reference}/inference.py | 0 .../meta_reference}/model_parallel.py | 0 .../meta_reference}/parallel_utils.py | 4 +- .../meta_reference}/quantization/__init__.py | 0 .../meta_reference}/quantization/fp8_impls.py | 0 .../quantization/fp8_txest_disabled.py | 0 .../quantization/hadamard_utils.py | 0 .../meta_reference}/quantization/loader.py | 9 +-- .../quantization/scripts/__init__.py | 0 .../quantization/scripts/build_conda.sh | 0 .../scripts/quantize_checkpoint.py | 0 .../scripts/run_quantize_checkpoint.sh | 0 .../inline/{ => inference}/vllm/__init__.py | 0 .../inline/{ => inference}/vllm/config.py | 2 +- .../inline/{ => inference}/vllm/vllm.py | 0 .../memory => memory/faiss}/__init__.py | 0 .../memory => memory/faiss}/config.py | 2 +- .../memory => memory/faiss}/faiss.py | 3 +- .../meta_reference/memory/tests/test_faiss.py | 73 ------------------- .../meta_reference}/__init__.py | 0 .../safety => safety/meta_reference}/base.py | 0 .../meta_reference}/config.py | 0 .../meta_reference}/llama_guard.py | 0 .../meta_reference}/prompt_guard.py | 0 .../meta_reference}/safety.py | 0 llama_stack/providers/registry/agents.py | 4 +- llama_stack/providers/registry/inference.py | 26 +++---- llama_stack/providers/registry/memory.py | 12 ++- llama_stack/providers/registry/safety.py | 8 +- .../providers/tests/agents/fixtures.py | 2 +- .../providers/tests/inference/fixtures.py | 2 +- .../providers/tests/memory/fixtures.py | 2 +- .../providers/tests/safety/fixtures.py | 2 +- 58 files changed, 61 insertions(+), 120 deletions(-) rename llama_stack/providers/inline/{meta_reference/agents => agents/meta_reference}/__init__.py (100%) rename llama_stack/providers/inline/{meta_reference/agents => agents/meta_reference}/agent_instance.py (100%) rename llama_stack/providers/inline/{meta_reference/agents => agents/meta_reference}/agents.py (100%) rename llama_stack/providers/inline/{meta_reference/agents => agents/meta_reference}/config.py (99%) rename llama_stack/providers/inline/{meta_reference/agents => agents/meta_reference}/persistence.py (99%) rename llama_stack/providers/inline/{meta_reference/agents => 
agents/meta_reference}/rag/__init__.py (100%) rename llama_stack/providers/inline/{meta_reference/agents => agents/meta_reference}/rag/context_retriever.py (99%) rename llama_stack/providers/inline/{meta_reference/agents => agents/meta_reference}/safety.py (100%) rename llama_stack/providers/inline/{meta_reference/agents => agents/meta_reference}/tests/__init__.py (100%) rename llama_stack/providers/inline/{meta_reference/agents => agents/meta_reference}/tests/code_execution.py (100%) rename llama_stack/providers/inline/{meta_reference/agents => agents/meta_reference}/tests/test_chat_agent.py (100%) rename llama_stack/providers/inline/{meta_reference/agents => agents/meta_reference}/tools/__init__.py (100%) rename llama_stack/providers/inline/{meta_reference/agents => agents/meta_reference}/tools/base.py (100%) rename llama_stack/providers/inline/{meta_reference/agents => agents/meta_reference}/tools/builtin.py (100%) rename llama_stack/providers/inline/{meta_reference/agents => agents/meta_reference}/tools/ipython_tool/__init__.py (100%) rename llama_stack/providers/inline/{meta_reference/agents => agents/meta_reference}/tools/ipython_tool/code_env_prefix.py (100%) rename llama_stack/providers/inline/{meta_reference/agents => agents/meta_reference}/tools/ipython_tool/code_execution.py (100%) rename llama_stack/providers/inline/{meta_reference/agents => agents/meta_reference}/tools/ipython_tool/matplotlib_custom_backend.py (100%) rename llama_stack/providers/inline/{meta_reference/agents => agents/meta_reference}/tools/ipython_tool/utils.py (100%) rename llama_stack/providers/inline/{meta_reference/agents => agents/meta_reference}/tools/safety.py (93%) rename llama_stack/providers/inline/{meta_reference/inference => inference/meta_reference}/__init__.py (100%) rename llama_stack/providers/inline/{meta_reference/inference => inference/meta_reference}/config.py (99%) rename llama_stack/providers/inline/{meta_reference/inference => inference/meta_reference}/generation.py (99%) rename llama_stack/providers/inline/{meta_reference/inference => inference/meta_reference}/inference.py (100%) rename llama_stack/providers/inline/{meta_reference/inference => inference/meta_reference}/model_parallel.py (100%) rename llama_stack/providers/inline/{meta_reference/inference => inference/meta_reference}/parallel_utils.py (100%) rename llama_stack/providers/inline/{meta_reference/inference => inference/meta_reference}/quantization/__init__.py (100%) rename llama_stack/providers/inline/{meta_reference/inference => inference/meta_reference}/quantization/fp8_impls.py (100%) rename llama_stack/providers/inline/{meta_reference/inference => inference/meta_reference}/quantization/fp8_txest_disabled.py (100%) rename llama_stack/providers/inline/{meta_reference/inference => inference/meta_reference}/quantization/hadamard_utils.py (100%) rename llama_stack/providers/inline/{meta_reference/inference => inference/meta_reference}/quantization/loader.py (99%) rename llama_stack/providers/inline/{meta_reference/inference => inference/meta_reference}/quantization/scripts/__init__.py (100%) rename llama_stack/providers/inline/{meta_reference/inference => inference/meta_reference}/quantization/scripts/build_conda.sh (100%) rename llama_stack/providers/inline/{meta_reference/inference => inference/meta_reference}/quantization/scripts/quantize_checkpoint.py (100%) rename llama_stack/providers/inline/{meta_reference/inference => inference/meta_reference}/quantization/scripts/run_quantize_checkpoint.sh (100%) rename 
llama_stack/providers/inline/{ => inference}/vllm/__init__.py (100%) rename llama_stack/providers/inline/{ => inference}/vllm/config.py (100%) rename llama_stack/providers/inline/{ => inference}/vllm/vllm.py (100%) rename llama_stack/providers/inline/{meta_reference/memory => memory/faiss}/__init__.py (100%) rename llama_stack/providers/inline/{meta_reference/memory => memory/faiss}/config.py (100%) rename llama_stack/providers/inline/{meta_reference/memory => memory/faiss}/faiss.py (99%) delete mode 100644 llama_stack/providers/inline/meta_reference/memory/tests/test_faiss.py rename llama_stack/providers/inline/{meta_reference/safety => safety/meta_reference}/__init__.py (100%) rename llama_stack/providers/inline/{meta_reference/safety => safety/meta_reference}/base.py (100%) rename llama_stack/providers/inline/{meta_reference/safety => safety/meta_reference}/config.py (100%) rename llama_stack/providers/inline/{meta_reference/safety => safety/meta_reference}/llama_guard.py (100%) rename llama_stack/providers/inline/{meta_reference/safety => safety/meta_reference}/prompt_guard.py (100%) rename llama_stack/providers/inline/{meta_reference/safety => safety/meta_reference}/safety.py (100%) diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py index 96b4b81e6..9b8e41561 100644 --- a/llama_stack/distribution/resolver.py +++ b/llama_stack/distribution/resolver.py @@ -8,6 +8,8 @@ import inspect from typing import Any, Dict, List, Set +from termcolor import cprint + from llama_stack.providers.datatypes import * # noqa: F403 from llama_stack.distribution.datatypes import * # noqa: F403 @@ -97,6 +99,12 @@ async def resolve_impls( ) p = provider_registry[api][provider.provider_type] + if p.deprecation_warning: + cprint( + f"Provider `{provider.provider_type}` for API `{api}` is deprecated and will be removed in a future release: {p.deprecation_warning}", + "red", + attrs=["bold"], + ) p.deps__ = [a.value for a in p.api_dependencies] spec = ProviderWithSpec( spec=p, diff --git a/llama_stack/providers/datatypes.py b/llama_stack/providers/datatypes.py index 919507d11..59c5a38fa 100644 --- a/llama_stack/providers/datatypes.py +++ b/llama_stack/providers/datatypes.py @@ -82,6 +82,10 @@ class ProviderSpec(BaseModel): default_factory=list, description="Higher-level API surfaces may depend on other providers to provide their functionality", ) + deprecation_warning: Optional[str] = Field( + default=None, + description="If this provider is deprecated, specify the warning message here", + ) # used internally by the resolver; this is a hack for now deps__: List[str] = Field(default_factory=list) diff --git a/llama_stack/providers/inline/meta_reference/agents/__init__.py b/llama_stack/providers/inline/agents/meta_reference/__init__.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/agents/__init__.py rename to llama_stack/providers/inline/agents/meta_reference/__init__.py diff --git a/llama_stack/providers/inline/meta_reference/agents/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/agents/agent_instance.py rename to llama_stack/providers/inline/agents/meta_reference/agent_instance.py diff --git a/llama_stack/providers/inline/meta_reference/agents/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/agents/agents.py rename to 
llama_stack/providers/inline/agents/meta_reference/agents.py diff --git a/llama_stack/providers/inline/meta_reference/agents/config.py b/llama_stack/providers/inline/agents/meta_reference/config.py similarity index 99% rename from llama_stack/providers/inline/meta_reference/agents/config.py rename to llama_stack/providers/inline/agents/meta_reference/config.py index 2770ed13c..8ade558c3 100644 --- a/llama_stack/providers/inline/meta_reference/agents/config.py +++ b/llama_stack/providers/inline/agents/meta_reference/config.py @@ -4,10 +4,9 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from pydantic import BaseModel, Field - from llama_stack.providers.utils.kvstore import KVStoreConfig from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from pydantic import BaseModel, Field class MetaReferenceAgentsImplConfig(BaseModel): diff --git a/llama_stack/providers/inline/meta_reference/agents/persistence.py b/llama_stack/providers/inline/agents/meta_reference/persistence.py similarity index 99% rename from llama_stack/providers/inline/meta_reference/agents/persistence.py rename to llama_stack/providers/inline/agents/meta_reference/persistence.py index 37ac75d6a..36ae9b367 100644 --- a/llama_stack/providers/inline/meta_reference/agents/persistence.py +++ b/llama_stack/providers/inline/agents/meta_reference/persistence.py @@ -11,9 +11,8 @@ from datetime import datetime from typing import List, Optional from llama_stack.apis.agents import * # noqa: F403 -from pydantic import BaseModel - from llama_stack.providers.utils.kvstore import KVStore +from pydantic import BaseModel class AgentSessionInfo(BaseModel): diff --git a/llama_stack/providers/inline/meta_reference/agents/rag/__init__.py b/llama_stack/providers/inline/agents/meta_reference/rag/__init__.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/agents/rag/__init__.py rename to llama_stack/providers/inline/agents/meta_reference/rag/__init__.py diff --git a/llama_stack/providers/inline/meta_reference/agents/rag/context_retriever.py b/llama_stack/providers/inline/agents/meta_reference/rag/context_retriever.py similarity index 99% rename from llama_stack/providers/inline/meta_reference/agents/rag/context_retriever.py rename to llama_stack/providers/inline/agents/meta_reference/rag/context_retriever.py index b668dc0d6..3b303f5bd 100644 --- a/llama_stack/providers/inline/meta_reference/agents/rag/context_retriever.py +++ b/llama_stack/providers/inline/agents/meta_reference/rag/context_retriever.py @@ -10,14 +10,13 @@ from jinja2 import Template from llama_models.llama3.api import * # noqa: F403 -from termcolor import cprint # noqa: F401 - from llama_stack.apis.agents import ( DefaultMemoryQueryGeneratorConfig, LLMMemoryQueryGeneratorConfig, MemoryQueryGenerator, MemoryQueryGeneratorConfig, ) +from termcolor import cprint # noqa: F401 from llama_stack.apis.inference import * # noqa: F403 diff --git a/llama_stack/providers/inline/meta_reference/agents/safety.py b/llama_stack/providers/inline/agents/meta_reference/safety.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/agents/safety.py rename to llama_stack/providers/inline/agents/meta_reference/safety.py diff --git a/llama_stack/providers/inline/meta_reference/agents/tests/__init__.py b/llama_stack/providers/inline/agents/meta_reference/tests/__init__.py similarity index 100% rename from 
llama_stack/providers/inline/meta_reference/agents/tests/__init__.py rename to llama_stack/providers/inline/agents/meta_reference/tests/__init__.py diff --git a/llama_stack/providers/inline/meta_reference/agents/tests/code_execution.py b/llama_stack/providers/inline/agents/meta_reference/tests/code_execution.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/agents/tests/code_execution.py rename to llama_stack/providers/inline/agents/meta_reference/tests/code_execution.py diff --git a/llama_stack/providers/inline/meta_reference/agents/tests/test_chat_agent.py b/llama_stack/providers/inline/agents/meta_reference/tests/test_chat_agent.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/agents/tests/test_chat_agent.py rename to llama_stack/providers/inline/agents/meta_reference/tests/test_chat_agent.py diff --git a/llama_stack/providers/inline/meta_reference/agents/tools/__init__.py b/llama_stack/providers/inline/agents/meta_reference/tools/__init__.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/agents/tools/__init__.py rename to llama_stack/providers/inline/agents/meta_reference/tools/__init__.py diff --git a/llama_stack/providers/inline/meta_reference/agents/tools/base.py b/llama_stack/providers/inline/agents/meta_reference/tools/base.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/agents/tools/base.py rename to llama_stack/providers/inline/agents/meta_reference/tools/base.py diff --git a/llama_stack/providers/inline/meta_reference/agents/tools/builtin.py b/llama_stack/providers/inline/agents/meta_reference/tools/builtin.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/agents/tools/builtin.py rename to llama_stack/providers/inline/agents/meta_reference/tools/builtin.py diff --git a/llama_stack/providers/inline/meta_reference/agents/tools/ipython_tool/__init__.py b/llama_stack/providers/inline/agents/meta_reference/tools/ipython_tool/__init__.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/agents/tools/ipython_tool/__init__.py rename to llama_stack/providers/inline/agents/meta_reference/tools/ipython_tool/__init__.py diff --git a/llama_stack/providers/inline/meta_reference/agents/tools/ipython_tool/code_env_prefix.py b/llama_stack/providers/inline/agents/meta_reference/tools/ipython_tool/code_env_prefix.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/agents/tools/ipython_tool/code_env_prefix.py rename to llama_stack/providers/inline/agents/meta_reference/tools/ipython_tool/code_env_prefix.py diff --git a/llama_stack/providers/inline/meta_reference/agents/tools/ipython_tool/code_execution.py b/llama_stack/providers/inline/agents/meta_reference/tools/ipython_tool/code_execution.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/agents/tools/ipython_tool/code_execution.py rename to llama_stack/providers/inline/agents/meta_reference/tools/ipython_tool/code_execution.py diff --git a/llama_stack/providers/inline/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py b/llama_stack/providers/inline/agents/meta_reference/tools/ipython_tool/matplotlib_custom_backend.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py rename to llama_stack/providers/inline/agents/meta_reference/tools/ipython_tool/matplotlib_custom_backend.py diff --git 
a/llama_stack/providers/inline/meta_reference/agents/tools/ipython_tool/utils.py b/llama_stack/providers/inline/agents/meta_reference/tools/ipython_tool/utils.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/agents/tools/ipython_tool/utils.py rename to llama_stack/providers/inline/agents/meta_reference/tools/ipython_tool/utils.py diff --git a/llama_stack/providers/inline/meta_reference/agents/tools/safety.py b/llama_stack/providers/inline/agents/meta_reference/tools/safety.py similarity index 93% rename from llama_stack/providers/inline/meta_reference/agents/tools/safety.py rename to llama_stack/providers/inline/agents/meta_reference/tools/safety.py index 72530f0e6..1ffc99edd 100644 --- a/llama_stack/providers/inline/meta_reference/agents/tools/safety.py +++ b/llama_stack/providers/inline/agents/meta_reference/tools/safety.py @@ -9,8 +9,7 @@ from typing import List from llama_stack.apis.inference import Message from llama_stack.apis.safety import * # noqa: F403 -from llama_stack.providers.inline.meta_reference.agents.safety import ShieldRunnerMixin - +from ..safety import ShieldRunnerMixin from .builtin import BaseTool diff --git a/llama_stack/providers/inline/meta_reference/inference/__init__.py b/llama_stack/providers/inline/inference/meta_reference/__init__.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/inference/__init__.py rename to llama_stack/providers/inline/inference/meta_reference/__init__.py diff --git a/llama_stack/providers/inline/meta_reference/inference/config.py b/llama_stack/providers/inline/inference/meta_reference/config.py similarity index 99% rename from llama_stack/providers/inline/meta_reference/inference/config.py rename to llama_stack/providers/inline/inference/meta_reference/config.py index 48cba645b..6ecba22b0 100644 --- a/llama_stack/providers/inline/meta_reference/inference/config.py +++ b/llama_stack/providers/inline/inference/meta_reference/config.py @@ -10,9 +10,8 @@ from llama_models.datatypes import * # noqa: F403 from llama_models.sku_list import resolve_model from llama_stack.apis.inference import * # noqa: F401, F403 -from pydantic import BaseModel, Field, field_validator - from llama_stack.providers.utils.inference import supported_inference_models +from pydantic import BaseModel, Field, field_validator class MetaReferenceInferenceConfig(BaseModel): diff --git a/llama_stack/providers/inline/meta_reference/inference/generation.py b/llama_stack/providers/inline/inference/meta_reference/generation.py similarity index 99% rename from llama_stack/providers/inline/meta_reference/inference/generation.py rename to llama_stack/providers/inline/inference/meta_reference/generation.py index 2f296c7c2..8d6a14fc9 100644 --- a/llama_stack/providers/inline/meta_reference/inference/generation.py +++ b/llama_stack/providers/inline/inference/meta_reference/generation.py @@ -35,13 +35,12 @@ from termcolor import cprint from llama_stack.apis.inference import * # noqa: F403 -from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData - from llama_stack.distribution.utils.model_utils import model_local_dir from llama_stack.providers.utils.inference.prompt_adapter import ( augment_content_with_response_format_prompt, chat_completion_request_to_messages, ) +from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData from .config import ( Fp8QuantizationConfig, diff --git a/llama_stack/providers/inline/meta_reference/inference/inference.py 
b/llama_stack/providers/inline/inference/meta_reference/inference.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/inference/inference.py rename to llama_stack/providers/inline/inference/meta_reference/inference.py diff --git a/llama_stack/providers/inline/meta_reference/inference/model_parallel.py b/llama_stack/providers/inline/inference/meta_reference/model_parallel.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/inference/model_parallel.py rename to llama_stack/providers/inline/inference/meta_reference/model_parallel.py diff --git a/llama_stack/providers/inline/meta_reference/inference/parallel_utils.py b/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/inference/parallel_utils.py rename to llama_stack/providers/inline/inference/meta_reference/parallel_utils.py index 62eeefaac..470b6b1ca 100644 --- a/llama_stack/providers/inline/meta_reference/inference/parallel_utils.py +++ b/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py @@ -28,13 +28,13 @@ from fairscale.nn.model_parallel.initialize import ( get_model_parallel_src_rank, ) +from llama_stack.apis.inference import ChatCompletionRequest, CompletionRequest + from pydantic import BaseModel, Field from torch.distributed.launcher.api import elastic_launch, LaunchConfig from typing_extensions import Annotated -from llama_stack.apis.inference import ChatCompletionRequest, CompletionRequest - from .generation import TokenResult diff --git a/llama_stack/providers/inline/meta_reference/inference/quantization/__init__.py b/llama_stack/providers/inline/inference/meta_reference/quantization/__init__.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/inference/quantization/__init__.py rename to llama_stack/providers/inline/inference/meta_reference/quantization/__init__.py diff --git a/llama_stack/providers/inline/meta_reference/inference/quantization/fp8_impls.py b/llama_stack/providers/inline/inference/meta_reference/quantization/fp8_impls.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/inference/quantization/fp8_impls.py rename to llama_stack/providers/inline/inference/meta_reference/quantization/fp8_impls.py diff --git a/llama_stack/providers/inline/meta_reference/inference/quantization/fp8_txest_disabled.py b/llama_stack/providers/inline/inference/meta_reference/quantization/fp8_txest_disabled.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/inference/quantization/fp8_txest_disabled.py rename to llama_stack/providers/inline/inference/meta_reference/quantization/fp8_txest_disabled.py diff --git a/llama_stack/providers/inline/meta_reference/inference/quantization/hadamard_utils.py b/llama_stack/providers/inline/inference/meta_reference/quantization/hadamard_utils.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/inference/quantization/hadamard_utils.py rename to llama_stack/providers/inline/inference/meta_reference/quantization/hadamard_utils.py diff --git a/llama_stack/providers/inline/meta_reference/inference/quantization/loader.py b/llama_stack/providers/inline/inference/meta_reference/quantization/loader.py similarity index 99% rename from llama_stack/providers/inline/meta_reference/inference/quantization/loader.py rename to llama_stack/providers/inline/inference/meta_reference/quantization/loader.py index 3492ab043..286224931 100644 
--- a/llama_stack/providers/inline/meta_reference/inference/quantization/loader.py +++ b/llama_stack/providers/inline/inference/meta_reference/quantization/loader.py @@ -20,16 +20,15 @@ from llama_models.datatypes import CheckpointQuantizationFormat from llama_models.llama3.api.args import ModelArgs from llama_models.llama3.reference_impl.model import Transformer, TransformerBlock from llama_models.sku_list import resolve_model + +from llama_stack.apis.inference import QuantizationType + from termcolor import cprint from torch import nn, Tensor from torchao.quantization.GPTQ import Int8DynActInt4WeightLinear -from llama_stack.apis.inference import QuantizationType - -from llama_stack.providers.inline.meta_reference.inference.config import ( - MetaReferenceQuantizedInferenceConfig, -) +from ..config import MetaReferenceQuantizedInferenceConfig def swiglu_wrapper( diff --git a/llama_stack/providers/inline/meta_reference/inference/quantization/scripts/__init__.py b/llama_stack/providers/inline/inference/meta_reference/quantization/scripts/__init__.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/inference/quantization/scripts/__init__.py rename to llama_stack/providers/inline/inference/meta_reference/quantization/scripts/__init__.py diff --git a/llama_stack/providers/inline/meta_reference/inference/quantization/scripts/build_conda.sh b/llama_stack/providers/inline/inference/meta_reference/quantization/scripts/build_conda.sh similarity index 100% rename from llama_stack/providers/inline/meta_reference/inference/quantization/scripts/build_conda.sh rename to llama_stack/providers/inline/inference/meta_reference/quantization/scripts/build_conda.sh diff --git a/llama_stack/providers/inline/meta_reference/inference/quantization/scripts/quantize_checkpoint.py b/llama_stack/providers/inline/inference/meta_reference/quantization/scripts/quantize_checkpoint.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/inference/quantization/scripts/quantize_checkpoint.py rename to llama_stack/providers/inline/inference/meta_reference/quantization/scripts/quantize_checkpoint.py diff --git a/llama_stack/providers/inline/meta_reference/inference/quantization/scripts/run_quantize_checkpoint.sh b/llama_stack/providers/inline/inference/meta_reference/quantization/scripts/run_quantize_checkpoint.sh similarity index 100% rename from llama_stack/providers/inline/meta_reference/inference/quantization/scripts/run_quantize_checkpoint.sh rename to llama_stack/providers/inline/inference/meta_reference/quantization/scripts/run_quantize_checkpoint.sh diff --git a/llama_stack/providers/inline/vllm/__init__.py b/llama_stack/providers/inline/inference/vllm/__init__.py similarity index 100% rename from llama_stack/providers/inline/vllm/__init__.py rename to llama_stack/providers/inline/inference/vllm/__init__.py diff --git a/llama_stack/providers/inline/vllm/config.py b/llama_stack/providers/inline/inference/vllm/config.py similarity index 100% rename from llama_stack/providers/inline/vllm/config.py rename to llama_stack/providers/inline/inference/vllm/config.py index a7469ebde..22b439f77 100644 --- a/llama_stack/providers/inline/vllm/config.py +++ b/llama_stack/providers/inline/inference/vllm/config.py @@ -5,9 +5,9 @@ # the root directory of this source tree. 
from llama_models.schema_utils import json_schema_type -from pydantic import BaseModel, Field, field_validator from llama_stack.providers.utils.inference import supported_inference_models +from pydantic import BaseModel, Field, field_validator @json_schema_type diff --git a/llama_stack/providers/inline/vllm/vllm.py b/llama_stack/providers/inline/inference/vllm/vllm.py similarity index 100% rename from llama_stack/providers/inline/vllm/vllm.py rename to llama_stack/providers/inline/inference/vllm/vllm.py diff --git a/llama_stack/providers/inline/meta_reference/memory/__init__.py b/llama_stack/providers/inline/memory/faiss/__init__.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/memory/__init__.py rename to llama_stack/providers/inline/memory/faiss/__init__.py diff --git a/llama_stack/providers/inline/meta_reference/memory/config.py b/llama_stack/providers/inline/memory/faiss/config.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/memory/config.py rename to llama_stack/providers/inline/memory/faiss/config.py index 41970b05f..fd26272ae 100644 --- a/llama_stack/providers/inline/meta_reference/memory/config.py +++ b/llama_stack/providers/inline/memory/faiss/config.py @@ -5,13 +5,13 @@ # the root directory of this source tree. from llama_models.schema_utils import json_schema_type -from pydantic import BaseModel from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR from llama_stack.providers.utils.kvstore.config import ( KVStoreConfig, SqliteKVStoreConfig, ) +from pydantic import BaseModel @json_schema_type diff --git a/llama_stack/providers/inline/meta_reference/memory/faiss.py b/llama_stack/providers/inline/memory/faiss/faiss.py similarity index 99% rename from llama_stack/providers/inline/meta_reference/memory/faiss.py rename to llama_stack/providers/inline/memory/faiss/faiss.py index 4bd5fd5a7..5726d6f87 100644 --- a/llama_stack/providers/inline/meta_reference/memory/faiss.py +++ b/llama_stack/providers/inline/memory/faiss/faiss.py @@ -8,10 +8,11 @@ import logging from typing import Any, Dict, List, Optional -import faiss import numpy as np from numpy.typing import NDArray +import faiss + from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_stack.apis.memory import * # noqa: F403 diff --git a/llama_stack/providers/inline/meta_reference/memory/tests/test_faiss.py b/llama_stack/providers/inline/meta_reference/memory/tests/test_faiss.py deleted file mode 100644 index 7b944319f..000000000 --- a/llama_stack/providers/inline/meta_reference/memory/tests/test_faiss.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import tempfile - -import pytest -from llama_stack.apis.memory import MemoryBankType, VectorMemoryBankDef -from llama_stack.providers.inline.meta_reference.memory.config import FaissImplConfig - -from llama_stack.providers.inline.meta_reference.memory.faiss import FaissMemoryImpl -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig - - -class TestFaissMemoryImpl: - @pytest.fixture - def faiss_impl(self): - # Create a temporary SQLite database file - temp_db = tempfile.NamedTemporaryFile(suffix=".db", delete=False) - config = FaissImplConfig(kvstore=SqliteKVStoreConfig(db_path=temp_db.name)) - return FaissMemoryImpl(config) - - @pytest.mark.asyncio - async def test_initialize(self, faiss_impl): - # Test empty initialization - await faiss_impl.initialize() - assert len(faiss_impl.cache) == 0 - - # Test initialization with existing banks - bank = VectorMemoryBankDef( - identifier="test_bank", - type=MemoryBankType.vector.value, - embedding_model="all-MiniLM-L6-v2", - chunk_size_in_tokens=512, - overlap_size_in_tokens=64, - ) - - # Register a bank and reinitialize to test loading - await faiss_impl.register_memory_bank(bank) - - # Create new instance to test initialization with existing data - new_impl = FaissMemoryImpl(faiss_impl.config) - await new_impl.initialize() - - assert len(new_impl.cache) == 1 - assert "test_bank" in new_impl.cache - - @pytest.mark.asyncio - async def test_register_memory_bank(self, faiss_impl): - bank = VectorMemoryBankDef( - identifier="test_bank", - type=MemoryBankType.vector.value, - embedding_model="all-MiniLM-L6-v2", - chunk_size_in_tokens=512, - overlap_size_in_tokens=64, - ) - - await faiss_impl.initialize() - await faiss_impl.register_memory_bank(bank) - - assert "test_bank" in faiss_impl.cache - assert faiss_impl.cache["test_bank"].bank == bank - - # Verify persistence - new_impl = FaissMemoryImpl(faiss_impl.config) - await new_impl.initialize() - assert "test_bank" in new_impl.cache - - -if __name__ == "__main__": - pytest.main([__file__]) diff --git a/llama_stack/providers/inline/meta_reference/safety/__init__.py b/llama_stack/providers/inline/safety/meta_reference/__init__.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/safety/__init__.py rename to llama_stack/providers/inline/safety/meta_reference/__init__.py diff --git a/llama_stack/providers/inline/meta_reference/safety/base.py b/llama_stack/providers/inline/safety/meta_reference/base.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/safety/base.py rename to llama_stack/providers/inline/safety/meta_reference/base.py diff --git a/llama_stack/providers/inline/meta_reference/safety/config.py b/llama_stack/providers/inline/safety/meta_reference/config.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/safety/config.py rename to llama_stack/providers/inline/safety/meta_reference/config.py diff --git a/llama_stack/providers/inline/meta_reference/safety/llama_guard.py b/llama_stack/providers/inline/safety/meta_reference/llama_guard.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/safety/llama_guard.py rename to llama_stack/providers/inline/safety/meta_reference/llama_guard.py diff --git a/llama_stack/providers/inline/meta_reference/safety/prompt_guard.py b/llama_stack/providers/inline/safety/meta_reference/prompt_guard.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/safety/prompt_guard.py rename to 
llama_stack/providers/inline/safety/meta_reference/prompt_guard.py diff --git a/llama_stack/providers/inline/meta_reference/safety/safety.py b/llama_stack/providers/inline/safety/meta_reference/safety.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/safety/safety.py rename to llama_stack/providers/inline/safety/meta_reference/safety.py diff --git a/llama_stack/providers/registry/agents.py b/llama_stack/providers/registry/agents.py index 774dde858..989b9f077 100644 --- a/llama_stack/providers/registry/agents.py +++ b/llama_stack/providers/registry/agents.py @@ -22,8 +22,8 @@ def available_providers() -> List[ProviderSpec]: "scikit-learn", ] + kvstore_dependencies(), - module="llama_stack.providers.inline.meta_reference.agents", - config_class="llama_stack.providers.inline.meta_reference.agents.MetaReferenceAgentsImplConfig", + module="llama_stack.providers.inline.agents.meta_reference", + config_class="llama_stack.providers.inline.agents.meta_reference.MetaReferenceAgentsImplConfig", api_dependencies=[ Api.inference, Api.safety, diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index 18fe8274e..dc6fa9592 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -27,8 +27,8 @@ def available_providers() -> List[ProviderSpec]: api=Api.inference, provider_type="meta-reference", pip_packages=META_REFERENCE_DEPS, - module="llama_stack.providers.inline.meta_reference.inference", - config_class="llama_stack.providers.inline.meta_reference.inference.MetaReferenceInferenceConfig", + module="llama_stack.providers.inline.inference.meta_reference", + config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceInferenceConfig", ), InlineProviderSpec( api=Api.inference, @@ -40,8 +40,17 @@ def available_providers() -> List[ProviderSpec]: "torchao==0.5.0", ] ), - module="llama_stack.providers.inline.meta_reference.inference", - config_class="llama_stack.providers.inline.meta_reference.inference.MetaReferenceQuantizedInferenceConfig", + module="llama_stack.providers.inline.inference.meta_reference", + config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceQuantizedInferenceConfig", + ), + InlineProviderSpec( + api=Api.inference, + provider_type="vllm", + pip_packages=[ + "vllm", + ], + module="llama_stack.providers.inline.inference.vllm", + config_class="llama_stack.providers.inline.inference.vllm.VLLMConfig", ), remote_provider_spec( api=Api.inference, @@ -140,13 +149,4 @@ def available_providers() -> List[ProviderSpec]: config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig", ), ), - InlineProviderSpec( - api=Api.inference, - provider_type="vllm", - pip_packages=[ - "vllm", - ], - module="llama_stack.providers.inline.vllm", - config_class="llama_stack.providers.inline.vllm.VLLMConfig", - ), ] diff --git a/llama_stack/providers/registry/memory.py b/llama_stack/providers/registry/memory.py index c2740017a..93ecb7c13 100644 --- a/llama_stack/providers/registry/memory.py +++ b/llama_stack/providers/registry/memory.py @@ -36,8 +36,16 @@ def available_providers() -> List[ProviderSpec]: api=Api.memory, provider_type="meta-reference", pip_packages=EMBEDDING_DEPS + ["faiss-cpu"], - module="llama_stack.providers.inline.meta_reference.memory", - config_class="llama_stack.providers.inline.meta_reference.memory.FaissImplConfig", + module="llama_stack.providers.inline.memory.faiss", + 
config_class="llama_stack.providers.inline.memory.faiss.FaissImplConfig", + deprecation_warning="Please use the `faiss` provider instead.", + ), + InlineProviderSpec( + api=Api.memory, + provider_type="faiss", + pip_packages=EMBEDDING_DEPS + ["faiss-cpu"], + module="llama_stack.providers.inline.memory.faiss", + config_class="llama_stack.providers.inline.memory.faiss.FaissImplConfig", ), remote_provider_spec( Api.memory, diff --git a/llama_stack/providers/registry/safety.py b/llama_stack/providers/registry/safety.py index fdaa33192..fb5b6695a 100644 --- a/llama_stack/providers/registry/safety.py +++ b/llama_stack/providers/registry/safety.py @@ -24,8 +24,8 @@ def available_providers() -> List[ProviderSpec]: "transformers", "torch --index-url https://download.pytorch.org/whl/cpu", ], - module="llama_stack.providers.inline.meta_reference.safety", - config_class="llama_stack.providers.inline.meta_reference.safety.SafetyConfig", + module="llama_stack.providers.inline.safety.meta_reference", + config_class="llama_stack.providers.inline.safety.meta_reference.SafetyConfig", api_dependencies=[ Api.inference, ], @@ -54,8 +54,8 @@ def available_providers() -> List[ProviderSpec]: pip_packages=[ "codeshield", ], - module="llama_stack.providers.inline.meta_reference.codeshield", - config_class="llama_stack.providers.inline.meta_reference.codeshield.CodeShieldConfig", + module="llama_stack.providers.inline.safety.meta_reference", + config_class="llama_stack.providers.inline.safety.meta_reference.CodeShieldConfig", api_dependencies=[], ), ] diff --git a/llama_stack/providers/tests/agents/fixtures.py b/llama_stack/providers/tests/agents/fixtures.py index 86ecae1e9..8330e2604 100644 --- a/llama_stack/providers/tests/agents/fixtures.py +++ b/llama_stack/providers/tests/agents/fixtures.py @@ -11,7 +11,7 @@ import pytest_asyncio from llama_stack.distribution.datatypes import Api, Provider -from llama_stack.providers.inline.meta_reference.agents import ( +from llama_stack.providers.inline.agents.meta_reference import ( MetaReferenceAgentsImplConfig, ) diff --git a/llama_stack/providers/tests/inference/fixtures.py b/llama_stack/providers/tests/inference/fixtures.py index 9db70888e..5b047549b 100644 --- a/llama_stack/providers/tests/inference/fixtures.py +++ b/llama_stack/providers/tests/inference/fixtures.py @@ -10,7 +10,7 @@ import pytest import pytest_asyncio from llama_stack.distribution.datatypes import Api, Provider -from llama_stack.providers.inline.meta_reference.inference import ( +from llama_stack.providers.inline.inference.meta_reference import ( MetaReferenceInferenceConfig, ) diff --git a/llama_stack/providers/tests/memory/fixtures.py b/llama_stack/providers/tests/memory/fixtures.py index b30e0fae4..c0931b009 100644 --- a/llama_stack/providers/tests/memory/fixtures.py +++ b/llama_stack/providers/tests/memory/fixtures.py @@ -11,7 +11,7 @@ import pytest import pytest_asyncio from llama_stack.distribution.datatypes import Api, Provider -from llama_stack.providers.inline.meta_reference.memory import FaissImplConfig +from llama_stack.providers.inline.memory.faiss import FaissImplConfig from llama_stack.providers.remote.memory.pgvector import PGVectorConfig from llama_stack.providers.remote.memory.weaviate import WeaviateConfig diff --git a/llama_stack/providers/tests/safety/fixtures.py b/llama_stack/providers/tests/safety/fixtures.py index de1829355..58859c991 100644 --- a/llama_stack/providers/tests/safety/fixtures.py +++ b/llama_stack/providers/tests/safety/fixtures.py @@ -8,7 +8,7 @@ import pytest 
import pytest_asyncio from llama_stack.distribution.datatypes import Api, Provider -from llama_stack.providers.inline.meta_reference.safety import ( +from llama_stack.providers.inline.safety.meta_reference import ( LlamaGuardShieldConfig, SafetyConfig, )
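
Note for reviewers: a condensed sketch of the deprecation flow introduced by the first two hunks (llama_stack/providers/datatypes.py and llama_stack/distribution/resolver.py). The `deprecation_warning` field and the `cprint` call are lifted from the patch; the `warn_if_deprecated` helper and the trimmed `ProviderSpec` body are illustrative scaffolding, not code from this change.

    from typing import Optional

    from pydantic import BaseModel, Field
    from termcolor import cprint


    class ProviderSpec(BaseModel):
        # Other ProviderSpec fields from llama_stack/providers/datatypes.py are elided here.
        deprecation_warning: Optional[str] = Field(
            default=None,
            description="If this provider is deprecated, specify the warning message here",
        )


    def warn_if_deprecated(api: str, provider_type: str, p: ProviderSpec) -> None:
        # Mirrors the check added to resolve_impls(): a deprecated provider still
        # resolves and runs normally, but a bold red warning is printed first.
        if p.deprecation_warning:
            cprint(
                f"Provider `{provider_type}` for API `{api}` is deprecated and will be "
                f"removed in a future release: {p.deprecation_warning}",
                "red",
                attrs=["bold"],
            )

In the registry, this is what the memory.py hunk uses it for: the old `meta-reference` memory provider now points at the relocated module and sets deprecation_warning="Please use the `faiss` provider instead.", while a new `faiss` provider type exposes the same implementation without the warning.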
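
The directory restructure groups inline providers by API first and implementation second (agents/meta_reference, inference/meta_reference, inference/vllm, memory/faiss, safety/meta_reference). Downstream imports move accordingly; the import below is taken from the memory fixtures hunk, with the old path kept as a comment for comparison.

    # Old provider-first layout (before this change):
    #   from llama_stack.providers.inline.meta_reference.memory import FaissImplConfig
    # New API-first layout:
    from llama_stack.providers.inline.memory.faiss import FaissImplConfig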