From 30f6eb282f14abe66f32df39fcd37275dee60cf2 Mon Sep 17 00:00:00 2001
From: Botao Chen
Date: Mon, 16 Dec 2024 19:04:47 -0800
Subject: [PATCH] temp commit

---
 llama_stack/distribution/routers/routers.py      |  2 --
 .../inline/inference/meta_reference/__init__.py  |  3 ---
 .../inference/meta_reference/inference.py        |  2 --
 .../inference/meta_reference/model_parallel.py   |  2 --
 .../providers/tests/inference/conftest.py        |  3 +++
 .../experimental-post-training/run.yaml          | 17 ++++++++++++-----
 .../meta-reference-quantized-gpu/run.yaml        |  2 +-
 7 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/llama_stack/distribution/routers/routers.py b/llama_stack/distribution/routers/routers.py
index 0f487b4ed..16ae35357 100644
--- a/llama_stack/distribution/routers/routers.py
+++ b/llama_stack/distribution/routers/routers.py
@@ -74,7 +74,6 @@ class InferenceRouter(Inference):
         self,
         routing_table: RoutingTable,
     ) -> None:
-        print("InferenceRouter init")
         self.routing_table = routing_table
 
     async def initialize(self) -> None:
@@ -91,7 +90,6 @@ class InferenceRouter(Inference):
         metadata: Optional[Dict[str, Any]] = None,
         model_type: Optional[ModelType] = None,
     ) -> None:
-        print("inference router")
         await self.routing_table.register_model(
             model_id, provider_model_id, provider_id, metadata, model_type
         )
diff --git a/llama_stack/providers/inline/inference/meta_reference/__init__.py b/llama_stack/providers/inline/inference/meta_reference/__init__.py
index b7e935ebf..18dc61d4a 100644
--- a/llama_stack/providers/inline/inference/meta_reference/__init__.py
+++ b/llama_stack/providers/inline/inference/meta_reference/__init__.py
@@ -15,9 +15,6 @@ async def get_provider_impl(
 ):
     from .inference import MetaReferenceInferenceImpl
 
-    print("get_provider_impl")
-
     impl = MetaReferenceInferenceImpl(config)
-    print("after MetaReferenceInferenceImpl")
     return impl
 
diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/llama_stack/providers/inline/inference/meta_reference/inference.py
index 726e8ac44..d86eba797 100644
--- a/llama_stack/providers/inline/inference/meta_reference/inference.py
+++ b/llama_stack/providers/inline/inference/meta_reference/inference.py
@@ -94,7 +94,6 @@ class MetaReferenceInferenceImpl(
             ],
         )
         model = await self.model_registry_helper.register_model(model)
-        print("model type", type(model))
         if model.model_type == ModelType.embedding:
             self._load_sentence_transformer_model(model.provider_resource_id)
 
@@ -304,7 +303,6 @@ class MetaReferenceInferenceImpl(
 
         if self.config.create_distributed_process_group:
             async with SEMAPHORE:
-                print("after SEMAPHORE")
                 return impl()
         else:
             return impl()
diff --git a/llama_stack/providers/inline/inference/meta_reference/model_parallel.py b/llama_stack/providers/inline/inference/meta_reference/model_parallel.py
index 3eb11bf5a..f5d5cc567 100644
--- a/llama_stack/providers/inline/inference/meta_reference/model_parallel.py
+++ b/llama_stack/providers/inline/inference/meta_reference/model_parallel.py
@@ -58,7 +58,6 @@ class LlamaModelParallelGenerator:
         config: MetaReferenceInferenceConfig,
         model_id: str,
     ):
-        print("LlamaModelParallelGenerator init")
         self.config = config
         self.model_id = model_id
         self.model = resolve_model(model_id)
@@ -76,7 +75,6 @@ class LlamaModelParallelGenerator:
         self.__exit__(None, None, None)
 
     def __enter__(self):
-        print("enter LlamaModelParallelGenerator")
         if self.config.model_parallel_size:
             model_parallel_size = self.config.model_parallel_size
         else:
diff --git a/llama_stack/providers/tests/inference/conftest.py b/llama_stack/providers/tests/inference/conftest.py
index 54ebcd83a..9f6bf5d67 100644
--- a/llama_stack/providers/tests/inference/conftest.py
+++ b/llama_stack/providers/tests/inference/conftest.py
@@ -69,6 +69,7 @@ def pytest_generate_tests(metafunc):
         else:
             params = MODEL_PARAMS
 
+        # print("params", params)
         metafunc.parametrize(
             "inference_model",
             params,
@@ -82,5 +83,7 @@
                 "inference": INFERENCE_FIXTURES,
             },
         ):
+            # print("I reach here")
             fixtures = [stack.values[0]["inference"] for stack in filtered_stacks]
+            print("fixtures", fixtures)
         metafunc.parametrize("inference_stack", fixtures, indirect=True)
diff --git a/llama_stack/templates/experimental-post-training/run.yaml b/llama_stack/templates/experimental-post-training/run.yaml
index 4bdde7aa6..4a1afe85e 100644
--- a/llama_stack/templates/experimental-post-training/run.yaml
+++ b/llama_stack/templates/experimental-post-training/run.yaml
@@ -3,10 +3,17 @@ image_name: experimental-post-training
 docker_image: null
 conda_env: experimental-post-training
 apis:
+- inference
 - telemetry
 - datasetio
 - post_training
 providers:
+  inference:
+  - provider_id: meta-reference-inference
+    provider_type: inline::meta-reference
+    config:
+      max_seq_len: 4096
+      checkpoint_dir: null
   datasetio:
   - provider_id: huggingface-0
     provider_type: remote::huggingface
@@ -24,11 +31,11 @@ metadata_store:
   namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
-models:
-- metadata: {}
-  model_id: ${env.POST_TRAINING_MODEL}
-  provider_id: meta-reference-inference
-  provider_model_id: null
+models: []
+# - metadata: {}
+#   model_id: ${env.POST_TRAINING_MODEL}
+#   provider_id: meta-reference-inference
+#   provider_model_id: null
 shields: []
 memory_banks: []
 datasets:
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
index 550170a00..ea34d3424 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
+++ b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
@@ -16,7 +16,7 @@ providers:
   - provider_id: meta-reference-inference
     provider_type: inline::meta-reference-quantized
    config:
-      model: ${env.INFERENCE_MODEL}
+      model: ${env.INFERENCE_MODEL} # make sure the inference model set here is also registered as a model resource
       max_seq_len: 4096
       checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
       quantization: