From 7103892f5401a1332e3dbd3e4e17f64cf31e3779 Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Tue, 3 Dec 2024 20:49:30 -0800
Subject: [PATCH] all distros

---
 llama_stack/templates/bedrock/bedrock.py     | 17 +++++++++++++++--
 llama_stack/templates/bedrock/run.yaml       |  6 ------
 llama_stack/templates/fireworks/fireworks.py | 11 ++++++++++-
 llama_stack/templates/fireworks/run.yaml     |  6 ------
 .../templates/hf-endpoint/hf_endpoint.py     | 11 +++++++++--
 llama_stack/templates/hf-endpoint/run.yaml   |  6 ------
 .../templates/hf-serverless/hf_serverless.py | 10 +++++++++-
 llama_stack/templates/hf-serverless/run.yaml |  6 ------
 .../meta-reference-gpu/meta_reference.py     | 11 +++++++++--
 .../templates/meta-reference-gpu/run.yaml    |  6 ------
 .../meta_reference.py                        | 11 +++++++++--
 .../meta-reference-quantized-gpu/run.yaml    |  6 ------
 llama_stack/templates/remote-vllm/run.yaml   |  6 ------
 llama_stack/templates/remote-vllm/vllm.py    | 11 +++++++++--
 llama_stack/templates/tgi/run.yaml           |  6 ------
 llama_stack/templates/tgi/tgi.py             | 11 +++++++++--
 16 files changed, 79 insertions(+), 62 deletions(-)

diff --git a/llama_stack/templates/bedrock/bedrock.py b/llama_stack/templates/bedrock/bedrock.py
index b7f4ae97d..c52b56612 100644
--- a/llama_stack/templates/bedrock/bedrock.py
+++ b/llama_stack/templates/bedrock/bedrock.py
@@ -6,6 +6,9 @@
 
 from pathlib import Path
 
+from llama_stack.distribution.datatypes import Provider
+
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
@@ -20,9 +23,15 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
+    name = "bedrock"
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     return DistributionTemplate(
-        name="bedrock",
+        name=name,
         distro_type="self_hosted",
         description="Use AWS Bedrock for running LLM inference and safety",
         docker_image=None,
@@ -30,7 +39,11 @@ def get_distribution_template() -> DistributionTemplate:
         providers=providers,
         default_models=[],
         run_configs={
-            "run.yaml": RunConfigSettings(),
+            "run.yaml": RunConfigSettings(
+                provider_overrides={
+                    "memory": [memory_provider],
+                },
+            ),
         },
         run_config_env_vars={
             "LLAMASTACK_PORT": (
diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml
index 81f699d56..77d4f2248 100644
--- a/llama_stack/templates/bedrock/run.yaml
+++ b/llama_stack/templates/bedrock/run.yaml
@@ -24,12 +24,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: bedrock
     provider_type: remote::bedrock
diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py
index 830698264..64387e4b7 100644
--- a/llama_stack/templates/fireworks/fireworks.py
+++ b/llama_stack/templates/fireworks/fireworks.py
@@ -9,6 +9,7 @@ from pathlib import Path
 from llama_models.sku_list import all_registered_models
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
 from llama_stack.providers.remote.inference.fireworks.fireworks import MODEL_ALIASES
 
@@ -27,11 +28,18 @@ def get_distribution_template() -> DistributionTemplate:
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
 
+    name = "fireworks"
+
     inference_provider = Provider(
         provider_id="fireworks",
         provider_type="remote::fireworks",
         config=FireworksImplConfig.sample_run_config(),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     core_model_to_hf_repo = {
         m.descriptor(): m.huggingface_repo for m in all_registered_models()
@@ -45,7 +53,7 @@ def get_distribution_template() -> DistributionTemplate:
     ]
 
     return DistributionTemplate(
-        name="fireworks",
+        name=name,
         distro_type="self_hosted",
         description="Use Fireworks.AI for running LLM inference",
         docker_image=None,
@@ -56,6 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=default_models,
                 default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml
index 34f93eb54..9296be28f 100644
--- a/llama_stack/templates/fireworks/run.yaml
+++ b/llama_stack/templates/fireworks/run.yaml
@@ -26,12 +26,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/hf-endpoint/hf_endpoint.py b/llama_stack/templates/hf-endpoint/hf_endpoint.py
index e44d459dc..a645441e2 100644
--- a/llama_stack/templates/hf-endpoint/hf_endpoint.py
+++ b/llama_stack/templates/hf-endpoint/hf_endpoint.py
@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.tgi import InferenceEndpointImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
@@ -20,12 +21,17 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "hf-endpoint"
     inference_provider = Provider(
         provider_id="hf-endpoint",
         provider_type="remote::hf::endpoint",
         config=InferenceEndpointImplConfig.sample_run_config(),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -37,7 +43,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="hf-endpoint",
+        name=name,
         distro_type="self_hosted",
         description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
         docker_image=None,
@@ -48,6 +54,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),
diff --git a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml
index 793d6a4a9..bf0697bba 100644
--- a/llama_stack/templates/hf-endpoint/run.yaml
+++ b/llama_stack/templates/hf-endpoint/run.yaml
@@ -26,12 +26,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py
index c74a83b10..d1d12a3b2 100644
--- a/llama_stack/templates/hf-serverless/hf_serverless.py
+++ b/llama_stack/templates/hf-serverless/hf_serverless.py
@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.tgi import InferenceAPIImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
@@ -21,11 +22,17 @@ def get_distribution_template() -> DistributionTemplate:
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
 
+    name = "hf-serverless"
     inference_provider = Provider(
         provider_id="hf-serverless",
         provider_type="remote::hf::serverless",
         config=InferenceAPIImplConfig.sample_run_config(),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -37,7 +44,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="hf-serverless",
+        name=name,
         distro_type="self_hosted",
         description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
         docker_image=None,
@@ -48,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),
diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml
index 803805a16..13e2d7789 100644
--- a/llama_stack/templates/hf-serverless/run.yaml
+++ b/llama_stack/templates/hf-serverless/run.yaml
@@ -26,12 +26,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py
index f0fecb47d..649234e46 100644
--- a/llama_stack/templates/meta-reference-gpu/meta_reference.py
+++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py
@@ -10,6 +10,7 @@ from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
 from llama_stack.providers.inline.inference.meta_reference import (
     MetaReferenceInferenceConfig,
 )
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
@@ -24,7 +25,7 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "meta-reference-gpu"
     inference_provider = Provider(
         provider_id="meta-reference-inference",
         provider_type="inline::meta-reference",
@@ -33,6 +34,11 @@ def get_distribution_template() -> DistributionTemplate:
             checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
         ),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -44,7 +50,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="meta-reference-gpu",
+        name=name,
         distro_type="self_hosted",
         description="Use Meta Reference for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
@@ -54,6 +60,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),
diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml
index bcefdf800..3675f4a58 100644
--- a/llama_stack/templates/meta-reference-gpu/run.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run.yaml
@@ -27,12 +27,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py
index a390e71e6..1d611ae5f 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py
+++ b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py
@@ -10,6 +10,7 @@ from llama_stack.distribution.datatypes import ModelInput, Provider
 from llama_stack.providers.inline.inference.meta_reference import (
     MetaReferenceQuantizedInferenceConfig,
 )
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
@@ -24,7 +25,7 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "meta-reference-quantized-gpu"
     inference_provider = Provider(
         provider_id="meta-reference-inference",
         provider_type="inline::meta-reference-quantized",
@@ -33,13 +34,18 @@ def get_distribution_template() -> DistributionTemplate:
             checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
         ),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
         provider_id="meta-reference-inference",
     )
     return DistributionTemplate(
-        name="meta-reference-quantized-gpu",
+        name=name,
         distro_type="self_hosted",
         description="Use Meta Reference with fp8, int4 quantization for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
@@ -49,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
index 1579c26f9..081af0f59 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
+++ b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
@@ -29,12 +29,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml
index 84b244f15..3457afdd6 100644
--- a/llama_stack/templates/remote-vllm/run.yaml
+++ b/llama_stack/templates/remote-vllm/run.yaml
@@ -24,12 +24,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py
index c3858f7e5..68ab8d348 100644
--- a/llama_stack/templates/remote-vllm/vllm.py
+++ b/llama_stack/templates/remote-vllm/vllm.py
@@ -7,6 +7,7 @@
 from pathlib import Path
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
@@ -19,7 +20,7 @@ def get_distribution_template() -> DistributionTemplate:
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
     }
-
+    name = "remote-vllm"
     inference_provider = Provider(
         provider_id="vllm-inference",
         provider_type="remote::vllm",
@@ -27,6 +28,11 @@ def get_distribution_template() -> DistributionTemplate:
             url="${env.VLLM_URL}",
         ),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -38,7 +44,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="remote-vllm",
+        name=name,
         distro_type="self_hosted",
         description="Use (an external) vLLM server for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
@@ -48,6 +54,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),
diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml
index 9eb40a789..c45e114ee 100644
--- a/llama_stack/templates/tgi/run.yaml
+++ b/llama_stack/templates/tgi/run.yaml
@@ -25,12 +25,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py
index 53207d691..edba61804 100644
--- a/llama_stack/templates/tgi/tgi.py
+++ b/llama_stack/templates/tgi/tgi.py
@@ -7,6 +7,7 @@
 from pathlib import Path
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.tgi import TGIImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
@@ -22,7 +23,7 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "tgi"
     inference_provider = Provider(
         provider_id="tgi-inference",
         provider_type="remote::tgi",
@@ -30,6 +31,11 @@ def get_distribution_template() -> DistributionTemplate:
             url="${env.TGI_URL}",
         ),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -41,7 +47,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="tgi",
+        name=name,
         distro_type="self_hosted",
         description="Use (an external) TGI server for running LLM inference",
         docker_image=None,
@@ -52,6 +58,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
            ),