From 1b15df8d1d92ee08fe4d5b4ae66d6d74f3c7ba8b Mon Sep 17 00:00:00 2001
From: Hardik Shah
Date: Fri, 28 Mar 2025 22:18:40 -0700
Subject: [PATCH] drop hf serverless and endpoint

---
 llama_stack/templates/dependencies.json       |  78 ---------
 llama_stack/templates/hf-endpoint/__init__.py |   7 -
 llama_stack/templates/hf-endpoint/build.yaml  |  32 ----
 .../templates/hf-endpoint/hf_endpoint.py      | 154 ------------------
 .../hf-endpoint/run-with-safety.yaml          | 141 ----------------
 llama_stack/templates/hf-endpoint/run.yaml    | 131 ---------------
 .../templates/hf-serverless/__init__.py       |   7 -
 .../templates/hf-serverless/build.yaml        |  33 ----
 .../templates/hf-serverless/hf_serverless.py  | 147 -----------------
 .../hf-serverless/run-with-safety.yaml        | 141 ----------------
 llama_stack/templates/hf-serverless/run.yaml  | 131 ---------------
 11 files changed, 1002 deletions(-)
 delete mode 100644 llama_stack/templates/hf-endpoint/__init__.py
 delete mode 100644 llama_stack/templates/hf-endpoint/build.yaml
 delete mode 100644 llama_stack/templates/hf-endpoint/hf_endpoint.py
 delete mode 100644 llama_stack/templates/hf-endpoint/run-with-safety.yaml
 delete mode 100644 llama_stack/templates/hf-endpoint/run.yaml
 delete mode 100644 llama_stack/templates/hf-serverless/__init__.py
 delete mode 100644 llama_stack/templates/hf-serverless/build.yaml
 delete mode 100644 llama_stack/templates/hf-serverless/hf_serverless.py
 delete mode 100644 llama_stack/templates/hf-serverless/run-with-safety.yaml
 delete mode 100644 llama_stack/templates/hf-serverless/run.yaml

diff --git a/llama_stack/templates/dependencies.json b/llama_stack/templates/dependencies.json
index 931240d37..f1086d084 100644
--- a/llama_stack/templates/dependencies.json
+++ b/llama_stack/templates/dependencies.json
@@ -266,84 +266,6 @@
     "tree_sitter",
     "uvicorn"
   ],
-  "hf-endpoint": [
-    "aiohttp",
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "emoji",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "huggingface_hub",
-    "langdetect",
-    "matplotlib",
-    "mcp",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "pythainlp",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "tree_sitter",
-    "uvicorn"
-  ],
-  "hf-serverless": [
-    "aiohttp",
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "emoji",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "huggingface_hub",
-    "langdetect",
-    "matplotlib",
-    "mcp",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "pythainlp",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "tree_sitter",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ],
   "meta-reference-gpu": [
     "accelerate",
     "aiosqlite",
diff --git a/llama_stack/templates/hf-endpoint/__init__.py b/llama_stack/templates/hf-endpoint/__init__.py
deleted file mode 100644
index f2c00e3bf..000000000
--- a/llama_stack/templates/hf-endpoint/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .hf_endpoint import get_distribution_template  # noqa: F401
diff --git a/llama_stack/templates/hf-endpoint/build.yaml b/llama_stack/templates/hf-endpoint/build.yaml
deleted file mode 100644
index c2eaaa05b..000000000
--- a/llama_stack/templates/hf-endpoint/build.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-version: '2'
-distribution_spec:
-  description: Use (an external) Hugging Face Inference Endpoint for running LLM inference
-  providers:
-    inference:
-    - remote::hf::endpoint
-    vector_io:
-    - inline::faiss
-    - remote::chromadb
-    - remote::pgvector
-    safety:
-    - inline::llama-guard
-    agents:
-    - inline::meta-reference
-    telemetry:
-    - inline::meta-reference
-    eval:
-    - inline::meta-reference
-    datasetio:
-    - remote::huggingface
-    - inline::localfs
-    scoring:
-    - inline::basic
-    - inline::llm-as-judge
-    - inline::braintrust
-    tool_runtime:
-    - remote::brave-search
-    - remote::tavily-search
-    - inline::code-interpreter
-    - inline::rag-runtime
-    - remote::model-context-protocol
-image_type: conda
diff --git a/llama_stack/templates/hf-endpoint/hf_endpoint.py b/llama_stack/templates/hf-endpoint/hf_endpoint.py
deleted file mode 100644
index 53dc9d38f..000000000
--- a/llama_stack/templates/hf-endpoint/hf_endpoint.py
+++ /dev/null
@@ -1,154 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from llama_stack.apis.models.models import ModelType
-from llama_stack.distribution.datatypes import (
-    ModelInput,
-    Provider,
-    ShieldInput,
-    ToolGroupInput,
-)
-from llama_stack.providers.inline.inference.sentence_transformers import (
-    SentenceTransformersInferenceConfig,
-)
-from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
-from llama_stack.providers.remote.inference.tgi import InferenceEndpointImplConfig
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
-
-
-def get_distribution_template() -> DistributionTemplate:
-    providers = {
-        "inference": ["remote::hf::endpoint"],
-        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
-        "safety": ["inline::llama-guard"],
-        "agents": ["inline::meta-reference"],
-        "telemetry": ["inline::meta-reference"],
-        "eval": ["inline::meta-reference"],
-        "datasetio": ["remote::huggingface", "inline::localfs"],
-        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
-        "tool_runtime": [
-            "remote::brave-search",
-            "remote::tavily-search",
-            "inline::code-interpreter",
-            "inline::rag-runtime",
-            "remote::model-context-protocol",
-        ],
-    }
-    name = "hf-endpoint"
-    inference_provider = Provider(
-        provider_id="hf-endpoint",
-        provider_type="remote::hf::endpoint",
-        config=InferenceEndpointImplConfig.sample_run_config(),
-    )
-    embedding_provider = Provider(
-        provider_id="sentence-transformers",
-        provider_type="inline::sentence-transformers",
-        config=SentenceTransformersInferenceConfig.sample_run_config(),
-    )
-    vector_io_provider = Provider(
-        provider_id="faiss",
-        provider_type="inline::faiss",
-        config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
-    )
-
-    inference_model = ModelInput(
-        model_id="${env.INFERENCE_MODEL}",
-        provider_id="hf-endpoint",
-    )
-    safety_model = ModelInput(
-        model_id="${env.SAFETY_MODEL}",
-        provider_id="hf-endpoint-safety",
-    )
-    embedding_model = ModelInput(
-        model_id="all-MiniLM-L6-v2",
-        provider_id="sentence-transformers",
-        model_type=ModelType.embedding,
-        metadata={
-            "embedding_dimension": 384,
-        },
-    )
-    default_tool_groups = [
-        ToolGroupInput(
-            toolgroup_id="builtin::websearch",
-            provider_id="tavily-search",
-        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::rag",
-            provider_id="rag-runtime",
-        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::code_interpreter",
-            provider_id="code-interpreter",
-        ),
-    ]
-
-    return DistributionTemplate(
-        name=name,
-        distro_type="self_hosted",
-        description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
-        container_image=None,
-        template_path=None,
-        providers=providers,
-        run_configs={
-            "run.yaml": RunConfigSettings(
-                provider_overrides={
-                    "inference": [inference_provider, embedding_provider],
-                    "vector_io": [vector_io_provider],
-                },
-                default_models=[inference_model, embedding_model],
-                default_tool_groups=default_tool_groups,
-            ),
-            "run-with-safety.yaml": RunConfigSettings(
-                provider_overrides={
-                    "inference": [
-                        inference_provider,
-                        embedding_provider,
-                        Provider(
-                            provider_id="hf-endpoint-safety",
-                            provider_type="remote::hf::endpoint",
-                            config=InferenceEndpointImplConfig.sample_run_config(
-                                endpoint_name="${env.SAFETY_INFERENCE_ENDPOINT_NAME}",
-                            ),
-                        ),
-                    ],
-                    "vector_io": [vector_io_provider],
-                },
-                default_models=[
-                    inference_model,
-                    safety_model,
-                    embedding_model,
-                ],
-                default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
-                default_tool_groups=default_tool_groups,
-            ),
-        },
-        run_config_env_vars={
-            "LLAMA_STACK_PORT": (
-                "8321",
-                "Port for the Llama Stack distribution server",
-            ),
-            "HF_API_TOKEN": (
-                "hf_...",
-                "Hugging Face API token",
-            ),
-            "INFERENCE_ENDPOINT_NAME": (
-                "",
-                "HF Inference endpoint name for the main inference model",
-            ),
-            "SAFETY_INFERENCE_ENDPOINT_NAME": (
-                "",
-                "HF Inference endpoint for the safety model",
-            ),
-            "INFERENCE_MODEL": (
-                "meta-llama/Llama-3.2-3B-Instruct",
-                "Inference model served by the HF Inference Endpoint",
-            ),
-            "SAFETY_MODEL": (
-                "meta-llama/Llama-Guard-3-1B",
-                "Safety model served by the HF Inference Endpoint",
-            ),
-        },
-    )
diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml
deleted file mode 100644
index 14753e08b..000000000
--- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml
+++ /dev/null
@@ -1,141 +0,0 @@
-version: '2'
-image_name: hf-endpoint
-apis:
-- agents
-- datasetio
-- eval
-- inference
-- safety
-- scoring
-- telemetry
-- tool_runtime
-- vector_io
-providers:
-  inference:
-  - provider_id: hf-endpoint
-    provider_type: remote::hf::endpoint
-    config:
-      endpoint_name: ${env.INFERENCE_ENDPOINT_NAME}
-      api_token: ${env.HF_API_TOKEN}
-  - provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
-    config: {}
-  - provider_id: hf-endpoint-safety
-    provider_type: remote::hf::endpoint
-    config:
-      endpoint_name: ${env.SAFETY_INFERENCE_ENDPOINT_NAME}
-      api_token: ${env.HF_API_TOKEN}
-  vector_io:
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/faiss_store.db
-  safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config:
-      excluded_categories: []
-  agents:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      persistence_store:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/agents_store.db
-  telemetry:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
-      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/hf-endpoint/trace_store.db}
-  eval:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/meta_reference_eval.db
-  datasetio:
-  - provider_id: huggingface
-    provider_type: remote::huggingface
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db
-  - provider_id: localfs
-    provider_type: inline::localfs
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/localfs_datasetio.db
-  scoring:
-  - provider_id: basic
-    provider_type: inline::basic
-    config: {}
-  - provider_id: llm-as-judge
-    provider_type: inline::llm-as-judge
-    config: {}
-  - provider_id: braintrust
-    provider_type: inline::braintrust
-    config:
-      openai_api_key: ${env.OPENAI_API_KEY:}
-  tool_runtime:
-  - provider_id: brave-search
-    provider_type: remote::brave-search
-    config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: tavily-search
-    provider_type: remote::tavily-search
-    config:
-      api_key: ${env.TAVILY_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
-  - provider_id: rag-runtime
-    provider_type: inline::rag-runtime
-    config: {}
-  - provider_id: model-context-protocol
-    provider_type: remote::model-context-protocol
-    config: {}
-metadata_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db
-models:
-- metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
-  provider_id: hf-endpoint
-  model_type: llm
-- metadata: {}
-  model_id: ${env.SAFETY_MODEL}
-  provider_id: hf-endpoint-safety
-  model_type: llm
-- metadata:
-    embedding_dimension: 384
-  model_id: all-MiniLM-L6-v2
-  provider_id: sentence-transformers
-  model_type: embedding
-shields:
-- shield_id: ${env.SAFETY_MODEL}
-vector_dbs: []
-datasets: []
-scoring_fns: []
-benchmarks: []
-tool_groups:
-- toolgroup_id: builtin::websearch
-  provider_id: tavily-search
-- toolgroup_id: builtin::rag
-  provider_id: rag-runtime
-- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
-server:
-  port: 8321
diff --git a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml
deleted file mode 100644
index 706ba9122..000000000
--- a/llama_stack/templates/hf-endpoint/run.yaml
+++ /dev/null
@@ -1,131 +0,0 @@
-version: '2'
-image_name: hf-endpoint
-apis:
-- agents
-- datasetio
-- eval
-- inference
-- safety
-- scoring
-- telemetry
-- tool_runtime
-- vector_io
-providers:
-  inference:
-  - provider_id: hf-endpoint
-    provider_type: remote::hf::endpoint
-    config:
-      endpoint_name: ${env.INFERENCE_ENDPOINT_NAME}
-      api_token: ${env.HF_API_TOKEN}
-  - provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
-    config: {}
-  vector_io:
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/faiss_store.db
-  safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config:
-      excluded_categories: []
-  agents:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      persistence_store:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/agents_store.db
-  telemetry:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
-      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/hf-endpoint/trace_store.db}
-  eval:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/meta_reference_eval.db
-  datasetio:
-  - provider_id: huggingface
-    provider_type: remote::huggingface
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db
-  - provider_id: localfs
-    provider_type: inline::localfs
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/localfs_datasetio.db
-  scoring:
-  - provider_id: basic
-    provider_type: inline::basic
-    config: {}
-  - provider_id: llm-as-judge
-    provider_type: inline::llm-as-judge
-    config: {}
-  - provider_id: braintrust
-    provider_type: inline::braintrust
-    config:
-      openai_api_key: ${env.OPENAI_API_KEY:}
-  tool_runtime:
-  - provider_id: brave-search
-    provider_type: remote::brave-search
-    config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: tavily-search
-    provider_type: remote::tavily-search
-    config:
-      api_key: ${env.TAVILY_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
-  - provider_id: rag-runtime
-    provider_type: inline::rag-runtime
-    config: {}
-  - provider_id: model-context-protocol
-    provider_type: remote::model-context-protocol
-    config: {}
-metadata_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db
-models:
-- metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
-  provider_id: hf-endpoint
-  model_type: llm
-- metadata:
-    embedding_dimension: 384
-  model_id: all-MiniLM-L6-v2
-  provider_id: sentence-transformers
-  model_type: embedding
-shields: []
-vector_dbs: []
-datasets: []
-scoring_fns: []
-benchmarks: []
-tool_groups:
-- toolgroup_id: builtin::websearch
-  provider_id: tavily-search
-- toolgroup_id: builtin::rag
-  provider_id: rag-runtime
-- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
-server:
-  port: 8321
diff --git a/llama_stack/templates/hf-serverless/__init__.py b/llama_stack/templates/hf-serverless/__init__.py
deleted file mode 100644
index a5f1ab54a..000000000
--- a/llama_stack/templates/hf-serverless/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .hf_serverless import get_distribution_template  # noqa: F401
diff --git a/llama_stack/templates/hf-serverless/build.yaml b/llama_stack/templates/hf-serverless/build.yaml
deleted file mode 100644
index c0cc1e2c2..000000000
--- a/llama_stack/templates/hf-serverless/build.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-version: '2'
-distribution_spec:
-  description: Use (an external) Hugging Face Inference Endpoint for running LLM inference
-  providers:
-    inference:
-    - remote::hf::serverless
-    - inline::sentence-transformers
-    vector_io:
-    - inline::faiss
-    - remote::chromadb
-    - remote::pgvector
-    safety:
-    - inline::llama-guard
-    agents:
-    - inline::meta-reference
-    telemetry:
-    - inline::meta-reference
-    eval:
-    - inline::meta-reference
-    datasetio:
-    - remote::huggingface
-    - inline::localfs
-    scoring:
-    - inline::basic
-    - inline::llm-as-judge
-    - inline::braintrust
-    tool_runtime:
-    - remote::brave-search
-    - remote::tavily-search
-    - inline::code-interpreter
-    - inline::rag-runtime
-    - remote::model-context-protocol
-image_type: conda
diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py
deleted file mode 100644
index ad8a72012..000000000
--- a/llama_stack/templates/hf-serverless/hf_serverless.py
+++ /dev/null
@@ -1,147 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from llama_stack.apis.models.models import ModelType
-from llama_stack.distribution.datatypes import (
-    ModelInput,
-    Provider,
-    ShieldInput,
-    ToolGroupInput,
-)
-from llama_stack.providers.inline.inference.sentence_transformers import (
-    SentenceTransformersInferenceConfig,
-)
-from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
-from llama_stack.providers.remote.inference.tgi import InferenceAPIImplConfig
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
-
-
-def get_distribution_template() -> DistributionTemplate:
-    providers = {
-        "inference": ["remote::hf::serverless", "inline::sentence-transformers"],
-        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
-        "safety": ["inline::llama-guard"],
-        "agents": ["inline::meta-reference"],
-        "telemetry": ["inline::meta-reference"],
-        "eval": ["inline::meta-reference"],
-        "datasetio": ["remote::huggingface", "inline::localfs"],
-        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
-        "tool_runtime": [
-            "remote::brave-search",
-            "remote::tavily-search",
-            "inline::code-interpreter",
-            "inline::rag-runtime",
-            "remote::model-context-protocol",
-        ],
-    }
-
-    name = "hf-serverless"
-    inference_provider = Provider(
-        provider_id="hf-serverless",
-        provider_type="remote::hf::serverless",
-        config=InferenceAPIImplConfig.sample_run_config(),
-    )
-    embedding_provider = Provider(
-        provider_id="sentence-transformers",
-        provider_type="inline::sentence-transformers",
-        config=SentenceTransformersInferenceConfig.sample_run_config(),
-    )
-    vector_io_provider = Provider(
-        provider_id="faiss",
-        provider_type="inline::faiss",
-        config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
-    )
-
-    inference_model = ModelInput(
-        model_id="${env.INFERENCE_MODEL}",
-        provider_id="hf-serverless",
-    )
-    safety_model = ModelInput(
-        model_id="${env.SAFETY_MODEL}",
-        provider_id="hf-serverless-safety",
-    )
-    embedding_model = ModelInput(
-        model_id="all-MiniLM-L6-v2",
-        provider_id="sentence-transformers",
-        model_type=ModelType.embedding,
-        metadata={
-            "embedding_dimension": 384,
-        },
-    )
-    default_tool_groups = [
-        ToolGroupInput(
-            toolgroup_id="builtin::websearch",
-            provider_id="tavily-search",
-        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::rag",
-            provider_id="rag-runtime",
-        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::code_interpreter",
-            provider_id="code-interpreter",
-        ),
-    ]
-
-    return DistributionTemplate(
-        name=name,
-        distro_type="self_hosted",
-        description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
-        container_image=None,
-        template_path=None,
-        providers=providers,
-        run_configs={
-            "run.yaml": RunConfigSettings(
-                provider_overrides={
-                    "inference": [inference_provider, embedding_provider],
-                    "vector_io": [vector_io_provider],
-                },
-                default_models=[inference_model, embedding_model],
-                default_tool_groups=default_tool_groups,
-            ),
-            "run-with-safety.yaml": RunConfigSettings(
-                provider_overrides={
-                    "inference": [
-                        inference_provider,
-                        embedding_provider,
-                        Provider(
-                            provider_id="hf-serverless-safety",
-                            provider_type="remote::hf::serverless",
-                            config=InferenceAPIImplConfig.sample_run_config(
-                                repo="${env.SAFETY_MODEL}",
-                            ),
-                        ),
-                    ],
-                    "vector_io": [vector_io_provider],
-                },
-                default_models=[
-                    inference_model,
-                    safety_model,
-                    embedding_model,
-                ],
-                default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
-                default_tool_groups=default_tool_groups,
-            ),
-        },
-        run_config_env_vars={
-            "LLAMA_STACK_PORT": (
-                "8321",
-                "Port for the Llama Stack distribution server",
-            ),
-            "HF_API_TOKEN": (
-                "hf_...",
-                "Hugging Face API token",
-            ),
-            "INFERENCE_MODEL": (
-                "meta-llama/Llama-3.2-3B-Instruct",
-                "Inference model to be served by the HF Serverless endpoint",
-            ),
-            "SAFETY_MODEL": (
-                "meta-llama/Llama-Guard-3-1B",
-                "Safety model to be served by the HF Serverless endpoint",
-            ),
-        },
-    )
diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml b/llama_stack/templates/hf-serverless/run-with-safety.yaml
deleted file mode 100644
index bf26fe507..000000000
--- a/llama_stack/templates/hf-serverless/run-with-safety.yaml
+++ /dev/null
@@ -1,141 +0,0 @@
-version: '2'
-image_name: hf-serverless
-apis:
-- agents
-- datasetio
-- eval
-- inference
-- safety
-- scoring
-- telemetry
-- tool_runtime
-- vector_io
-providers:
-  inference:
-  - provider_id: hf-serverless
-    provider_type: remote::hf::serverless
-    config:
-      huggingface_repo: ${env.INFERENCE_MODEL}
-      api_token: ${env.HF_API_TOKEN}
-  - provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
-    config: {}
-  - provider_id: hf-serverless-safety
-    provider_type: remote::hf::serverless
-    config:
-      huggingface_repo: ${env.SAFETY_MODEL}
-      api_token: ${env.HF_API_TOKEN}
-  vector_io:
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/faiss_store.db
-  safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config:
-      excluded_categories: []
-  agents:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      persistence_store:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/agents_store.db
-  telemetry:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
-      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/hf-serverless/trace_store.db}
-  eval:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/meta_reference_eval.db
-  datasetio:
-  - provider_id: huggingface
-    provider_type: remote::huggingface
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/huggingface_datasetio.db
-  - provider_id: localfs
-    provider_type: inline::localfs
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/localfs_datasetio.db
-  scoring:
-  - provider_id: basic
-    provider_type: inline::basic
-    config: {}
-  - provider_id: llm-as-judge
-    provider_type: inline::llm-as-judge
-    config: {}
-  - provider_id: braintrust
-    provider_type: inline::braintrust
-    config:
-      openai_api_key: ${env.OPENAI_API_KEY:}
-  tool_runtime:
-  - provider_id: brave-search
-    provider_type: remote::brave-search
-    config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: tavily-search
-    provider_type: remote::tavily-search
-    config:
-      api_key: ${env.TAVILY_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
-  - provider_id: rag-runtime
-    provider_type: inline::rag-runtime
-    config: {}
-  - provider_id: model-context-protocol
-    provider_type: remote::model-context-protocol
-    config: {}
-metadata_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db
-models:
-- metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
-  provider_id: hf-serverless
-  model_type: llm
-- metadata: {}
-  model_id: ${env.SAFETY_MODEL}
-  provider_id: hf-serverless-safety
-  model_type: llm
-- metadata:
-    embedding_dimension: 384
-  model_id: all-MiniLM-L6-v2
-  provider_id: sentence-transformers
-  model_type: embedding
-shields:
-- shield_id: ${env.SAFETY_MODEL}
-vector_dbs: []
-datasets: []
-scoring_fns: []
-benchmarks: []
-tool_groups:
-- toolgroup_id: builtin::websearch
-  provider_id: tavily-search
-- toolgroup_id: builtin::rag
-  provider_id: rag-runtime
-- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
-server:
-  port: 8321
diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml
deleted file mode 100644
index cc973b8de..000000000
--- a/llama_stack/templates/hf-serverless/run.yaml
+++ /dev/null
@@ -1,131 +0,0 @@
-version: '2'
-image_name: hf-serverless
-apis:
-- agents
-- datasetio
-- eval
-- inference
-- safety
-- scoring
-- telemetry
-- tool_runtime
-- vector_io
-providers:
-  inference:
-  - provider_id: hf-serverless
-    provider_type: remote::hf::serverless
-    config:
-      huggingface_repo: ${env.INFERENCE_MODEL}
-      api_token: ${env.HF_API_TOKEN}
-  - provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
-    config: {}
-  vector_io:
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/faiss_store.db
-  safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config:
-      excluded_categories: []
-  agents:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      persistence_store:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/agents_store.db
-  telemetry:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
-      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/hf-serverless/trace_store.db}
-  eval:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/meta_reference_eval.db
-  datasetio:
-  - provider_id: huggingface
-    provider_type: remote::huggingface
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/huggingface_datasetio.db
-  - provider_id: localfs
-    provider_type: inline::localfs
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/localfs_datasetio.db
-  scoring:
-  - provider_id: basic
-    provider_type: inline::basic
-    config: {}
-  - provider_id: llm-as-judge
-    provider_type: inline::llm-as-judge
-    config: {}
-  - provider_id: braintrust
-    provider_type: inline::braintrust
-    config:
-      openai_api_key: ${env.OPENAI_API_KEY:}
-  tool_runtime:
-  - provider_id: brave-search
-    provider_type: remote::brave-search
-    config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: tavily-search
-    provider_type: remote::tavily-search
-    config:
-      api_key: ${env.TAVILY_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
-  - provider_id: rag-runtime
-    provider_type: inline::rag-runtime
-    config: {}
-  - provider_id: model-context-protocol
-    provider_type: remote::model-context-protocol
-    config: {}
-metadata_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db
-models:
-- metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
-  provider_id: hf-serverless
-  model_type: llm
-- metadata:
-    embedding_dimension: 384
-  model_id: all-MiniLM-L6-v2
-  provider_id: sentence-transformers
-  model_type: embedding
-shields: []
-vector_dbs: []
-datasets: []
-scoring_fns: []
-benchmarks: []
-tool_groups:
-- toolgroup_id: builtin::websearch
-  provider_id: tavily-search
-- toolgroup_id: builtin::rag
-  provider_id: rag-runtime
-- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
-server:
-  port: 8321
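
Note for anyone carrying these templates forward: the diffstat above touches only llama_stack/templates/, so this patch deletes the prebuilt hf-endpoint and hf-serverless distributions and their dependency lists, not the provider configs imported from llama_stack.providers.remote.inference.tgi. As a minimal sketch (not part of this patch), the inference stanza below is lifted from the deleted hf-serverless/run.yaml and could be carried into a custom run config; whether the remote::hf::serverless provider type remains registered after this change is an assumption you should verify against your build.

    # Sketch reconstructed from the deleted hf-serverless/run.yaml.
    # Assumption: remote::hf::serverless is still available in your build.
    providers:
      inference:
      - provider_id: hf-serverless
        provider_type: remote::hf::serverless
        config:
          huggingface_repo: ${env.INFERENCE_MODEL}  # e.g. meta-llama/Llama-3.2-3B-Instruct
          api_token: ${env.HF_API_TOKEN}            # Hugging Face API token (hf_...)

The hf-endpoint variant is identical except that config takes endpoint_name: ${env.INFERENCE_ENDPOINT_NAME} in place of huggingface_repo, per the deleted hf-endpoint/run.yaml.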