diff --git a/docs/source/distributions/self_hosted_distro/llamacpp.md b/docs/source/distributions/self_hosted_distro/llamacpp.md
index f3a5e3630..136f693dc 100644
--- a/docs/source/distributions/self_hosted_distro/llamacpp.md
+++ b/docs/source/distributions/self_hosted_distro/llamacpp.md
@@ -33,7 +33,7 @@ Set these environment variables before running Llama Stack:
 ```bash
 export LLAMACPP_URL=http://localhost:8080  # URL of your llama.cpp server (without /v1 suffix)
 export INFERENCE_MODEL=your-model-name     # Name/identifier without gguf extension
-export LLAMACPP_API_KEY="YOUR_API_KEY" # API key (leave empty for local servers)
+export LLAMACPP_API_KEY="YOUR_API_KEY"     # API key (leave empty for local servers)
 ```
 
 ## Running Llama Stack
diff --git a/docs/source/providers/inference/remote_llamacpp.md b/docs/source/providers/inference/remote_llamacpp.md
index 291c96614..d17b5adf2 100644
--- a/docs/source/providers/inference/remote_llamacpp.md
+++ b/docs/source/providers/inference/remote_llamacpp.md
@@ -11,7 +11,7 @@
 
 ```yaml
-openai_compat_api_base: ${env.LLAMACPP_URL:http://localhost:8080}/v1
-api_key: ${env.LLAMACPP_API_KEY:}
+openai_compat_api_base: ${env.LLAMACPP_URL:=http://localhost:8080}/v1
+api_key: ${env.LLAMACPP_API_KEY:=}
 ```
 
diff --git a/llama_stack/providers/remote/inference/llamacpp/config.py b/llama_stack/providers/remote/inference/llamacpp/config.py
index 8285ff065..4a16cb17d 100644
--- a/llama_stack/providers/remote/inference/llamacpp/config.py
+++ b/llama_stack/providers/remote/inference/llamacpp/config.py
@@ -4,15 +4,15 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from typing import Any, Union
-
-from llama_stack.schema_utils import json_schema_type
+from typing import Any
 
 from pydantic import BaseModel, Field
 
+from llama_stack.schema_utils import json_schema_type
+
 
 class LlamaCppProviderDataValidator(BaseModel):
-    llamacpp_api_key: Union[str, None] = Field(
+    llamacpp_api_key: str | None = Field(
         default=None,
         description="API key for llama.cpp server (optional for local servers)",
     )
@@ -20,7 +20,7 @@ class LlamaCppProviderDataValidator(BaseModel):
 
 @json_schema_type
 class LlamaCppImplConfig(BaseModel):
-    api_key: Union[str, None] = Field(
+    api_key: str | None = Field(
         default=None,
         description="The llama.cpp server API key (optional for local servers)",
     )
@@ -31,9 +31,8 @@ class LlamaCppImplConfig(BaseModel):
     )
 
     @classmethod
-    def sample_run_config(
-        cls, api_key: str = "${env.LLAMACPP_API_KEY:}"
-    ) -> dict[str, Any]:
+    def sample_run_config(cls, api_key: str = "${env.LLAMACPP_API_KEY:=}") -> dict[str, Any]:
         return {
-            "openai_compat_api_base": "${env.LLAMACPP_URL:http://localhost:8080}/v1",
+            "openai_compat_api_base": "${env.LLAMACPP_URL:=http://localhost:8080}/v1",
             "api_key": api_key,
diff --git a/llama_stack/templates/llamacpp/build.yaml b/llama_stack/templates/llamacpp/build.yaml
index 14c57e917..55e03a622 100644
--- a/llama_stack/templates/llamacpp/build.yaml
+++ b/llama_stack/templates/llamacpp/build.yaml
@@ -1,38 +1,38 @@
-version: "2"
+version: 2
 distribution_spec:
   description: Use llama.cpp server for running LLM inference
   providers:
     inference:
-      - remote::llamacpp
-      - inline::sentence-transformers
+    - remote::llamacpp
+    - inline::sentence-transformers
     vector_io:
-      - inline::faiss
-      - remote::chromadb
-      - remote::pgvector
+    - inline::faiss
+    - remote::chromadb
+    - remote::pgvector
     safety:
-      - inline::llama-guard
+    - inline::llama-guard
     agents:
-      - inline::meta-reference
+    - inline::meta-reference
     telemetry:
-      - inline::meta-reference
+    - inline::meta-reference
     eval:
-      - inline::meta-reference
+    - inline::meta-reference
     datasetio:
-      - remote::huggingface
-      - inline::localfs
+    - remote::huggingface
+    - inline::localfs
     scoring:
-      - inline::basic
-      - inline::llm-as-judge
-      - inline::braintrust
+    - inline::basic
+    - inline::llm-as-judge
+    - inline::braintrust
     files:
-      - inline::localfs
+    - inline::localfs
     tool_runtime:
-      - remote::brave-search
-      - remote::tavily-search
-      - remote::wolfram-alpha
-      - inline::rag-runtime
-      - remote::model-context-protocol
+    - remote::brave-search
+    - remote::tavily-search
+    - remote::wolfram-alpha
+    - inline::rag-runtime
+    - remote::model-context-protocol
 image_type: conda
 additional_pip_packages:
-  - aiosqlite
-  - sqlalchemy[asyncio]
+- aiosqlite
+- sqlalchemy[asyncio]
diff --git a/llama_stack/templates/llamacpp/doc_template.md b/llama_stack/templates/llamacpp/doc_template.md
index 582d1d6d7..7c18f9b7f 100644
--- a/llama_stack/templates/llamacpp/doc_template.md
+++ b/llama_stack/templates/llamacpp/doc_template.md
@@ -32,7 +32,7 @@ Set these environment variables before running Llama Stack:
 ```bash
 export LLAMACPP_URL=http://localhost:8080  # URL of your llama.cpp server (without /v1 suffix)
 export INFERENCE_MODEL=your-model-name     # Name/identifier without gguf extension
-export LLAMACPP_API_KEY="YOUR_API_KEY" # API key (leave empty for local servers)
+export LLAMACPP_API_KEY="YOUR_API_KEY"     # API key (leave empty for local servers)
 ```
 
 ## Running Llama Stack
diff --git a/llama_stack/templates/llamacpp/llamacpp.py b/llama_stack/templates/llamacpp/llamacpp.py
index 2ed0b8ca9..0c45930a2 100644
--- a/llama_stack/templates/llamacpp/llamacpp.py
+++ b/llama_stack/templates/llamacpp/llamacpp.py
@@ -21,7 +21,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::llamacpp"],
+        "inference": ["remote::llamacpp", "inline::sentence-transformers"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
@@ -44,6 +44,11 @@ def get_distribution_template() -> DistributionTemplate:
         provider_type="remote::llamacpp",
         config=LlamaCppImplConfig.sample_run_config(),
     )
+    sentence_transformers_provider = Provider(
+        provider_id="sentence-transformers",
+        provider_type="inline::sentence-transformers",
+        config={},
+    )
     vector_io_provider_faiss = Provider(
         provider_id="faiss",
         provider_type="inline::faiss",
@@ -52,9 +57,7 @@ def get_distribution_template() -> DistributionTemplate:
     files_provider = Provider(
         provider_id="meta-reference-files",
         provider_type="inline::localfs",
-        config=LocalfsFilesImplConfig.sample_run_config(
-            f"~/.llama/distributions/{name}"
-        ),
+        config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
     )
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -97,7 +100,7 @@ def get_distribution_template() -> DistributionTemplate:
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
-                    "inference": [inference_provider],
+                    "inference": [inference_provider, sentence_transformers_provider],
                     "vector_io": [vector_io_provider_faiss],
                     "files": [files_provider],
                 },
@@ -106,7 +109,7 @@ def get_distribution_template() -> DistributionTemplate:
             ),
             "run-with-safety.yaml": RunConfigSettings(
                 provider_overrides={
-                    "inference": [inference_provider],
+                    "inference": [inference_provider, sentence_transformers_provider],
                     "vector_io": [vector_io_provider_faiss],
                     "files": [files_provider],
                     "safety": [
diff --git a/llama_stack/templates/llamacpp/run-with-safety.yaml b/llama_stack/templates/llamacpp/run-with-safety.yaml
index 7cfaf7cb4..a83db1332 100644
--- a/llama_stack/templates/llamacpp/run-with-safety.yaml
+++ b/llama_stack/templates/llamacpp/run-with-safety.yaml
@@ -1,152 +1,148 @@
-version: "2"
+version: 2
 image_name: llamacpp
 apis:
-  - agents
-  - datasetio
-  - eval
-  - files
-  - inference
-  - safety
-  - scoring
-  - telemetry
-  - tool_runtime
-  - vector_io
+- agents
+- datasetio
+- eval
+- files
+- inference
+- safety
+- scoring
+- telemetry
+- tool_runtime
+- vector_io
 providers:
   inference:
-    - provider_id: llamacpp
-      provider_type: remote::llamacpp
-      config:
-        openai_compat_api_base: ${env.LLAMACPP_URL:http://localhost:8080}/v1
-        api_key: ${env.LLAMACPP_API_KEY:}
-    - provider_id: sentence-transformers
-      provider_type: inline::sentence-transformers
-      config: {}
+  - provider_id: llamacpp
+    provider_type: remote::llamacpp
+    config:
+      openai_compat_api_base: ${env.LLAMACPP_URL:=http://localhost:8080}/v1
+      api_key: ${env.LLAMACPP_API_KEY:=}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+    config: {}
   vector_io:
-    - provider_id: faiss
-      provider_type: inline::faiss
-      config:
-        kvstore:
-          type: sqlite
-          namespace: null
-          db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llamacpp}/faiss_store.db
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llamacpp}/faiss_store.db
   safety:
-    - provider_id: llama-guard
-      provider_type: inline::llama-guard
-      config: {}
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config: {}
   agents:
-    - provider_id: meta-reference
-      provider_type: inline::meta-reference
-      config:
-        persistence_store:
-          type: sqlite
-          namespace: null
-          db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llamacpp}/agents_store.db
-        responses_store:
-          type: sqlite
-          db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llamacpp}/responses_store.db
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llamacpp}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llamacpp}/responses_store.db
   telemetry:
-    - provider_id: meta-reference
-      provider_type: inline::meta-reference
-      config:
-        service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
-        sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-        sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llamacpp}/trace_store.db
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
+      sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llamacpp}/trace_store.db
+      otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
  eval:
-    - provider_id: meta-reference
-      provider_type: inline::meta-reference
-      config:
-        kvstore:
-          type: sqlite
-          namespace: null
-          db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llamacpp}/meta_reference_eval.db
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llamacpp}/meta_reference_eval.db
   datasetio:
-    - provider_id: huggingface
-      provider_type: remote::huggingface
-      config:
-        kvstore:
-          type: sqlite
-          namespace: null
-          db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llamacpp}/huggingface_datasetio.db
-    - provider_id: localfs
-      provider_type: inline::localfs
-      config:
-        kvstore:
-          type: sqlite
-          namespace: null
-          db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llamacpp}/localfs_datasetio.db
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llamacpp}/huggingface_datasetio.db
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llamacpp}/localfs_datasetio.db
   scoring:
-    - provider_id: basic
-      provider_type: inline::basic
-      config: {}
-    - provider_id: llm-as-judge
-      provider_type: inline::llm-as-judge
-      config: {}
-    - provider_id: braintrust
-      provider_type: inline::braintrust
-      config:
-        openai_api_key: ${env.OPENAI_API_KEY:}
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
   files:
-    - provider_id: meta-reference-files
-      provider_type: inline::localfs
-      config:
-        storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/llamacpp/files}
-        metadata_store:
-          type: sqlite
-          db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llamacpp}/files_metadata.db
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/llamacpp/files}
+      metadata_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llamacpp}/files_metadata.db
   tool_runtime:
-    - provider_id: brave-search
-      provider_type: remote::brave-search
-      config:
-        api_key: ${env.BRAVE_SEARCH_API_KEY:}
-        max_results: 3
-    - provider_id: tavily-search
-      provider_type: remote::tavily-search
-      config:
-        api_key: ${env.TAVILY_SEARCH_API_KEY:}
-        max_results: 3
-    - provider_id: rag-runtime
-      provider_type: inline::rag-runtime
-      config: {}
-    - provider_id: model-context-protocol
-      provider_type: remote::model-context-protocol
-      config: {}
-    - provider_id: wolfram-alpha
-      provider_type: remote::wolfram-alpha
-      config:
-        api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: wolfram-alpha
+    provider_type: remote::wolfram-alpha
+    config:
+      api_key: ${env.WOLFRAM_ALPHA_API_KEY:=}
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
 metadata_store:
   type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llamacpp}/registry.db
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llamacpp}/registry.db
 inference_store:
   type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llamacpp}/inference_store.db
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llamacpp}/inference_store.db
 models:
-  - metadata: {}
-    model_id: ${env.INFERENCE_MODEL}
-    provider_id: llamacpp
-    model_type: llm
-  - metadata: {}
-    model_id: ${env.SAFETY_MODEL}
-    provider_id: llamacpp
-    model_type: llm
-  - metadata:
-      embedding_dimension: 384
-    model_id: all-MiniLM-L6-v2
-    provider_id: sentence-transformers
-    model_type: embedding
+- metadata: {}
+  model_id: ${env.INFERENCE_MODEL}
+  provider_id: llamacpp
+  model_type: llm
+- metadata: {}
+  model_id: ${env.SAFETY_MODEL}
+  provider_id: llamacpp
+  model_type: llm
+- metadata:
+    embedding_dimension: 384
+  model_id: all-MiniLM-L6-v2
+  provider_id: sentence-transformers
+  model_type: embedding
 shields:
-  - shield_id: ${env.SAFETY_MODEL}
-    provider_id: llama-guard
+- shield_id: ${env.SAFETY_MODEL}
+  provider_id: llama-guard
 vector_dbs: []
 datasets: []
 scoring_fns: []
 benchmarks: []
 tool_groups:
-  - toolgroup_id: builtin::websearch
-    provider_id: tavily-search
-  - toolgroup_id: builtin::rag
-    provider_id: rag-runtime
-  - toolgroup_id: builtin::wolfram_alpha
-    provider_id: wolfram-alpha
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::rag
+  provider_id: rag-runtime
+- toolgroup_id: builtin::wolfram_alpha
+  provider_id: wolfram-alpha
 server:
   port: 8321
diff --git a/llama_stack/templates/llamacpp/run.yaml b/llama_stack/templates/llamacpp/run.yaml
index 52b797464..c9f7b1b8c 100644
--- a/llama_stack/templates/llamacpp/run.yaml
+++ b/llama_stack/templates/llamacpp/run.yaml
@@ -1,148 +1,143 @@
-version: "2"
+version: 2
 image_name: llamacpp
 apis:
-  - agents
-  - datasetio
-  - eval
-  - files
-  - inference
-  - safety
-  - scoring
-  - telemetry
-  - tool_runtime
-  - vector_io
+- agents
+- datasetio
+- eval
+- files
+- inference
+- safety
+- scoring
+- telemetry
+- tool_runtime
+- vector_io
 providers:
   inference:
-    - provider_id: llamacpp
-      provider_type: remote::llamacpp
-      config:
-        openai_compat_api_base: ${env.LLAMACPP_URL:http://localhost:8080}/v1
-        api_key: ${env.LLAMACPP_API_KEY:}
-    - provider_id: sentence-transformers
-      provider_type: inline::sentence-transformers
-      config: {}
+  - provider_id: llamacpp
+    provider_type: remote::llamacpp
+    config:
+      openai_compat_api_base: ${env.LLAMACPP_URL:=http://localhost:8080}/v1
+      api_key: ${env.LLAMACPP_API_KEY:=}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+    config: {}
   vector_io:
-    - provider_id: faiss
-      provider_type: inline::faiss
-      config:
-        kvstore:
-          type: sqlite
-          namespace: null
-          db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llamacpp}/faiss_store.db
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llamacpp}/faiss_store.db
   safety:
-    - provider_id: llama-guard
-      provider_type: inline::llama-guard
-      config:
-        excluded_categories: []
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
   agents:
-    - provider_id: meta-reference
-      provider_type: inline::meta-reference
-      config:
-        persistence_store:
-          type: sqlite
-          namespace: null
-          db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llamacpp}/agents_store.db
-        responses_store:
-          type: sqlite
-          db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llamacpp}/responses_store.db
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llamacpp}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llamacpp}/responses_store.db
   telemetry:
-    - provider_id: meta-reference
-      provider_type: inline::meta-reference
-      config:
-        service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
-        sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
-        sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llamacpp}/trace_store.db
-        otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
+      sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llamacpp}/trace_store.db
+      otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
   eval:
-    - provider_id: meta-reference
-      provider_type: inline::meta-reference
-      config:
-        kvstore:
-          type: sqlite
-          namespace: null
-          db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llamacpp}/meta_reference_eval.db
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llamacpp}/meta_reference_eval.db
   datasetio:
-    - provider_id: huggingface
-      provider_type: remote::huggingface
-      config:
-        kvstore:
-          type: sqlite
-          namespace: null
-          db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llamacpp}/huggingface_datasetio.db
-    - provider_id: localfs
-      provider_type: inline::localfs
-      config:
-        kvstore:
-          type: sqlite
-          namespace: null
-          db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llamacpp}/localfs_datasetio.db
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llamacpp}/huggingface_datasetio.db
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llamacpp}/localfs_datasetio.db
   scoring:
-    - provider_id: basic
-      provider_type: inline::basic
-      config: {}
-    - provider_id: llm-as-judge
-      provider_type: inline::llm-as-judge
-      config: {}
-    - provider_id: braintrust
-      provider_type: inline::braintrust
-      config:
-        openai_api_key: ${env.OPENAI_API_KEY:}
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
   files:
-    - provider_id: meta-reference-files
-      provider_type: inline::localfs
-      config:
-        storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/llamacpp/files}
-        metadata_store:
-          type: sqlite
-          db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llamacpp}/files_metadata.db
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/llamacpp/files}
+      metadata_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llamacpp}/files_metadata.db
   tool_runtime:
-    - provider_id: brave-search
-      provider_type: remote::brave-search
-      config:
-        api_key: ${env.BRAVE_SEARCH_API_KEY:}
-        max_results: 3
-    - provider_id: tavily-search
-      provider_type: remote::tavily-search
-      config:
-        api_key: ${env.TAVILY_SEARCH_API_KEY:}
-        max_results: 3
-    - provider_id: rag-runtime
-      provider_type: inline::rag-runtime
-      config: {}
-    - provider_id: model-context-protocol
-      provider_type: remote::model-context-protocol
-      config: {}
-    - provider_id: wolfram-alpha
-      provider_type: remote::wolfram-alpha
-      config:
-        api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: wolfram-alpha
+    provider_type: remote::wolfram-alpha
+    config:
+      api_key: ${env.WOLFRAM_ALPHA_API_KEY:=}
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
 metadata_store:
   type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llamacpp}/registry.db
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llamacpp}/registry.db
 inference_store:
   type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llamacpp}/inference_store.db
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llamacpp}/inference_store.db
 models:
-  - metadata: {}
-    model_id: ${env.INFERENCE_MODEL}
-    provider_id: llamacpp
-    model_type: llm
-  - metadata:
-      embedding_dimension: 384
-    model_id: all-MiniLM-L6-v2
-    provider_id: sentence-transformers
-    model_type: embedding
+- metadata: {}
+  model_id: ${env.INFERENCE_MODEL}
+  provider_id: llamacpp
+  model_type: llm
+- metadata:
+    embedding_dimension: 384
+  model_id: all-MiniLM-L6-v2
+  provider_id: sentence-transformers
+  model_type: embedding
 shields: []
 vector_dbs: []
 datasets: []
 scoring_fns: []
 benchmarks: []
 tool_groups:
-  - toolgroup_id: builtin::websearch
-    provider_id: tavily-search
-  - toolgroup_id: builtin::rag
-    provider_id: rag-runtime
-  - toolgroup_id: builtin::wolfram_alpha
-    provider_id: wolfram-alpha
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::rag
+  provider_id: rag-runtime
+- toolgroup_id: builtin::wolfram_alpha
+  provider_id: wolfram-alpha
 server:
   port: 8321
diff --git a/llama_stack/templates/starter/build.yaml b/llama_stack/templates/starter/build.yaml
index dc7565d46..5c6b9215d 100644
--- a/llama_stack/templates/starter/build.yaml
+++ b/llama_stack/templates/starter/build.yaml
@@ -27,6 +27,7 @@ distribution_spec:
     - remote::cerebras-openai-compat
     - remote::sambanova
     - remote::passthrough
+    - remote::llamacpp
     - inline::sentence-transformers
     vector_io:
     - inline::faiss
diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml
index 888a2c3bf..c21b1e5bf 100644
--- a/llama_stack/templates/starter/run.yaml
+++ b/llama_stack/templates/starter/run.yaml
@@ -130,6 +130,11 @@ providers:
       config:
         url: ${env.PASSTHROUGH_URL}
         api_key: ${env.PASSTHROUGH_API_KEY}
+  - provider_id: ${env.ENABLE_LLAMACPP:=__disabled__}
+    provider_type: remote::llamacpp
+    config:
+      openai_compat_api_base: ${env.LLAMACPP_URL:=http://localhost:8080}/v1
+      api_key: ${env.LLAMACPP_API_KEY:=}
   - provider_id: ${env.ENABLE_SENTENCE_TRANSFORMERS:=sentence-transformers}
     provider_type: inline::sentence-transformers
    config: {}
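To exercise the provider locally, a minimal smoke-test sketch follows. The GGUF path and model name are placeholders, and the `llama-server` flags reflect llama.cpp's documented usage rather than anything this patch pins down; llama.cpp exposes an OpenAI-compatible API, so the curl below mirrors the `openai_compat_api_base` + `/chat/completions` path the provider will call.

```bash
# Start a local llama.cpp server on the template's default port.
# (Model path/name are placeholders; adjust to your setup.)
llama-server -m ./models/my-model.gguf --port 8080 &

export LLAMACPP_URL=http://localhost:8080   # no /v1 suffix; the provider appends it
export INFERENCE_MODEL=my-model             # name without the .gguf extension
export LLAMACPP_API_KEY=""                  # empty is fine for a local server

# Confirm the server answers on the OpenAI-compatible route before
# pointing Llama Stack at it.
curl -s "${LLAMACPP_URL}/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -d "{\"model\": \"${INFERENCE_MODEL}\", \"messages\": [{\"role\": \"user\", \"content\": \"Say hi\"}]}"
```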
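With the server responding, the new template itself can be brought up. This is a sketch assuming the standard `llama stack` CLI shapes (the conda image type matches `image_type: conda` in build.yaml); the `/v1/models` check assumes the stack's usual model-listing route.

```bash
# Build and run the llamacpp distribution added by this change.
llama stack build --template llamacpp --image-type conda
llama stack run llamacpp   # listens on port 8321 per run.yaml

# Cheap liveness check: the registered models, including the
# sentence-transformers embedding model this patch wires in,
# should appear in the listing.
curl -s http://localhost:8321/v1/models
```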