diff --git a/llama_stack/distributions/ci-tests/build.yaml b/llama_stack/distributions/ci-tests/build.yaml deleted file mode 100644 index c01e415a9..000000000 --- a/llama_stack/distributions/ci-tests/build.yaml +++ /dev/null @@ -1,59 +0,0 @@ -version: 2 -distribution_spec: - description: CI tests for Llama Stack - providers: - inference: - - provider_type: remote::cerebras - - provider_type: remote::ollama - - provider_type: remote::vllm - - provider_type: remote::tgi - - provider_type: remote::fireworks - - provider_type: remote::together - - provider_type: remote::bedrock - - provider_type: remote::nvidia - - provider_type: remote::openai - - provider_type: remote::anthropic - - provider_type: remote::gemini - - provider_type: remote::vertexai - - provider_type: remote::groq - - provider_type: remote::sambanova - - provider_type: remote::azure - - provider_type: inline::sentence-transformers - vector_io: - - provider_type: inline::faiss - - provider_type: inline::sqlite-vec - - provider_type: inline::milvus - - provider_type: remote::chromadb - - provider_type: remote::pgvector - - provider_type: remote::qdrant - - provider_type: remote::weaviate - files: - - provider_type: inline::localfs - safety: - - provider_type: inline::llama-guard - - provider_type: inline::code-scanner - agents: - - provider_type: inline::meta-reference - post_training: - - provider_type: inline::torchtune-cpu - eval: - - provider_type: inline::meta-reference - datasetio: - - provider_type: remote::huggingface - - provider_type: inline::localfs - scoring: - - provider_type: inline::basic - - provider_type: inline::llm-as-judge - - provider_type: inline::braintrust - tool_runtime: - - provider_type: remote::brave-search - - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - - provider_type: remote::model-context-protocol - batches: - - provider_type: inline::reference -image_type: venv -additional_pip_packages: -- aiosqlite -- asyncpg -- sqlalchemy[asyncio] diff --git a/llama_stack/distributions/ci-tests/run.yaml b/llama_stack/distributions/ci-tests/run.yaml deleted file mode 100644 index 702acff8e..000000000 --- a/llama_stack/distributions/ci-tests/run.yaml +++ /dev/null @@ -1,281 +0,0 @@ -version: 2 -image_name: ci-tests -apis: -- agents -- batches -- datasetio -- eval -- files -- inference -- post_training -- safety -- scoring -- tool_runtime -- vector_io -providers: - inference: - - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} - provider_type: remote::cerebras - config: - base_url: https://api.cerebras.ai - api_key: ${env.CEREBRAS_API_KEY:=} - - provider_id: ${env.OLLAMA_URL:+ollama} - provider_type: remote::ollama - config: - url: ${env.OLLAMA_URL:=http://localhost:11434} - - provider_id: ${env.VLLM_URL:+vllm} - provider_type: remote::vllm - config: - url: ${env.VLLM_URL:=} - max_tokens: ${env.VLLM_MAX_TOKENS:=4096} - api_token: ${env.VLLM_API_TOKEN:=fake} - tls_verify: ${env.VLLM_TLS_VERIFY:=true} - - provider_id: ${env.TGI_URL:+tgi} - provider_type: remote::tgi - config: - url: ${env.TGI_URL:=} - - provider_id: fireworks - provider_type: remote::fireworks - config: - url: https://api.fireworks.ai/inference/v1 - api_key: ${env.FIREWORKS_API_KEY:=} - - provider_id: together - provider_type: remote::together - config: - url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:=} - - provider_id: bedrock - provider_type: remote::bedrock - - provider_id: ${env.NVIDIA_API_KEY:+nvidia} - provider_type: remote::nvidia - config: - url: 
${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} - api_key: ${env.NVIDIA_API_KEY:=} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - - provider_id: openai - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY:=} - base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1} - - provider_id: anthropic - provider_type: remote::anthropic - config: - api_key: ${env.ANTHROPIC_API_KEY:=} - - provider_id: gemini - provider_type: remote::gemini - config: - api_key: ${env.GEMINI_API_KEY:=} - - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai} - provider_type: remote::vertexai - config: - project: ${env.VERTEX_AI_PROJECT:=} - location: ${env.VERTEX_AI_LOCATION:=us-central1} - - provider_id: groq - provider_type: remote::groq - config: - url: https://api.groq.com - api_key: ${env.GROQ_API_KEY:=} - - provider_id: sambanova - provider_type: remote::sambanova - config: - url: https://api.sambanova.ai/v1 - api_key: ${env.SAMBANOVA_API_KEY:=} - - provider_id: ${env.AZURE_API_KEY:+azure} - provider_type: remote::azure - config: - api_key: ${env.AZURE_API_KEY:=} - api_base: ${env.AZURE_API_BASE:=} - api_version: ${env.AZURE_API_VERSION:=} - api_type: ${env.AZURE_API_TYPE:=} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - persistence: - namespace: vector_io::faiss - backend: kv_default - - provider_id: sqlite-vec - provider_type: inline::sqlite-vec - config: - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db - persistence: - namespace: vector_io::sqlite_vec - backend: kv_default - - provider_id: ${env.MILVUS_URL:+milvus} - provider_type: inline::milvus - config: - db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db - persistence: - namespace: vector_io::milvus - backend: kv_default - - provider_id: ${env.CHROMADB_URL:+chromadb} - provider_type: remote::chromadb - config: - url: ${env.CHROMADB_URL:=} - persistence: - namespace: vector_io::chroma_remote - backend: kv_default - - provider_id: ${env.PGVECTOR_DB:+pgvector} - provider_type: remote::pgvector - config: - host: ${env.PGVECTOR_HOST:=localhost} - port: ${env.PGVECTOR_PORT:=5432} - db: ${env.PGVECTOR_DB:=} - user: ${env.PGVECTOR_USER:=} - password: ${env.PGVECTOR_PASSWORD:=} - persistence: - namespace: vector_io::pgvector - backend: kv_default - - provider_id: ${env.QDRANT_URL:+qdrant} - provider_type: remote::qdrant - config: - api_key: ${env.QDRANT_API_KEY:=} - persistence: - namespace: vector_io::qdrant_remote - backend: kv_default - - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} - provider_type: remote::weaviate - config: - weaviate_api_key: null - weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} - persistence: - namespace: vector_io::weaviate - backend: kv_default - files: - - provider_id: meta-reference-files - provider_type: inline::localfs - config: - storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files} - metadata_store: - table_name: files_metadata - backend: sql_default - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - - provider_id: code-scanner - provider_type: inline::code-scanner - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - max_write_queue_size: 
10000 - num_writers: 4 - post_training: - - provider_id: torchtune-cpu - provider_type: inline::torchtune-cpu - config: - checkpoint_format: meta - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - namespace: eval - backend: kv_default - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - namespace: datasetio::huggingface - backend: kv_default - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - namespace: datasetio::localfs - backend: kv_default - scoring: - - provider_id: basic - provider_type: inline::basic - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:=} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - batches: - - provider_id: reference - provider_type: inline::reference - config: - kvstore: - namespace: batches - backend: kv_default -storage: - backends: - kv_default: - type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/kvstore.db - sql_default: - type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sql_store.db - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - conversations: - table_name: openai_conversations - backend: sql_default - prompts: - namespace: prompts - backend: kv_default -registered_resources: - models: [] - shields: - - shield_id: llama-guard - provider_id: ${env.SAFETY_MODEL:+llama-guard} - provider_shield_id: ${env.SAFETY_MODEL:=} - - shield_id: code-scanner - provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} - provider_shield_id: ${env.CODE_SCANNER_MODEL:=} - vector_dbs: [] - datasets: [] - scoring_fns: [] - benchmarks: [] - tool_groups: - - toolgroup_id: builtin::websearch - provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true -vector_stores: - default_provider_id: faiss - default_embedding_model: - provider_id: sentence-transformers - model_id: nomic-ai/nomic-embed-text-v1.5 -safety: - default_shield_id: llama-guard diff --git a/llama_stack/distributions/dell/build.yaml b/llama_stack/distributions/dell/build.yaml deleted file mode 100644 index 7bc26ca9e..000000000 --- a/llama_stack/distributions/dell/build.yaml +++ /dev/null @@ -1,33 +0,0 @@ -version: 2 -distribution_spec: - description: Dell's distribution of Llama Stack. 
TGI inference via Dell's custom - container - providers: - inference: - - provider_type: remote::tgi - - provider_type: inline::sentence-transformers - vector_io: - - provider_type: inline::faiss - - provider_type: remote::chromadb - - provider_type: remote::pgvector - safety: - - provider_type: inline::llama-guard - agents: - - provider_type: inline::meta-reference - eval: - - provider_type: inline::meta-reference - datasetio: - - provider_type: remote::huggingface - - provider_type: inline::localfs - scoring: - - provider_type: inline::basic - - provider_type: inline::llm-as-judge - - provider_type: inline::braintrust - tool_runtime: - - provider_type: remote::brave-search - - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime -image_type: venv -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/llama_stack/distributions/dell/run-with-safety.yaml b/llama_stack/distributions/dell/run-with-safety.yaml deleted file mode 100644 index e0da8060d..000000000 --- a/llama_stack/distributions/dell/run-with-safety.yaml +++ /dev/null @@ -1,144 +0,0 @@ -version: 2 -image_name: dell -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- tool_runtime -- vector_io -providers: - inference: - - provider_id: tgi0 - provider_type: remote::tgi - config: - url: ${env.DEH_URL} - - provider_id: tgi1 - provider_type: remote::tgi - config: - url: ${env.DEH_SAFETY_URL} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - vector_io: - - provider_id: chromadb - provider_type: remote::chromadb - config: - url: ${env.CHROMADB_URL:=} - persistence: - namespace: vector_io::chroma_remote - backend: kv_default - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - namespace: eval - backend: kv_default - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - namespace: datasetio::huggingface - backend: kv_default - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - namespace: datasetio::localfs - backend: kv_default - scoring: - - provider_id: basic - provider_type: inline::basic - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:=} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime -storage: - backends: - kv_default: - type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db - sql_default: - type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - 
conversations: - table_name: openai_conversations - backend: sql_default - prompts: - namespace: prompts - backend: kv_default -registered_resources: - models: - - metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: tgi0 - model_type: llm - - metadata: {} - model_id: ${env.SAFETY_MODEL} - provider_id: tgi1 - model_type: llm - - metadata: - embedding_dimension: 768 - model_id: nomic-embed-text-v1.5 - provider_id: sentence-transformers - model_type: embedding - shields: - - shield_id: ${env.SAFETY_MODEL} - vector_dbs: [] - datasets: [] - scoring_fns: [] - benchmarks: [] - tool_groups: - - toolgroup_id: builtin::websearch - provider_id: brave-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true diff --git a/llama_stack/distributions/dell/run.yaml b/llama_stack/distributions/dell/run.yaml deleted file mode 100644 index bc3117d88..000000000 --- a/llama_stack/distributions/dell/run.yaml +++ /dev/null @@ -1,135 +0,0 @@ -version: 2 -image_name: dell -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- tool_runtime -- vector_io -providers: - inference: - - provider_id: tgi0 - provider_type: remote::tgi - config: - url: ${env.DEH_URL} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - vector_io: - - provider_id: chromadb - provider_type: remote::chromadb - config: - url: ${env.CHROMADB_URL:=} - persistence: - namespace: vector_io::chroma_remote - backend: kv_default - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - namespace: eval - backend: kv_default - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - namespace: datasetio::huggingface - backend: kv_default - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - namespace: datasetio::localfs - backend: kv_default - scoring: - - provider_id: basic - provider_type: inline::basic - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:=} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime -storage: - backends: - kv_default: - type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db - sql_default: - type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - conversations: - table_name: openai_conversations - backend: sql_default - prompts: - namespace: prompts - backend: kv_default -registered_resources: - models: - - metadata: {} - model_id: 
${env.INFERENCE_MODEL} - provider_id: tgi0 - model_type: llm - - metadata: - embedding_dimension: 768 - model_id: nomic-embed-text-v1.5 - provider_id: sentence-transformers - model_type: embedding - shields: [] - vector_dbs: [] - datasets: [] - scoring_fns: [] - benchmarks: [] - tool_groups: - - toolgroup_id: builtin::websearch - provider_id: brave-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true diff --git a/llama_stack/distributions/meta-reference-gpu/build.yaml b/llama_stack/distributions/meta-reference-gpu/build.yaml deleted file mode 100644 index 1513742a7..000000000 --- a/llama_stack/distributions/meta-reference-gpu/build.yaml +++ /dev/null @@ -1,32 +0,0 @@ -version: 2 -distribution_spec: - description: Use Meta Reference for running LLM inference - providers: - inference: - - provider_type: inline::meta-reference - vector_io: - - provider_type: inline::faiss - - provider_type: remote::chromadb - - provider_type: remote::pgvector - safety: - - provider_type: inline::llama-guard - agents: - - provider_type: inline::meta-reference - eval: - - provider_type: inline::meta-reference - datasetio: - - provider_type: remote::huggingface - - provider_type: inline::localfs - scoring: - - provider_type: inline::basic - - provider_type: inline::llm-as-judge - - provider_type: inline::braintrust - tool_runtime: - - provider_type: remote::brave-search - - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - - provider_type: remote::model-context-protocol -image_type: venv -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml b/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml deleted file mode 100644 index 2fa9d198b..000000000 --- a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +++ /dev/null @@ -1,157 +0,0 @@ -version: 2 -image_name: meta-reference-gpu -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- tool_runtime -- vector_io -providers: - inference: - - provider_id: meta-reference-inference - provider_type: inline::meta-reference - config: - model: ${env.INFERENCE_MODEL} - checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:=null} - quantization: - type: ${env.QUANTIZATION_TYPE:=bf16} - model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} - max_batch_size: ${env.MAX_BATCH_SIZE:=1} - max_seq_len: ${env.MAX_SEQ_LEN:=4096} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - - provider_id: meta-reference-safety - provider_type: inline::meta-reference - config: - model: ${env.SAFETY_MODEL} - checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:=null} - quantization: - type: ${env.QUANTIZATION_TYPE:=bf16} - model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} - max_batch_size: ${env.MAX_BATCH_SIZE:=1} - max_seq_len: ${env.MAX_SEQ_LEN:=4096} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - persistence: - namespace: vector_io::faiss - backend: kv_default - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - 
kvstore: - namespace: eval - backend: kv_default - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - namespace: datasetio::huggingface - backend: kv_default - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - namespace: datasetio::localfs - backend: kv_default - scoring: - - provider_id: basic - provider_type: inline::basic - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:=} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol -storage: - backends: - kv_default: - type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db - sql_default: - type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - conversations: - table_name: openai_conversations - backend: sql_default - prompts: - namespace: prompts - backend: kv_default -registered_resources: - models: - - metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: meta-reference-inference - model_type: llm - - metadata: {} - model_id: ${env.SAFETY_MODEL} - provider_id: meta-reference-safety - model_type: llm - - metadata: - embedding_dimension: 768 - model_id: nomic-embed-text-v1.5 - provider_id: sentence-transformers - model_type: embedding - shields: - - shield_id: ${env.SAFETY_MODEL} - vector_dbs: [] - datasets: [] - scoring_fns: [] - benchmarks: [] - tool_groups: - - toolgroup_id: builtin::websearch - provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true diff --git a/llama_stack/distributions/meta-reference-gpu/run.yaml b/llama_stack/distributions/meta-reference-gpu/run.yaml deleted file mode 100644 index 5c7f75ca8..000000000 --- a/llama_stack/distributions/meta-reference-gpu/run.yaml +++ /dev/null @@ -1,142 +0,0 @@ -version: 2 -image_name: meta-reference-gpu -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- tool_runtime -- vector_io -providers: - inference: - - provider_id: meta-reference-inference - provider_type: inline::meta-reference - config: - model: ${env.INFERENCE_MODEL} - checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:=null} - quantization: - type: ${env.QUANTIZATION_TYPE:=bf16} - model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} - max_batch_size: ${env.MAX_BATCH_SIZE:=1} - max_seq_len: ${env.MAX_SEQ_LEN:=4096} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - persistence: - namespace: vector_io::faiss - backend: kv_default - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - 
agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - namespace: eval - backend: kv_default - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - namespace: datasetio::huggingface - backend: kv_default - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - namespace: datasetio::localfs - backend: kv_default - scoring: - - provider_id: basic - provider_type: inline::basic - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:=} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol -storage: - backends: - kv_default: - type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db - sql_default: - type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - conversations: - table_name: openai_conversations - backend: sql_default - prompts: - namespace: prompts - backend: kv_default -registered_resources: - models: - - metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: meta-reference-inference - model_type: llm - - metadata: - embedding_dimension: 768 - model_id: nomic-embed-text-v1.5 - provider_id: sentence-transformers - model_type: embedding - shields: [] - vector_dbs: [] - datasets: [] - scoring_fns: [] - benchmarks: [] - tool_groups: - - toolgroup_id: builtin::websearch - provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true diff --git a/llama_stack/distributions/nvidia/build.yaml b/llama_stack/distributions/nvidia/build.yaml deleted file mode 100644 index 8ddd12439..000000000 --- a/llama_stack/distributions/nvidia/build.yaml +++ /dev/null @@ -1,29 +0,0 @@ -version: 2 -distribution_spec: - description: Use NVIDIA NIM for running LLM inference, evaluation and safety - providers: - inference: - - provider_type: remote::nvidia - vector_io: - - provider_type: inline::faiss - safety: - - provider_type: remote::nvidia - agents: - - provider_type: inline::meta-reference - eval: - - provider_type: remote::nvidia - post_training: - - provider_type: remote::nvidia - datasetio: - - provider_type: inline::localfs - - provider_type: remote::nvidia - scoring: - - provider_type: inline::basic - tool_runtime: - - provider_type: inline::rag-runtime - files: - - provider_type: inline::localfs -image_type: venv -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/llama_stack/distributions/nvidia/run-with-safety.yaml b/llama_stack/distributions/nvidia/run-with-safety.yaml deleted file mode 100644 index 1d57ad17a..000000000 --- 
a/llama_stack/distributions/nvidia/run-with-safety.yaml +++ /dev/null @@ -1,140 +0,0 @@ -version: 2 -image_name: nvidia -apis: -- agents -- datasetio -- eval -- files -- inference -- post_training -- safety -- scoring -- tool_runtime -- vector_io -providers: - inference: - - provider_id: nvidia - provider_type: remote::nvidia - config: - url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} - api_key: ${env.NVIDIA_API_KEY:=} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - - provider_id: nvidia - provider_type: remote::nvidia - config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} - config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - persistence: - namespace: vector_io::faiss - backend: kv_default - safety: - - provider_id: nvidia - provider_type: remote::nvidia - config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} - config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - eval: - - provider_id: nvidia - provider_type: remote::nvidia - config: - evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331} - post_training: - - provider_id: nvidia - provider_type: remote::nvidia - config: - api_key: ${env.NVIDIA_API_KEY:=} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} - project_id: ${env.NVIDIA_PROJECT_ID:=test-project} - customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test} - datasetio: - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - namespace: datasetio::localfs - backend: kv_default - - provider_id: nvidia - provider_type: remote::nvidia - config: - api_key: ${env.NVIDIA_API_KEY:=} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} - project_id: ${env.NVIDIA_PROJECT_ID:=test-project} - datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test} - scoring: - - provider_id: basic - provider_type: inline::basic - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime - files: - - provider_id: meta-reference-files - provider_type: inline::localfs - config: - storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files} - metadata_store: - table_name: files_metadata - backend: sql_default -storage: - backends: - kv_default: - type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db - sql_default: - type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - conversations: - table_name: openai_conversations - backend: sql_default - prompts: - namespace: prompts - backend: kv_default -registered_resources: - models: - - metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: nvidia - model_type: llm - - metadata: {} - model_id: ${env.SAFETY_MODEL} - provider_id: nvidia - model_type: llm - shields: - - shield_id: ${env.SAFETY_MODEL} - provider_id: nvidia - vector_dbs: [] - datasets: [] - scoring_fns: [] - benchmarks: [] - tool_groups: - - toolgroup_id: builtin::rag - provider_id: rag-runtime 
-server: - port: 8321 -telemetry: - enabled: true diff --git a/llama_stack/distributions/nvidia/run.yaml b/llama_stack/distributions/nvidia/run.yaml deleted file mode 100644 index 8c50b8bfb..000000000 --- a/llama_stack/distributions/nvidia/run.yaml +++ /dev/null @@ -1,119 +0,0 @@ -version: 2 -image_name: nvidia -apis: -- agents -- datasetio -- eval -- files -- inference -- post_training -- safety -- scoring -- tool_runtime -- vector_io -providers: - inference: - - provider_id: nvidia - provider_type: remote::nvidia - config: - url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} - api_key: ${env.NVIDIA_API_KEY:=} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - persistence: - namespace: vector_io::faiss - backend: kv_default - safety: - - provider_id: nvidia - provider_type: remote::nvidia - config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} - config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - eval: - - provider_id: nvidia - provider_type: remote::nvidia - config: - evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331} - post_training: - - provider_id: nvidia - provider_type: remote::nvidia - config: - api_key: ${env.NVIDIA_API_KEY:=} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} - project_id: ${env.NVIDIA_PROJECT_ID:=test-project} - customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test} - datasetio: - - provider_id: nvidia - provider_type: remote::nvidia - config: - api_key: ${env.NVIDIA_API_KEY:=} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} - project_id: ${env.NVIDIA_PROJECT_ID:=test-project} - datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test} - scoring: - - provider_id: basic - provider_type: inline::basic - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime - files: - - provider_id: meta-reference-files - provider_type: inline::localfs - config: - storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files} - metadata_store: - table_name: files_metadata - backend: sql_default -storage: - backends: - kv_default: - type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db - sql_default: - type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - conversations: - table_name: openai_conversations - backend: sql_default - prompts: - namespace: prompts - backend: kv_default -registered_resources: - models: [] - shields: [] - vector_dbs: [] - datasets: [] - scoring_fns: [] - benchmarks: [] - tool_groups: - - toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true diff --git a/llama_stack/distributions/open-benchmark/build.yaml b/llama_stack/distributions/open-benchmark/build.yaml deleted file mode 100644 index 05acd98e3..000000000 --- a/llama_stack/distributions/open-benchmark/build.yaml +++ /dev/null @@ -1,36 +0,0 @@ -version: 2 -distribution_spec: - description: Distribution for 
running open benchmarks - providers: - inference: - - provider_type: remote::openai - - provider_type: remote::anthropic - - provider_type: remote::gemini - - provider_type: remote::groq - - provider_type: remote::together - vector_io: - - provider_type: inline::sqlite-vec - - provider_type: remote::chromadb - - provider_type: remote::pgvector - safety: - - provider_type: inline::llama-guard - agents: - - provider_type: inline::meta-reference - eval: - - provider_type: inline::meta-reference - datasetio: - - provider_type: remote::huggingface - - provider_type: inline::localfs - scoring: - - provider_type: inline::basic - - provider_type: inline::llm-as-judge - - provider_type: inline::braintrust - tool_runtime: - - provider_type: remote::brave-search - - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - - provider_type: remote::model-context-protocol -image_type: venv -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/llama_stack/distributions/open-benchmark/run.yaml b/llama_stack/distributions/open-benchmark/run.yaml deleted file mode 100644 index 912e48dd3..000000000 --- a/llama_stack/distributions/open-benchmark/run.yaml +++ /dev/null @@ -1,255 +0,0 @@ -version: 2 -image_name: open-benchmark -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- tool_runtime -- vector_io -providers: - inference: - - provider_id: openai - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY:=} - base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1} - - provider_id: anthropic - provider_type: remote::anthropic - config: - api_key: ${env.ANTHROPIC_API_KEY:=} - - provider_id: gemini - provider_type: remote::gemini - config: - api_key: ${env.GEMINI_API_KEY:=} - - provider_id: groq - provider_type: remote::groq - config: - url: https://api.groq.com - api_key: ${env.GROQ_API_KEY:=} - - provider_id: together - provider_type: remote::together - config: - url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:=} - vector_io: - - provider_id: sqlite-vec - provider_type: inline::sqlite-vec - config: - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec.db - persistence: - namespace: vector_io::sqlite_vec - backend: kv_default - - provider_id: ${env.ENABLE_CHROMADB:+chromadb} - provider_type: remote::chromadb - config: - url: ${env.CHROMADB_URL:=} - persistence: - namespace: vector_io::chroma_remote - backend: kv_default - - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} - provider_type: remote::pgvector - config: - host: ${env.PGVECTOR_HOST:=localhost} - port: ${env.PGVECTOR_PORT:=5432} - db: ${env.PGVECTOR_DB:=} - user: ${env.PGVECTOR_USER:=} - password: ${env.PGVECTOR_PASSWORD:=} - persistence: - namespace: vector_io::pgvector - backend: kv_default - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - namespace: eval - backend: kv_default - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - namespace: datasetio::huggingface - backend: kv_default - - provider_id: localfs - provider_type: inline::localfs - 
config: - kvstore: - namespace: datasetio::localfs - backend: kv_default - scoring: - - provider_id: basic - provider_type: inline::basic - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:=} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol -storage: - backends: - kv_default: - type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/kvstore.db - sql_default: - type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sql_store.db - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - conversations: - table_name: openai_conversations - backend: sql_default - prompts: - namespace: prompts - backend: kv_default -registered_resources: - models: - - metadata: {} - model_id: gpt-4o - provider_id: openai - provider_model_id: gpt-4o - model_type: llm - - metadata: {} - model_id: claude-3-5-sonnet-latest - provider_id: anthropic - provider_model_id: claude-3-5-sonnet-latest - model_type: llm - - metadata: {} - model_id: gemini/gemini-1.5-flash - provider_id: gemini - provider_model_id: gemini/gemini-1.5-flash - model_type: llm - - metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct - provider_id: groq - provider_model_id: groq/llama-3.3-70b-versatile - model_type: llm - - metadata: {} - model_id: meta-llama/Llama-3.1-405B-Instruct - provider_id: together - provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo - model_type: llm - shields: - - shield_id: meta-llama/Llama-Guard-3-8B - vector_dbs: [] - datasets: - - purpose: eval/messages-answer - source: - type: uri - uri: huggingface://datasets/llamastack/simpleqa?split=train - metadata: {} - dataset_id: simpleqa - - purpose: eval/messages-answer - source: - type: uri - uri: huggingface://datasets/llamastack/mmlu_cot?split=test&name=all - metadata: {} - dataset_id: mmlu_cot - - purpose: eval/messages-answer - source: - type: uri - uri: huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main - metadata: {} - dataset_id: gpqa_cot - - purpose: eval/messages-answer - source: - type: uri - uri: huggingface://datasets/llamastack/math_500?split=test - metadata: {} - dataset_id: math_500 - - purpose: eval/messages-answer - source: - type: uri - uri: huggingface://datasets/llamastack/IfEval?split=train - metadata: {} - dataset_id: ifeval - - purpose: eval/messages-answer - source: - type: uri - uri: huggingface://datasets/llamastack/docvqa?split=val - metadata: {} - dataset_id: docvqa - scoring_fns: [] - benchmarks: - - dataset_id: simpleqa - scoring_functions: - - llm-as-judge::405b-simpleqa - metadata: {} - benchmark_id: meta-reference-simpleqa - - dataset_id: mmlu_cot - scoring_functions: - - basic::regex_parser_multiple_choice_answer - metadata: {} - benchmark_id: meta-reference-mmlu-cot - - dataset_id: gpqa_cot - scoring_functions: - - basic::regex_parser_multiple_choice_answer - metadata: {} - benchmark_id: 
meta-reference-gpqa-cot - - dataset_id: math_500 - scoring_functions: - - basic::regex_parser_math_response - metadata: {} - benchmark_id: meta-reference-math-500 - - dataset_id: ifeval - scoring_functions: - - basic::ifeval - metadata: {} - benchmark_id: meta-reference-ifeval - - dataset_id: docvqa - scoring_functions: - - basic::docvqa - metadata: {} - benchmark_id: meta-reference-docvqa - tool_groups: - - toolgroup_id: builtin::websearch - provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true diff --git a/llama_stack/distributions/postgres-demo/build.yaml b/llama_stack/distributions/postgres-demo/build.yaml deleted file mode 100644 index 063dc3999..000000000 --- a/llama_stack/distributions/postgres-demo/build.yaml +++ /dev/null @@ -1,23 +0,0 @@ -version: 2 -distribution_spec: - description: Quick start template for running Llama Stack with several popular providers - providers: - inference: - - provider_type: remote::vllm - - provider_type: inline::sentence-transformers - vector_io: - - provider_type: remote::chromadb - safety: - - provider_type: inline::llama-guard - agents: - - provider_type: inline::meta-reference - tool_runtime: - - provider_type: remote::brave-search - - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - - provider_type: remote::model-context-protocol -image_type: venv -additional_pip_packages: -- asyncpg -- psycopg2-binary -- sqlalchemy[asyncio] diff --git a/llama_stack/distributions/postgres-demo/run.yaml b/llama_stack/distributions/postgres-demo/run.yaml deleted file mode 100644 index dd1c2bc7f..000000000 --- a/llama_stack/distributions/postgres-demo/run.yaml +++ /dev/null @@ -1,118 +0,0 @@ -version: 2 -image_name: postgres-demo -apis: -- agents -- inference -- safety -- tool_runtime -- vector_io -providers: - inference: - - provider_id: vllm-inference - provider_type: remote::vllm - config: - url: ${env.VLLM_URL:=http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:=4096} - api_token: ${env.VLLM_API_TOKEN:=fake} - tls_verify: ${env.VLLM_TLS_VERIFY:=true} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - vector_io: - - provider_id: ${env.ENABLE_CHROMADB:+chromadb} - provider_type: remote::chromadb - config: - url: ${env.CHROMADB_URL:=} - persistence: - namespace: vector_io::chroma_remote - backend: kv_default - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol -storage: - backends: - kv_default: - type: kv_postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - table_name: 
${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} - sql_default: - type: sql_postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - conversations: - table_name: openai_conversations - backend: sql_default - prompts: - namespace: prompts - backend: kv_default -registered_resources: - models: - - metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: vllm-inference - model_type: llm - - metadata: - embedding_dimension: 768 - model_id: nomic-embed-text-v1.5 - provider_id: sentence-transformers - model_type: embedding - shields: - - shield_id: meta-llama/Llama-Guard-3-8B - vector_dbs: [] - datasets: [] - scoring_fns: [] - benchmarks: [] - tool_groups: - - toolgroup_id: builtin::websearch - provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true diff --git a/llama_stack/distributions/starter-gpu/build.yaml b/llama_stack/distributions/starter-gpu/build.yaml deleted file mode 100644 index b2e2a0c85..000000000 --- a/llama_stack/distributions/starter-gpu/build.yaml +++ /dev/null @@ -1,60 +0,0 @@ -version: 2 -distribution_spec: - description: Quick start template for running Llama Stack with several popular providers. - This distribution is intended for GPU-enabled environments. - providers: - inference: - - provider_type: remote::cerebras - - provider_type: remote::ollama - - provider_type: remote::vllm - - provider_type: remote::tgi - - provider_type: remote::fireworks - - provider_type: remote::together - - provider_type: remote::bedrock - - provider_type: remote::nvidia - - provider_type: remote::openai - - provider_type: remote::anthropic - - provider_type: remote::gemini - - provider_type: remote::vertexai - - provider_type: remote::groq - - provider_type: remote::sambanova - - provider_type: remote::azure - - provider_type: inline::sentence-transformers - vector_io: - - provider_type: inline::faiss - - provider_type: inline::sqlite-vec - - provider_type: inline::milvus - - provider_type: remote::chromadb - - provider_type: remote::pgvector - - provider_type: remote::qdrant - - provider_type: remote::weaviate - files: - - provider_type: inline::localfs - safety: - - provider_type: inline::llama-guard - - provider_type: inline::code-scanner - agents: - - provider_type: inline::meta-reference - post_training: - - provider_type: inline::huggingface-gpu - eval: - - provider_type: inline::meta-reference - datasetio: - - provider_type: remote::huggingface - - provider_type: inline::localfs - scoring: - - provider_type: inline::basic - - provider_type: inline::llm-as-judge - - provider_type: inline::braintrust - tool_runtime: - - provider_type: remote::brave-search - - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - - provider_type: remote::model-context-protocol - batches: - - provider_type: inline::reference -image_type: venv -additional_pip_packages: -- aiosqlite -- asyncpg -- sqlalchemy[asyncio] diff --git a/llama_stack/distributions/starter-gpu/run.yaml b/llama_stack/distributions/starter-gpu/run.yaml deleted file mode 100644 index 807f0d678..000000000 --- a/llama_stack/distributions/starter-gpu/run.yaml +++ /dev/null @@ -1,284 +0,0 @@ -version: 2 
-image_name: starter-gpu -apis: -- agents -- batches -- datasetio -- eval -- files -- inference -- post_training -- safety -- scoring -- tool_runtime -- vector_io -providers: - inference: - - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} - provider_type: remote::cerebras - config: - base_url: https://api.cerebras.ai - api_key: ${env.CEREBRAS_API_KEY:=} - - provider_id: ${env.OLLAMA_URL:+ollama} - provider_type: remote::ollama - config: - url: ${env.OLLAMA_URL:=http://localhost:11434} - - provider_id: ${env.VLLM_URL:+vllm} - provider_type: remote::vllm - config: - url: ${env.VLLM_URL:=} - max_tokens: ${env.VLLM_MAX_TOKENS:=4096} - api_token: ${env.VLLM_API_TOKEN:=fake} - tls_verify: ${env.VLLM_TLS_VERIFY:=true} - - provider_id: ${env.TGI_URL:+tgi} - provider_type: remote::tgi - config: - url: ${env.TGI_URL:=} - - provider_id: fireworks - provider_type: remote::fireworks - config: - url: https://api.fireworks.ai/inference/v1 - api_key: ${env.FIREWORKS_API_KEY:=} - - provider_id: together - provider_type: remote::together - config: - url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:=} - - provider_id: bedrock - provider_type: remote::bedrock - - provider_id: ${env.NVIDIA_API_KEY:+nvidia} - provider_type: remote::nvidia - config: - url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} - api_key: ${env.NVIDIA_API_KEY:=} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - - provider_id: openai - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY:=} - base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1} - - provider_id: anthropic - provider_type: remote::anthropic - config: - api_key: ${env.ANTHROPIC_API_KEY:=} - - provider_id: gemini - provider_type: remote::gemini - config: - api_key: ${env.GEMINI_API_KEY:=} - - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai} - provider_type: remote::vertexai - config: - project: ${env.VERTEX_AI_PROJECT:=} - location: ${env.VERTEX_AI_LOCATION:=us-central1} - - provider_id: groq - provider_type: remote::groq - config: - url: https://api.groq.com - api_key: ${env.GROQ_API_KEY:=} - - provider_id: sambanova - provider_type: remote::sambanova - config: - url: https://api.sambanova.ai/v1 - api_key: ${env.SAMBANOVA_API_KEY:=} - - provider_id: ${env.AZURE_API_KEY:+azure} - provider_type: remote::azure - config: - api_key: ${env.AZURE_API_KEY:=} - api_base: ${env.AZURE_API_BASE:=} - api_version: ${env.AZURE_API_VERSION:=} - api_type: ${env.AZURE_API_TYPE:=} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - persistence: - namespace: vector_io::faiss - backend: kv_default - - provider_id: sqlite-vec - provider_type: inline::sqlite-vec - config: - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db - persistence: - namespace: vector_io::sqlite_vec - backend: kv_default - - provider_id: ${env.MILVUS_URL:+milvus} - provider_type: inline::milvus - config: - db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db - persistence: - namespace: vector_io::milvus - backend: kv_default - - provider_id: ${env.CHROMADB_URL:+chromadb} - provider_type: remote::chromadb - config: - url: ${env.CHROMADB_URL:=} - persistence: - namespace: vector_io::chroma_remote - backend: kv_default - - provider_id: ${env.PGVECTOR_DB:+pgvector} - provider_type: remote::pgvector - config: - host: ${env.PGVECTOR_HOST:=localhost} - port: ${env.PGVECTOR_PORT:=5432} - db: 
${env.PGVECTOR_DB:=} - user: ${env.PGVECTOR_USER:=} - password: ${env.PGVECTOR_PASSWORD:=} - persistence: - namespace: vector_io::pgvector - backend: kv_default - - provider_id: ${env.QDRANT_URL:+qdrant} - provider_type: remote::qdrant - config: - api_key: ${env.QDRANT_API_KEY:=} - persistence: - namespace: vector_io::qdrant_remote - backend: kv_default - - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} - provider_type: remote::weaviate - config: - weaviate_api_key: null - weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} - persistence: - namespace: vector_io::weaviate - backend: kv_default - files: - - provider_id: meta-reference-files - provider_type: inline::localfs - config: - storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files} - metadata_store: - table_name: files_metadata - backend: sql_default - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - - provider_id: code-scanner - provider_type: inline::code-scanner - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - post_training: - - provider_id: huggingface-gpu - provider_type: inline::huggingface-gpu - config: - checkpoint_format: huggingface - distributed_backend: null - device: cpu - dpo_output_dir: ~/.llama/distributions/starter-gpu/dpo_output - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - namespace: eval - backend: kv_default - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - namespace: datasetio::huggingface - backend: kv_default - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - namespace: datasetio::localfs - backend: kv_default - scoring: - - provider_id: basic - provider_type: inline::basic - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:=} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - batches: - - provider_id: reference - provider_type: inline::reference - config: - kvstore: - namespace: batches - backend: kv_default -storage: - backends: - kv_default: - type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/kvstore.db - sql_default: - type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sql_store.db - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - conversations: - table_name: openai_conversations - backend: sql_default - prompts: - namespace: prompts - backend: kv_default -registered_resources: - models: [] - shields: - - shield_id: llama-guard - provider_id: ${env.SAFETY_MODEL:+llama-guard} - provider_shield_id: ${env.SAFETY_MODEL:=} - - 
shield_id: code-scanner - provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} - provider_shield_id: ${env.CODE_SCANNER_MODEL:=} - vector_dbs: [] - datasets: [] - scoring_fns: [] - benchmarks: [] - tool_groups: - - toolgroup_id: builtin::websearch - provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true -vector_stores: - default_provider_id: faiss - default_embedding_model: - provider_id: sentence-transformers - model_id: nomic-ai/nomic-embed-text-v1.5 -safety: - default_shield_id: llama-guard diff --git a/llama_stack/distributions/starter/build.yaml b/llama_stack/distributions/starter/build.yaml deleted file mode 100644 index baa80ef3e..000000000 --- a/llama_stack/distributions/starter/build.yaml +++ /dev/null @@ -1,60 +0,0 @@ -version: 2 -distribution_spec: - description: Quick start template for running Llama Stack with several popular providers. - This distribution is intended for CPU-only environments. - providers: - inference: - - provider_type: remote::cerebras - - provider_type: remote::ollama - - provider_type: remote::vllm - - provider_type: remote::tgi - - provider_type: remote::fireworks - - provider_type: remote::together - - provider_type: remote::bedrock - - provider_type: remote::nvidia - - provider_type: remote::openai - - provider_type: remote::anthropic - - provider_type: remote::gemini - - provider_type: remote::vertexai - - provider_type: remote::groq - - provider_type: remote::sambanova - - provider_type: remote::azure - - provider_type: inline::sentence-transformers - vector_io: - - provider_type: inline::faiss - - provider_type: inline::sqlite-vec - - provider_type: inline::milvus - - provider_type: remote::chromadb - - provider_type: remote::pgvector - - provider_type: remote::qdrant - - provider_type: remote::weaviate - files: - - provider_type: inline::localfs - safety: - - provider_type: inline::llama-guard - - provider_type: inline::code-scanner - agents: - - provider_type: inline::meta-reference - post_training: - - provider_type: inline::torchtune-cpu - eval: - - provider_type: inline::meta-reference - datasetio: - - provider_type: remote::huggingface - - provider_type: inline::localfs - scoring: - - provider_type: inline::basic - - provider_type: inline::llm-as-judge - - provider_type: inline::braintrust - tool_runtime: - - provider_type: remote::brave-search - - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - - provider_type: remote::model-context-protocol - batches: - - provider_type: inline::reference -image_type: venv -additional_pip_packages: -- aiosqlite -- asyncpg -- sqlalchemy[asyncio] diff --git a/llama_stack/distributions/starter/run.yaml b/llama_stack/distributions/starter/run.yaml deleted file mode 100644 index eb4652af0..000000000 --- a/llama_stack/distributions/starter/run.yaml +++ /dev/null @@ -1,281 +0,0 @@ -version: 2 -image_name: starter -apis: -- agents -- batches -- datasetio -- eval -- files -- inference -- post_training -- safety -- scoring -- tool_runtime -- vector_io -providers: - inference: - - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} - provider_type: remote::cerebras - config: - base_url: https://api.cerebras.ai - api_key: ${env.CEREBRAS_API_KEY:=} - - provider_id: ${env.OLLAMA_URL:+ollama} - provider_type: remote::ollama - config: - url: ${env.OLLAMA_URL:=http://localhost:11434} - - provider_id: ${env.VLLM_URL:+vllm} - provider_type: remote::vllm - config: - url: ${env.VLLM_URL:=} - max_tokens: 
diff --git a/llama_stack/distributions/starter/run.yaml b/llama_stack/distributions/starter/run.yaml
deleted file mode 100644
index eb4652af0..000000000
--- a/llama_stack/distributions/starter/run.yaml
+++ /dev/null
@@ -1,281 +0,0 @@
-version: 2
-image_name: starter
-apis:
-- agents
-- batches
-- datasetio
-- eval
-- files
-- inference
-- post_training
-- safety
-- scoring
-- tool_runtime
-- vector_io
-providers:
-  inference:
-  - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
-    provider_type: remote::cerebras
-    config:
-      base_url: https://api.cerebras.ai
-      api_key: ${env.CEREBRAS_API_KEY:=}
-  - provider_id: ${env.OLLAMA_URL:+ollama}
-    provider_type: remote::ollama
-    config:
-      url: ${env.OLLAMA_URL:=http://localhost:11434}
-  - provider_id: ${env.VLLM_URL:+vllm}
-    provider_type: remote::vllm
-    config:
-      url: ${env.VLLM_URL:=}
-      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
-      api_token: ${env.VLLM_API_TOKEN:=fake}
-      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
-  - provider_id: ${env.TGI_URL:+tgi}
-    provider_type: remote::tgi
-    config:
-      url: ${env.TGI_URL:=}
-  - provider_id: fireworks
-    provider_type: remote::fireworks
-    config:
-      url: https://api.fireworks.ai/inference/v1
-      api_key: ${env.FIREWORKS_API_KEY:=}
-  - provider_id: together
-    provider_type: remote::together
-    config:
-      url: https://api.together.xyz/v1
-      api_key: ${env.TOGETHER_API_KEY:=}
-  - provider_id: bedrock
-    provider_type: remote::bedrock
-  - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
-    provider_type: remote::nvidia
-    config:
-      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
-      api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
-  - provider_id: openai
-    provider_type: remote::openai
-    config:
-      api_key: ${env.OPENAI_API_KEY:=}
-      base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
-  - provider_id: anthropic
-    provider_type: remote::anthropic
-    config:
-      api_key: ${env.ANTHROPIC_API_KEY:=}
-  - provider_id: gemini
-    provider_type: remote::gemini
-    config:
-      api_key: ${env.GEMINI_API_KEY:=}
-  - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
-    provider_type: remote::vertexai
-    config:
-      project: ${env.VERTEX_AI_PROJECT:=}
-      location: ${env.VERTEX_AI_LOCATION:=us-central1}
-  - provider_id: groq
-    provider_type: remote::groq
-    config:
-      url: https://api.groq.com
-      api_key: ${env.GROQ_API_KEY:=}
-  - provider_id: sambanova
-    provider_type: remote::sambanova
-    config:
-      url: https://api.sambanova.ai/v1
-      api_key: ${env.SAMBANOVA_API_KEY:=}
-  - provider_id: ${env.AZURE_API_KEY:+azure}
-    provider_type: remote::azure
-    config:
-      api_key: ${env.AZURE_API_KEY:=}
-      api_base: ${env.AZURE_API_BASE:=}
-      api_version: ${env.AZURE_API_VERSION:=}
-      api_type: ${env.AZURE_API_TYPE:=}
-  - provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
-  vector_io:
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      persistence:
-        namespace: vector_io::faiss
-        backend: kv_default
-  - provider_id: sqlite-vec
-    provider_type: inline::sqlite-vec
-    config:
-      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
-      persistence:
-        namespace: vector_io::sqlite_vec
-        backend: kv_default
-  - provider_id: ${env.MILVUS_URL:+milvus}
-    provider_type: inline::milvus
-    config:
-      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
-      persistence:
-        namespace: vector_io::milvus
-        backend: kv_default
-  - provider_id: ${env.CHROMADB_URL:+chromadb}
-    provider_type: remote::chromadb
-    config:
-      url: ${env.CHROMADB_URL:=}
-      persistence:
-        namespace: vector_io::chroma_remote
-        backend: kv_default
-  - provider_id: ${env.PGVECTOR_DB:+pgvector}
-    provider_type: remote::pgvector
-    config:
-      host: ${env.PGVECTOR_HOST:=localhost}
-      port: ${env.PGVECTOR_PORT:=5432}
-      db: ${env.PGVECTOR_DB:=}
-      user: ${env.PGVECTOR_USER:=}
-      password: ${env.PGVECTOR_PASSWORD:=}
-      persistence:
-        namespace: vector_io::pgvector
-        backend: kv_default
-  - provider_id: ${env.QDRANT_URL:+qdrant}
-    provider_type: remote::qdrant
-    config:
-      api_key: ${env.QDRANT_API_KEY:=}
-      persistence:
-        namespace: vector_io::qdrant_remote
-        backend: kv_default
-  - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
-    provider_type: remote::weaviate
-    config:
-      weaviate_api_key: null
-      weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
-      persistence:
-        namespace: vector_io::weaviate
-        backend: kv_default
-  files:
-  - provider_id: meta-reference-files
-    provider_type: inline::localfs
-    config:
-      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
-      metadata_store:
-        table_name: files_metadata
-        backend: sql_default
-  safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config:
-      excluded_categories: []
-  - provider_id: code-scanner
-    provider_type: inline::code-scanner
-  agents:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      persistence:
-        agent_state:
-          namespace: agents
-          backend: kv_default
-        responses:
-          table_name: responses
-          backend: sql_default
-          max_write_queue_size: 10000
-          num_writers: 4
-  post_training:
-  - provider_id: torchtune-cpu
-    provider_type: inline::torchtune-cpu
-    config:
-      checkpoint_format: meta
-  eval:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      kvstore:
-        namespace: eval
-        backend: kv_default
-  datasetio:
-  - provider_id: huggingface
-    provider_type: remote::huggingface
-    config:
-      kvstore:
-        namespace: datasetio::huggingface
-        backend: kv_default
-  - provider_id: localfs
-    provider_type: inline::localfs
-    config:
-      kvstore:
-        namespace: datasetio::localfs
-        backend: kv_default
-  scoring:
-  - provider_id: basic
-    provider_type: inline::basic
-  - provider_id: llm-as-judge
-    provider_type: inline::llm-as-judge
-  - provider_id: braintrust
-    provider_type: inline::braintrust
-    config:
-      openai_api_key: ${env.OPENAI_API_KEY:=}
-  tool_runtime:
-  - provider_id: brave-search
-    provider_type: remote::brave-search
-    config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
-      max_results: 3
-  - provider_id: tavily-search
-    provider_type: remote::tavily-search
-    config:
-      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
-      max_results: 3
-  - provider_id: rag-runtime
-    provider_type: inline::rag-runtime
-  - provider_id: model-context-protocol
-    provider_type: remote::model-context-protocol
-  batches:
-  - provider_id: reference
-    provider_type: inline::reference
-    config:
-      kvstore:
-        namespace: batches
-        backend: kv_default
-storage:
-  backends:
-    kv_default:
-      type: kv_sqlite
-      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/kvstore.db
-    sql_default:
-      type: sql_sqlite
-      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sql_store.db
-  stores:
-    metadata:
-      namespace: registry
-      backend: kv_default
-    inference:
-      table_name: inference_store
-      backend: sql_default
-      max_write_queue_size: 10000
-      num_writers: 4
-    conversations:
-      table_name: openai_conversations
-      backend: sql_default
-    prompts:
-      namespace: prompts
-      backend: kv_default
-registered_resources:
-  models: []
-  shields:
-  - shield_id: llama-guard
-    provider_id: ${env.SAFETY_MODEL:+llama-guard}
-    provider_shield_id: ${env.SAFETY_MODEL:=}
-  - shield_id: code-scanner
-    provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
-    provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
-  vector_dbs: []
-  datasets: []
-  scoring_fns: []
-  benchmarks: []
-  tool_groups:
-  - toolgroup_id: builtin::websearch
-    provider_id: tavily-search
-  - toolgroup_id: builtin::rag
-    provider_id: rag-runtime
-server:
-  port: 8321
-telemetry:
-  enabled: true
-vector_stores:
-  default_provider_id: faiss
-  default_embedding_model:
-    provider_id: sentence-transformers
-    model_id: nomic-ai/nomic-embed-text-v1.5
-safety:
-  default_shield_id: llama-guard
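
[Editor's note, not part of the diff] Every run.yaml in this change set shares one storage layout: two named backends (a SQLite key-value store and a SQLite SQL store) that individual stores and provider persistence blocks reference by name, so pointing the whole stack at a different database is a single edit per backend. A trimmed sketch of that indirection, with hypothetical paths but the same schema as the files above:

    storage:
      backends:
        kv_default:
          type: kv_sqlite
          db_path: /tmp/example/kvstore.db    # hypothetical path
        sql_default:
          type: sql_sqlite
          db_path: /tmp/example/sql_store.db  # hypothetical path
      stores:
        metadata:
          namespace: registry                 # namespaced key-value data goes to kv_default
          backend: kv_default
        inference:
          table_name: inference_store         # tabular data goes to sql_default
          backend: sql_default
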
diff --git a/llama_stack/distributions/watsonx/build.yaml b/llama_stack/distributions/watsonx/build.yaml
deleted file mode 100644
index dba1a94e2..000000000
--- a/llama_stack/distributions/watsonx/build.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-version: 2
-distribution_spec:
-  description: Use watsonx for running LLM inference
-  providers:
-    inference:
-    - provider_type: remote::watsonx
-    - provider_type: inline::sentence-transformers
-    vector_io:
-    - provider_type: inline::faiss
-    safety:
-    - provider_type: inline::llama-guard
-    agents:
-    - provider_type: inline::meta-reference
-    eval:
-    - provider_type: inline::meta-reference
-    datasetio:
-    - provider_type: remote::huggingface
-    - provider_type: inline::localfs
-    scoring:
-    - provider_type: inline::basic
-    - provider_type: inline::llm-as-judge
-    - provider_type: inline::braintrust
-    tool_runtime:
-    - provider_type: remote::brave-search
-    - provider_type: remote::tavily-search
-    - provider_type: inline::rag-runtime
-    - provider_type: remote::model-context-protocol
-    files:
-    - provider_type: inline::localfs
-image_type: venv
-additional_pip_packages:
-- aiosqlite
-- sqlalchemy[asyncio]
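
[Editor's note, not part of the diff] Each distribution pairs a build.yaml with a run.yaml. The build file only declares which provider types to bundle, plus extra pip packages, and is consumed when the image or venv is built; the run file is the generated runtime config that pins concrete provider instances, credentials, and storage. A compressed sketch of how the two relate, with the watsonx entries as the example (a sketch of the convention, not a complete file):

    # build.yaml: what to install
    distribution_spec:
      providers:
        inference:
        - provider_type: remote::watsonx
    additional_pip_packages:
    - aiosqlite

    # run.yaml: how to run what was installed
    providers:
      inference:
      - provider_id: watsonx
        provider_type: remote::watsonx
        config:
          api_key: ${env.WATSONX_API_KEY:=}
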
diff --git a/llama_stack/distributions/watsonx/run.yaml b/llama_stack/distributions/watsonx/run.yaml
deleted file mode 100644
index 8456115d2..000000000
--- a/llama_stack/distributions/watsonx/run.yaml
+++ /dev/null
@@ -1,136 +0,0 @@
-version: 2
-image_name: watsonx
-apis:
-- agents
-- datasetio
-- eval
-- files
-- inference
-- safety
-- scoring
-- tool_runtime
-- vector_io
-providers:
-  inference:
-  - provider_id: watsonx
-    provider_type: remote::watsonx
-    config:
-      url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
-      api_key: ${env.WATSONX_API_KEY:=}
-      project_id: ${env.WATSONX_PROJECT_ID:=}
-  vector_io:
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      persistence:
-        namespace: vector_io::faiss
-        backend: kv_default
-  safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config:
-      excluded_categories: []
-  agents:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      persistence:
-        agent_state:
-          namespace: agents
-          backend: kv_default
-        responses:
-          table_name: responses
-          backend: sql_default
-          max_write_queue_size: 10000
-          num_writers: 4
-  eval:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      kvstore:
-        namespace: eval
-        backend: kv_default
-  datasetio:
-  - provider_id: huggingface
-    provider_type: remote::huggingface
-    config:
-      kvstore:
-        namespace: datasetio::huggingface
-        backend: kv_default
-  - provider_id: localfs
-    provider_type: inline::localfs
-    config:
-      kvstore:
-        namespace: datasetio::localfs
-        backend: kv_default
-  scoring:
-  - provider_id: basic
-    provider_type: inline::basic
-  - provider_id: llm-as-judge
-    provider_type: inline::llm-as-judge
-  - provider_id: braintrust
-    provider_type: inline::braintrust
-    config:
-      openai_api_key: ${env.OPENAI_API_KEY:=}
-  tool_runtime:
-  - provider_id: brave-search
-    provider_type: remote::brave-search
-    config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
-      max_results: 3
-  - provider_id: tavily-search
-    provider_type: remote::tavily-search
-    config:
-      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
-      max_results: 3
-  - provider_id: rag-runtime
-    provider_type: inline::rag-runtime
-  - provider_id: model-context-protocol
-    provider_type: remote::model-context-protocol
-  files:
-  - provider_id: meta-reference-files
-    provider_type: inline::localfs
-    config:
-      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/watsonx/files}
-      metadata_store:
-        table_name: files_metadata
-        backend: sql_default
-storage:
-  backends:
-    kv_default:
-      type: kv_sqlite
-      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/kvstore.db
-    sql_default:
-      type: sql_sqlite
-      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/sql_store.db
-  stores:
-    metadata:
-      namespace: registry
-      backend: kv_default
-    inference:
-      table_name: inference_store
-      backend: sql_default
-      max_write_queue_size: 10000
-      num_writers: 4
-    conversations:
-      table_name: openai_conversations
-      backend: sql_default
-    prompts:
-      namespace: prompts
-      backend: kv_default
-registered_resources:
-  models: []
-  shields: []
-  vector_dbs: []
-  datasets: []
-  scoring_fns: []
-  benchmarks: []
-  tool_groups:
-  - toolgroup_id: builtin::websearch
-    provider_id: tavily-search
-  - toolgroup_id: builtin::rag
-    provider_id: rag-runtime
-server:
-  port: 8321
-telemetry:
-  enabled: true
diff --git a/scripts/distro_codegen.py b/scripts/distro_codegen.py
index 68190c7f5..4dbdda5c4 100755
--- a/scripts/distro_codegen.py
+++ b/scripts/distro_codegen.py
@@ -55,7 +55,7 @@ def process_distro(distro_dir: Path, progress, change_tracker: ChangedPathTracke
     if template_func := getattr(module, "get_distribution_template", None):
         distro = template_func()
 
-        yaml_output_dir = REPO_ROOT / "llama_stack" / "distributions" / distro.name
+        yaml_output_dir = REPO_ROOT / "src" / "llama_stack" / "distributions" / distro.name
         doc_output_dir = REPO_ROOT / "docs/docs/distributions" / f"{distro.distro_type}_distro"
         change_tracker.add_paths(yaml_output_dir, doc_output_dir)
         distro.save_distribution(
diff --git a/src/llama_stack/distributions/dell/run-with-safety.yaml b/src/llama_stack/distributions/dell/run-with-safety.yaml
index 2563f2f4b..e0da8060d 100644
--- a/src/llama_stack/distributions/dell/run-with-safety.yaml
+++ b/src/llama_stack/distributions/dell/run-with-safety.yaml
@@ -109,6 +109,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
diff --git a/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml b/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml
index 01b5db4f9..2fa9d198b 100644
--- a/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml
+++ b/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml
@@ -122,6 +122,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
diff --git a/src/llama_stack/distributions/nvidia/run-with-safety.yaml b/src/llama_stack/distributions/nvidia/run-with-safety.yaml
index c23d0f9cb..1d57ad17a 100644
--- a/src/llama_stack/distributions/nvidia/run-with-safety.yaml
+++ b/src/llama_stack/distributions/nvidia/run-with-safety.yaml
@@ -111,6 +111,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
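
[Editor's note, not part of the diff] The only additions in this change set are mechanical: scripts/distro_codegen.py now emits generated YAML under src/llama_stack/distributions/ to match the src layout, and the three surviving run-with-safety.yaml files gain a prompts store so prompt data persists in the shared key-value backend alongside the existing stores. The added block, in context, is identical in all three files:

    storage:
      stores:
        prompts:
          namespace: prompts
          backend: kv_default
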