From 81ad240faa48d2a2d91e5fbfc3dda21443432a6f Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 9 Sep 2025 23:00:50 -0700 Subject: [PATCH] fix(k8s): unwedge run.yaml to add files --- .../k8s-benchmark/stack-configmap.yaml | 19 +- .../k8s-benchmark/stack_run_config.yaml | 9 + .../distributions/k8s/stack-configmap.yaml | 182 +++++------------- .../distributions/k8s/stack_run_config.yaml | 9 + 4 files changed, 77 insertions(+), 142 deletions(-) diff --git a/docs/source/distributions/k8s-benchmark/stack-configmap.yaml b/docs/source/distributions/k8s-benchmark/stack-configmap.yaml index edf4ebd75..bf6109b68 100644 --- a/docs/source/distributions/k8s-benchmark/stack-configmap.yaml +++ b/docs/source/distributions/k8s-benchmark/stack-configmap.yaml @@ -6,6 +6,7 @@ data: apis: - agents - inference + - files - safety - telemetry - tool_runtime @@ -19,13 +20,6 @@ data: max_tokens: ${env.VLLM_MAX_TOKENS:=4096} api_token: ${env.VLLM_API_TOKEN:=fake} tls_verify: ${env.VLLM_TLS_VERIFY:=true} - - provider_id: vllm-safety - provider_type: remote::vllm - config: - url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:=4096} - api_token: ${env.VLLM_API_TOKEN:=fake} - tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -41,6 +35,14 @@ data: db: ${env.POSTGRES_DB:=llamastack} user: ${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} + metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -111,9 +113,6 @@ data: - model_id: ${env.INFERENCE_MODEL} provider_id: vllm-inference model_type: llm - - model_id: ${env.SAFETY_MODEL} - provider_id: vllm-safety - model_type: llm shields: - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} vector_dbs: [] diff --git a/docs/source/distributions/k8s-benchmark/stack_run_config.yaml b/docs/source/distributions/k8s-benchmark/stack_run_config.yaml index 5a810639e..f8ff7811b 100644 --- a/docs/source/distributions/k8s-benchmark/stack_run_config.yaml +++ b/docs/source/distributions/k8s-benchmark/stack_run_config.yaml @@ -3,6 +3,7 @@ image_name: kubernetes-benchmark-demo apis: - agents - inference +- files - safety - telemetry - tool_runtime @@ -31,6 +32,14 @@ providers: db: ${env.POSTGRES_DB:=llamastack} user: ${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} + metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db safety: - provider_id: llama-guard provider_type: inline::llama-guard diff --git a/docs/source/distributions/k8s/stack-configmap.yaml b/docs/source/distributions/k8s/stack-configmap.yaml index 4f95554e3..3dbb0da97 100644 --- a/docs/source/distributions/k8s/stack-configmap.yaml +++ b/docs/source/distributions/k8s/stack-configmap.yaml @@ -1,137 +1,55 @@ apiVersion: v1 data: - stack_run_config.yaml: | - version: '2' - image_name: kubernetes-demo - apis: - - agents - - inference - - safety - - telemetry - - tool_runtime - - vector_io - providers: - inference: - - provider_id: vllm-inference - provider_type: remote::vllm - config: - url: ${env.VLLM_URL:=http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:=4096} - api_token: ${env.VLLM_API_TOKEN:=fake} - tls_verify: ${env.VLLM_TLS_VERIFY:=true} - - provider_id: vllm-safety - provider_type: remote::vllm - config: - url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:=4096} - api_token: ${env.VLLM_API_TOKEN:=fake} - tls_verify: ${env.VLLM_TLS_VERIFY:=true} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - vector_io: - - provider_id: ${env.ENABLE_CHROMADB:+chromadb} - provider_type: remote::chromadb - config: - url: ${env.CHROMADB_URL:=} - kvstore: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - responses_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - metadata_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - table_name: llamastack_kvstore - inference_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - models: - - metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding - - metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: vllm-inference - model_type: llm - - metadata: {} - model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} - provider_id: vllm-safety - model_type: llm - shields: - - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} - vector_dbs: [] - datasets: [] - scoring_fns: [] - benchmarks: [] - tool_groups: - - toolgroup_id: builtin::websearch - provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime - server: - port: 8321 - auth: - provider_config: - type: github_token + stack_run_config.yaml: "version: '2'\nimage_name: kubernetes-demo\napis:\n- agents\n- + inference\n- files\n- safety\n- telemetry\n- tool_runtime\n- vector_io\nproviders:\n + \ inference:\n - provider_id: vllm-inference\n provider_type: remote::vllm\n + \ config:\n url: ${env.VLLM_URL:=http://localhost:8000/v1}\n max_tokens: + ${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n tls_verify: + ${env.VLLM_TLS_VERIFY:=true}\n - provider_id: vllm-safety\n provider_type: + remote::vllm\n config:\n url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}\n + \ max_tokens: ${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n + \ tls_verify: ${env.VLLM_TLS_VERIFY:=true}\n - provider_id: sentence-transformers\n + \ provider_type: inline::sentence-transformers\n config: {}\n vector_io:\n + \ - provider_id: ${env.ENABLE_CHROMADB:+chromadb}\n provider_type: remote::chromadb\n + \ config:\n url: ${env.CHROMADB_URL:=}\n kvstore:\n type: postgres\n + \ host: ${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n + \ db: ${env.POSTGRES_DB:=llamastack}\n user: ${env.POSTGRES_USER:=llamastack}\n + \ password: ${env.POSTGRES_PASSWORD:=llamastack}\n files:\n - provider_id: + meta-reference-files\n provider_type: inline::localfs\n config:\n storage_dir: + ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}\n metadata_store:\n + \ type: sqlite\n db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db + \ \n safety:\n - provider_id: llama-guard\n provider_type: inline::llama-guard\n + \ config:\n excluded_categories: []\n agents:\n - provider_id: meta-reference\n + \ provider_type: inline::meta-reference\n config:\n persistence_store:\n + \ type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n port: + ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user: + ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n + \ responses_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n + \ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n + \ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n + \ telemetry:\n - provider_id: meta-reference\n provider_type: inline::meta-reference\n + \ config:\n service_name: \"${env.OTEL_SERVICE_NAME:=\\u200B}\"\n sinks: + ${env.TELEMETRY_SINKS:=console}\n tool_runtime:\n - provider_id: brave-search\n + \ provider_type: remote::brave-search\n config:\n api_key: ${env.BRAVE_SEARCH_API_KEY:+}\n + \ max_results: 3\n - provider_id: tavily-search\n provider_type: remote::tavily-search\n + \ config:\n api_key: ${env.TAVILY_SEARCH_API_KEY:+}\n max_results: + 3\n - provider_id: rag-runtime\n provider_type: inline::rag-runtime\n config: + {}\n - provider_id: model-context-protocol\n provider_type: remote::model-context-protocol\n + \ config: {}\nmetadata_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n + \ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user: + ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n + \ table_name: llamastack_kvstore\ninference_store:\n type: postgres\n host: + ${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n + \ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\nmodels:\n- + metadata:\n embedding_dimension: 384\n model_id: all-MiniLM-L6-v2\n provider_id: + sentence-transformers\n model_type: embedding\n- metadata: {}\n model_id: ${env.INFERENCE_MODEL}\n + \ provider_id: vllm-inference\n model_type: llm\n- metadata: {}\n model_id: + ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\n provider_id: vllm-safety\n + \ model_type: llm\nshields:\n- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\nvector_dbs: + []\ndatasets: []\nscoring_fns: []\nbenchmarks: []\ntool_groups:\n- toolgroup_id: + builtin::websearch\n provider_id: tavily-search\n- toolgroup_id: builtin::rag\n + \ provider_id: rag-runtime\nserver:\n port: 8321\n auth:\n provider_config:\n + \ type: github_token\n" kind: ConfigMap metadata: creationTimestamp: null diff --git a/docs/source/distributions/k8s/stack_run_config.yaml b/docs/source/distributions/k8s/stack_run_config.yaml index a2d65e1a9..b841ab977 100644 --- a/docs/source/distributions/k8s/stack_run_config.yaml +++ b/docs/source/distributions/k8s/stack_run_config.yaml @@ -3,6 +3,7 @@ image_name: kubernetes-demo apis: - agents - inference +- files - safety - telemetry - tool_runtime @@ -38,6 +39,14 @@ providers: db: ${env.POSTGRES_DB:=llamastack} user: ${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} + metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db safety: - provider_id: llama-guard provider_type: inline::llama-guard