From 7ce0c5c5dc478a50c3c0991f394e4387f90fe15b Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Mon, 10 Nov 2025 15:54:00 -0800
Subject: [PATCH] more fixes to postgres-store run yaml ugh

---
 .../ci-tests/run-with-postgres-store.yaml    |  25 +++-
 .../starter-gpu/run-with-postgres-store.yaml |  25 +++-
 .../starter/run-with-postgres-store.yaml     |  25 +++-
 .../distributions/starter/starter.py         | 115 +++++-------
 4 files changed, 89 insertions(+), 101 deletions(-)

diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml
index 94a62aa5a..5384b58fe 100644
--- a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml
+++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml
@@ -170,7 +170,7 @@ providers:
           namespace: agents
           backend: kv_default
         responses:
-          table_name: agent_responses
+          table_name: responses
           backend: sql_default
           max_write_queue_size: 10000
           num_writers: 4
@@ -230,8 +230,6 @@ providers:
       kvstore:
         namespace: batches
         backend: kv_default
-      max_concurrent_batches: 1
-      max_concurrent_requests_per_batch: 10
 storage:
   backends:
     kv_default:
@@ -266,13 +264,30 @@ storage:
       backend: kv_default
 registered_resources:
   models: []
-  shields: []
+  shields:
+  - shield_id: llama-guard
+    provider_id: ${env.SAFETY_MODEL:+llama-guard}
+    provider_shield_id: ${env.SAFETY_MODEL:=}
+  - shield_id: code-scanner
+    provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
+    provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
   vector_dbs: []
   datasets: []
   scoring_fns: []
   benchmarks: []
-  tool_groups: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
 server:
   port: 8321
 telemetry:
   enabled: true
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
+safety:
+  default_shield_id: llama-guard

diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml
index 2fe8f80d0..e29ada6f4 100644
--- a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml
+++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml
@@ -170,7 +170,7 @@ providers:
           namespace: agents
           backend: kv_default
         responses:
-          table_name: agent_responses
+          table_name: responses
           backend: sql_default
           max_write_queue_size: 10000
           num_writers: 4
@@ -233,8 +233,6 @@ providers:
       kvstore:
         namespace: batches
         backend: kv_default
-      max_concurrent_batches: 1
-      max_concurrent_requests_per_batch: 10
 storage:
   backends:
     kv_default:
@@ -269,13 +267,30 @@ storage:
       backend: kv_default
 registered_resources:
   models: []
-  shields: []
+  shields:
+  - shield_id: llama-guard
+    provider_id: ${env.SAFETY_MODEL:+llama-guard}
+    provider_shield_id: ${env.SAFETY_MODEL:=}
+  - shield_id: code-scanner
+    provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
+    provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
   vector_dbs: []
   datasets: []
   scoring_fns: []
   benchmarks: []
-  tool_groups: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
 server:
   port: 8321
 telemetry:
   enabled: true
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
+safety:
+  default_shield_id: llama-guard

diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
index 1402db2f8..437674bf9 100644
--- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
+++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
@@ -170,7 +170,7 @@ providers:
           namespace: agents
           backend: kv_default
         responses:
-          table_name: agent_responses
+          table_name: responses
           backend: sql_default
           max_write_queue_size: 10000
           num_writers: 4
@@ -230,8 +230,6 @@ providers:
       kvstore:
         namespace: batches
         backend: kv_default
-      max_concurrent_batches: 1
-      max_concurrent_requests_per_batch: 10
 storage:
   backends:
     kv_default:
@@ -266,13 +264,30 @@ storage:
      backend: kv_default
 registered_resources:
   models: []
-  shields: []
+  shields:
+  - shield_id: llama-guard
+    provider_id: ${env.SAFETY_MODEL:+llama-guard}
+    provider_shield_id: ${env.SAFETY_MODEL:=}
+  - shield_id: code-scanner
+    provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
+    provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
   vector_dbs: []
   datasets: []
   scoring_fns: []
   benchmarks: []
-  tool_groups: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
 server:
   port: 8321
 telemetry:
   enabled: true
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
+safety:
+  default_shield_id: llama-guard

diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py
index 917a15fec..7b7773289 100644
--- a/src/llama_stack/distributions/starter/starter.py
+++ b/src/llama_stack/distributions/starter/starter.py
@@ -17,22 +17,9 @@ from llama_stack.core.datatypes import (
     ToolGroupInput,
     VectorStoresConfig,
 )
-from llama_stack.core.storage.datatypes import (
-    InferenceStoreReference,
-    KVStoreReference,
-    ResponsesStoreReference,
-    SqlStoreReference,
-)
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
 from llama_stack.providers.datatypes import RemoteProviderSpec
-from llama_stack.providers.inline.agents.meta_reference.config import (
-    AgentPersistenceConfig,
-    MetaReferenceAgentsImplConfig,
-)
-from llama_stack.providers.inline.batches.reference.config import (
-    ReferenceBatchesImplConfig,
-)
 from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
@@ -254,6 +241,33 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
         "files": [files_provider],
     }
 
+    base_run_settings = RunConfigSettings(
+        provider_overrides=default_overrides,
+        default_models=[],
+        default_tool_groups=default_tool_groups,
+        default_shields=default_shields,
+        vector_stores_config=VectorStoresConfig(
+            default_provider_id="faiss",
+            default_embedding_model=QualifiedModel(
+                provider_id="sentence-transformers",
+                model_id="nomic-ai/nomic-embed-text-v1.5",
+            ),
+        ),
+        safety_config=SafetyConfig(
+            default_shield_id="llama-guard",
+        ),
+    )
+
+    postgres_run_settings = base_run_settings.model_copy(
+        update={
+            "storage_backends": {
+                "kv_default": postgres_kv_config,
+                "sql_default": postgres_sql_config,
+            }
+        },
+        deep=True,
+    )
+
     return DistributionTemplate(
         name=name,
         distro_type="self_hosted",
@@ -263,79 +277,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
         providers=providers,
         additional_pip_packages=list(set(PostgresSqlStoreConfig.pip_packages() + PostgresKVStoreConfig.pip_packages())),
         run_configs={
-            "run.yaml": RunConfigSettings(
-                provider_overrides=default_overrides,
-                default_models=[],
-                default_tool_groups=default_tool_groups,
-                default_shields=default_shields,
-                vector_stores_config=VectorStoresConfig(
-                    default_provider_id="faiss",
-                    default_embedding_model=QualifiedModel(
-                        provider_id="sentence-transformers",
-                        model_id="nomic-ai/nomic-embed-text-v1.5",
-                    ),
-                ),
-                safety_config=SafetyConfig(
-                    default_shield_id="llama-guard",
-                ),
-            ),
-            "run-with-postgres-store.yaml": RunConfigSettings(
-                provider_overrides={
-                    **default_overrides,
-                    "agents": [
-                        Provider(
-                            provider_id="meta-reference",
-                            provider_type="inline::meta-reference",
-                            config=MetaReferenceAgentsImplConfig(
-                                persistence=AgentPersistenceConfig(
-                                    agent_state=KVStoreReference(
-                                        backend="kv_default",
-                                        namespace="agents",
-                                    ),
-                                    responses=ResponsesStoreReference(
-                                        backend="sql_default",
-                                        table_name="agent_responses",
-                                    ),
-                                ),
-                            ).model_dump(exclude_none=True),
-                        )
-                    ],
-                    "batches": [
-                        Provider(
-                            provider_id="reference",
-                            provider_type="inline::reference",
-                            config=ReferenceBatchesImplConfig(
-                                kvstore=KVStoreReference(
-                                    backend="kv_default",
-                                    namespace="batches",
-                                ),
-                            ).model_dump(exclude_none=True),
-                        )
-                    ],
-                },
-                storage_backends={
-                    "kv_default": postgres_kv_config,
-                    "sql_default": postgres_sql_config,
-                },
-                storage_stores={
-                    "metadata": KVStoreReference(
-                        backend="kv_default",
-                        namespace="registry",
-                    ).model_dump(exclude_none=True),
-                    "inference": InferenceStoreReference(
-                        backend="sql_default",
-                        table_name="inference_store",
-                    ).model_dump(exclude_none=True),
-                    "conversations": SqlStoreReference(
-                        backend="sql_default",
-                        table_name="openai_conversations",
-                    ).model_dump(exclude_none=True),
-                    "prompts": KVStoreReference(
-                        backend="kv_default",
-                        namespace="prompts",
-                    ).model_dump(exclude_none=True),
-                },
-            ),
+            "run.yaml": base_run_settings,
+            "run-with-postgres-store.yaml": postgres_run_settings,
         },
         run_config_env_vars={
             "LLAMA_STACK_PORT": (