From a58c0639d52a53bf36ebdb7971f6f6de800c25f2 Mon Sep 17 00:00:00 2001 From: ehhuang Date: Wed, 4 Jun 2025 17:41:27 -0700 Subject: [PATCH] chore: update postgres_demo distro config (#2396) # What does this PR do? ## Test Plan --- .../templates/postgres-demo/build.yaml | 2 +- .../templates/postgres-demo/postgres_demo.py | 71 ++++------ llama_stack/templates/postgres-demo/run.yaml | 133 ++---------------- 3 files changed, 37 insertions(+), 169 deletions(-) diff --git a/llama_stack/templates/postgres-demo/build.yaml b/llama_stack/templates/postgres-demo/build.yaml index 8f3648abe..a7dee0787 100644 --- a/llama_stack/templates/postgres-demo/build.yaml +++ b/llama_stack/templates/postgres-demo/build.yaml @@ -3,8 +3,8 @@ distribution_spec: description: Quick start template for running Llama Stack with several popular providers providers: inference: - - remote::fireworks - remote::vllm + - inline::sentence-transformers vector_io: - remote::chromadb safety: diff --git a/llama_stack/templates/postgres-demo/postgres_demo.py b/llama_stack/templates/postgres-demo/postgres_demo.py index 2edd6d0e7..759281567 100644 --- a/llama_stack/templates/postgres-demo/postgres_demo.py +++ b/llama_stack/templates/postgres-demo/postgres_demo.py @@ -5,64 +5,36 @@ # the root directory of this source tree. +from llama_stack.apis.models.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, ShieldInput, ToolGroupInput, ) -from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig -from llama_stack.providers.remote.inference.fireworks.models import ( - MODEL_ENTRIES as FIREWORKS_MODEL_ENTRIES, -) +from llama_stack.providers.inline.inference.sentence_transformers import SentenceTransformersInferenceConfig from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig -from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig from llama_stack.templates.template import ( DistributionTemplate, RunConfigSettings, - get_model_registry, ) -def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]: - # in this template, we allow each API key to be optional - providers = [ - ( - "fireworks", - FIREWORKS_MODEL_ENTRIES, - FireworksImplConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:}"), - ), - ] - inference_providers = [] - available_models = {} - for provider_id, model_entries, config in providers: - inference_providers.append( - Provider( - provider_id=provider_id, - provider_type=f"remote::{provider_id}", - config=config, - ) - ) - available_models[provider_id] = model_entries - inference_providers.append( +def get_distribution_template() -> DistributionTemplate: + inference_providers = [ Provider( provider_id="vllm-inference", provider_type="remote::vllm", config=VLLMInferenceAdapterConfig.sample_run_config( url="${env.VLLM_URL:http://localhost:8000/v1}", ), - ) - ) - return inference_providers, available_models - - -def get_distribution_template() -> DistributionTemplate: - inference_providers, available_models = get_inference_providers() + ), + ] providers = { - "inference": ([p.provider_type for p in inference_providers]), + "inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]), "vector_io": ["remote::chromadb"], "safety": ["inline::llama-guard"], "agents": ["inline::meta-reference"], @@ -94,12 +66,24 @@ def get_distribution_template() -> DistributionTemplate: ), ] - default_models = get_model_registry(available_models) - default_models.append( + default_models = [ ModelInput( model_id="${env.INFERENCE_MODEL}", provider_id="vllm-inference", ) + ] + embedding_provider = Provider( + provider_id="sentence-transformers", + provider_type="inline::sentence-transformers", + config=SentenceTransformersInferenceConfig.sample_run_config(), + ) + embedding_model = ModelInput( + model_id="all-MiniLM-L6-v2", + provider_id=embedding_provider.provider_id, + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": 384, + }, ) postgres_config = PostgresSqlStoreConfig.sample_run_config() return DistributionTemplate( @@ -109,11 +93,11 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=None, providers=providers, - available_models_by_provider=available_models, + available_models_by_provider={}, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ - "inference": inference_providers, + "inference": inference_providers + [embedding_provider], "vector_io": vector_io_providers, "agents": [ Provider( @@ -131,12 +115,13 @@ def get_distribution_template() -> DistributionTemplate: provider_type="inline::meta-reference", config=dict( service_name="${env.OTEL_SERVICE_NAME:}", - sinks="${env.TELEMETRY_SINKS:console}", + sinks="${env.TELEMETRY_SINKS:console,otel_trace}", + otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:http://localhost:4318/v1/traces}", ), ) ], }, - default_models=default_models, + default_models=default_models + [embedding_model], default_tool_groups=default_tool_groups, default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], metadata_store=PostgresKVStoreConfig.sample_run_config(), @@ -148,9 +133,5 @@ def get_distribution_template() -> DistributionTemplate: "8321", "Port for the Llama Stack distribution server", ), - "FIREWORKS_API_KEY": ( - "", - "Fireworks API Key", - ), }, ) diff --git a/llama_stack/templates/postgres-demo/run.yaml b/llama_stack/templates/postgres-demo/run.yaml index 9dc366434..0e0d020b2 100644 --- a/llama_stack/templates/postgres-demo/run.yaml +++ b/llama_stack/templates/postgres-demo/run.yaml @@ -9,11 +9,6 @@ apis: - vector_io providers: inference: - - provider_id: fireworks - provider_type: remote::fireworks - config: - url: https://api.fireworks.ai/inference/v1 - api_key: ${env.FIREWORKS_API_KEY:} - provider_id: vllm-inference provider_type: remote::vllm config: @@ -21,6 +16,9 @@ providers: max_tokens: ${env.VLLM_MAX_TOKENS:4096} api_token: ${env.VLLM_API_TOKEN:fake} tls_verify: ${env.VLLM_TLS_VERIFY:true} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} vector_io: - provider_id: ${env.ENABLE_CHROMADB+chromadb} provider_type: remote::chromadb @@ -54,7 +52,8 @@ providers: provider_type: inline::meta-reference config: service_name: ${env.OTEL_SERVICE_NAME:} - sinks: ${env.TELEMETRY_SINKS:console} + sinks: ${env.TELEMETRY_SINKS:console,otel_trace} + otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:http://localhost:4318/v1/traces} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search @@ -88,127 +87,15 @@ inference_store: user: ${env.POSTGRES_USER:llamastack} password: ${env.POSTGRES_PASSWORD:llamastack} models: -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p1-8b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p1-70b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-70B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p1-405b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-3b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-3B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p3-70b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-guard-3-8b - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-guard-3-8b - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-8B - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-guard-3-8b - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-guard-3-11b-vision - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-11B-Vision - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama4-scout-instruct-basic - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama4-maverick-instruct-basic - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic - model_type: llm -- metadata: - embedding_dimension: 768 - context_length: 8192 - model_id: nomic-ai/nomic-embed-text-v1.5 - provider_id: fireworks - provider_model_id: nomic-ai/nomic-embed-text-v1.5 - model_type: embedding - metadata: {} model_id: ${env.INFERENCE_MODEL} provider_id: vllm-inference model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding shields: - shield_id: meta-llama/Llama-Guard-3-8B vector_dbs: []