mirror of
				https://github.com/meta-llama/llama-stack.git
				synced 2025-10-22 08:17:18 +00:00 
			
		
		
		
	
		
			Some checks failed
		
		
	
	Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
				
			Python Package Build Test / build (3.13) (push) Failing after 3s
				
			Vector IO Integration Tests / test-matrix (push) Failing after 6s
				
			Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 5s
				
			SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 8s
				
			SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 13s
				
			Unit Tests / unit-tests (3.13) (push) Failing after 4s
				
			Test External API and Providers / test-external (venv) (push) Failing after 7s
				
			Unit Tests / unit-tests (3.12) (push) Failing after 6s
				
			Python Package Build Test / build (3.12) (push) Failing after 10s
				
			Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 18s
				
			API Conformance Tests / check-schema-compatibility (push) Successful in 22s
				
			UI Tests / ui-tests (22) (push) Successful in 29s
				
			Pre-commit / pre-commit (push) Successful in 1m25s
				
			# What does this PR do? As shown in #3421, we can scale the stack to handle more RPS with k8s replicas. This PR enables a multi-process stack via `uvicorn --workers`, so that we can achieve the same scaling without running in k8s. To achieve that, we refactored main to split out the app construction logic; that app-construction method needs to be non-async. We created a new `Stack` class to house the impls, with a `start()` method that is called in lifespan to start background tasks, instead of starting them in the old `construct_stack`. This way we avoid having to manage an event loop manually. ## Test Plan CI > uv run --with llama-stack python -m llama_stack.core.server.server benchmarking/k8s-benchmark/stack_run_config.yaml works. > LLAMA_STACK_CONFIG=benchmarking/k8s-benchmark/stack_run_config.yaml uv run uvicorn llama_stack.core.server.server:create_app --port 8321 --workers 4 works.
		
			
				
	
	
		
			141 lines
		
	
	
	
		
			4.6 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
			
		
		
	
	
			141 lines
		
	
	
	
		
			4.6 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
# Kubernetes ConfigMap carrying the Llama Stack run configuration for the
# k8s benchmark. The entire run config is stored as a single literal block
# scalar under `data.stack_run_config.yaml`, so everything indented below
# that key is string data from Kubernetes' point of view.
#
# Placeholders of the form ${env.NAME:=default} and ${env.NAME:+value} are
# kept verbatim in this manifest. NOTE(review): they are presumably resolved
# by Llama Stack's environment substitution when the server loads the config;
# confirm against the Llama Stack configuration docs before changing them.
apiVersion: v1
data:
  stack_run_config.yaml: |
    version: '2'
    image_name: kubernetes-benchmark-demo
    # Each API appears exactly once here and has exactly one matching key in
    # the `providers` mapping below.
    apis:
    - agents
    - files
    - inference
    - safety
    - telemetry
    - tool_runtime
    - vector_io
    providers:
      inference:
      - provider_id: vllm-inference
        provider_type: remote::vllm
        config:
          url: ${env.VLLM_URL:=http://localhost:8000/v1}
          max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
          api_token: ${env.VLLM_API_TOKEN:=fake}
          tls_verify: ${env.VLLM_TLS_VERIFY:=true}
      - provider_id: sentence-transformers
        provider_type: inline::sentence-transformers
        config: {}
      # Single definition of the files provider; mapping keys must be unique.
      files:
      - provider_id: meta-reference-files
        provider_type: inline::localfs
        config:
          storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
          metadata_store:
            type: sqlite
            db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
      vector_io:
      - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
        provider_type: remote::chromadb
        config:
          url: ${env.CHROMADB_URL:=}
          kvstore:
            type: postgres
            host: ${env.POSTGRES_HOST:=localhost}
            port: ${env.POSTGRES_PORT:=5432}
            db: ${env.POSTGRES_DB:=llamastack}
            user: ${env.POSTGRES_USER:=llamastack}
            password: ${env.POSTGRES_PASSWORD:=llamastack}
      safety:
      - provider_id: llama-guard
        provider_type: inline::llama-guard
        config:
          excluded_categories: []
      agents:
      - provider_id: meta-reference
        provider_type: inline::meta-reference
        config:
          persistence_store:
            type: postgres
            host: ${env.POSTGRES_HOST:=localhost}
            port: ${env.POSTGRES_PORT:=5432}
            db: ${env.POSTGRES_DB:=llamastack}
            user: ${env.POSTGRES_USER:=llamastack}
            password: ${env.POSTGRES_PASSWORD:=llamastack}
          responses_store:
            type: postgres
            host: ${env.POSTGRES_HOST:=localhost}
            port: ${env.POSTGRES_PORT:=5432}
            db: ${env.POSTGRES_DB:=llamastack}
            user: ${env.POSTGRES_USER:=llamastack}
            password: ${env.POSTGRES_PASSWORD:=llamastack}
      telemetry:
      - provider_id: meta-reference
        provider_type: inline::meta-reference
        config:
          # \u200B is the double-quoted escape for U+200B (zero-width space).
          # NOTE(review): looks like a deliberately invisible default service
          # name; confirm before replacing it with an empty string.
          service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
          sinks: ${env.TELEMETRY_SINKS:=console}
      tool_runtime:
      - provider_id: brave-search
        provider_type: remote::brave-search
        config:
          api_key: ${env.BRAVE_SEARCH_API_KEY:+}
          max_results: 3
      - provider_id: tavily-search
        provider_type: remote::tavily-search
        config:
          api_key: ${env.TAVILY_SEARCH_API_KEY:+}
          max_results: 3
      - provider_id: rag-runtime
        provider_type: inline::rag-runtime
        config: {}
      - provider_id: model-context-protocol
        provider_type: remote::model-context-protocol
        config: {}
    metadata_store:
      type: postgres
      host: ${env.POSTGRES_HOST:=localhost}
      port: ${env.POSTGRES_PORT:=5432}
      db: ${env.POSTGRES_DB:=llamastack}
      user: ${env.POSTGRES_USER:=llamastack}
      password: ${env.POSTGRES_PASSWORD:=llamastack}
      table_name: llamastack_kvstore
    inference_store:
      type: postgres
      host: ${env.POSTGRES_HOST:=localhost}
      port: ${env.POSTGRES_PORT:=5432}
      db: ${env.POSTGRES_DB:=llamastack}
      user: ${env.POSTGRES_USER:=llamastack}
      password: ${env.POSTGRES_PASSWORD:=llamastack}
    models:
    - metadata:
        embedding_dimension: 384
      model_id: all-MiniLM-L6-v2
      provider_id: sentence-transformers
      model_type: embedding
    # INFERENCE_MODEL has no default in this file.
    - model_id: ${env.INFERENCE_MODEL}
      provider_id: vllm-inference
      model_type: llm
    shields:
    - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
    vector_dbs: []
    datasets: []
    scoring_fns: []
    benchmarks: []
    tool_groups:
    - toolgroup_id: builtin::websearch
      provider_id: tavily-search
    - toolgroup_id: builtin::rag
      provider_id: rag-runtime
    server:
      port: 8323
kind: ConfigMap
metadata:
  creationTimestamp: null
  name: llama-stack-config