mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 09:53:45 +00:00
refactor: address PR feedback - improve naming, error handling, and documentation
Address all feedback from PR #3962: **Code Quality Improvements:** - Rename `_uvicorn_run` → `_run_server` for accurate method naming - Refactor error handling: move Gunicorn fallback logic from `_run_with_gunicorn` to caller - Update comments to reflect both Uvicorn and Gunicorn behavior - Update test mock from `_uvicorn_run` to `_run_server` **Environment Variable:** - Change `LLAMA_STACK_DISABLE_GUNICORN` → `LLAMA_STACK_ENABLE_GUNICORN` - More intuitive positive logic (no double negatives) - Defaults to `true` on Unix systems - Clearer log messages distinguishing platform limitations vs explicit disable **Documentation:** - Remove unnecessary `uv sync --group unit --group test` from user docs - Clarify SQLite limitations: "SQLite only allows one writer at a time" - Accurate explanation: WAL mode enables concurrent reads but writes are serialized - Strong recommendation for PostgreSQL in production with high traffic **Architecture:** - Better separation of concerns: `_run_with_gunicorn` just executes, caller handles fallback - Exceptions propagate to caller for centralized decision making 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
9ff881a28a
commit
241e189fee
10 changed files with 75 additions and 63 deletions
|
|
@ -90,7 +90,8 @@ On Unix-based systems (Linux, macOS), the server automatically uses Gunicorn wit
|
|||
**Important Notes**:
|
||||
|
||||
- On Windows, the server automatically falls back to single-process Uvicorn.
|
||||
- **Database Race Condition**: When using multiple workers without `GUNICORN_PRELOAD=true`, you may encounter database initialization race conditions (e.g., "table already exists" errors) as multiple workers simultaneously attempt to create database tables. To avoid this issue in production, set `GUNICORN_PRELOAD=true` and ensure all dependencies are installed with `uv sync --group unit --group test`.
|
||||
- **Database Race Condition**: When using multiple workers without `GUNICORN_PRELOAD=true`, you may encounter database initialization race conditions (e.g., "table already exists" errors) as multiple workers simultaneously attempt to create database tables. To avoid this issue in production, set `GUNICORN_PRELOAD=true`.
|
||||
- **SQLite with Multiple Workers**: SQLite works with Gunicorn's multi-process mode for development and low-to-moderate traffic scenarios. The system automatically enables WAL (Write-Ahead Logging) mode and sets a 5-second busy timeout. However, **SQLite only allows one writer at a time** - even with WAL mode, write operations from multiple workers are serialized, causing workers to wait for database locks under concurrent write load. **For production deployments with high traffic or multiple workers, we strongly recommend using PostgreSQL or another production-grade database** for true concurrent write performance.
|
||||
|
||||
**Example production configuration:**
|
||||
```bash
|
||||
|
|
|
|||
|
|
@ -44,7 +44,9 @@ Configure Gunicorn behavior using environment variables:
|
|||
- `GUNICORN_MAX_REQUESTS_JITTER`: Randomize worker restart timing (default: `1000`)
|
||||
- `GUNICORN_PRELOAD`: Preload app before forking workers for memory efficiency (default: `true`, as set in `run.py` line 264)
|
||||
|
||||
**Important**: When using multiple workers without `GUNICORN_PRELOAD=true`, you may encounter database initialization race conditions. To avoid this, set `GUNICORN_PRELOAD=true` and install all dependencies with `uv sync --group unit --group test`.
|
||||
**Important Notes**:
|
||||
- When using multiple workers without `GUNICORN_PRELOAD=true`, you may encounter database initialization race conditions. To avoid this, set `GUNICORN_PRELOAD=true`.
|
||||
- **SQLite with Multiple Workers**: SQLite works with Gunicorn's multi-process mode for development and low-to-moderate traffic scenarios. The system automatically enables WAL (Write-Ahead Logging) mode and sets a 5-second busy timeout. However, **SQLite only allows one writer at a time** - even with WAL mode, write operations from multiple workers are serialized, causing workers to wait for database locks under concurrent write load. **For production deployments with high traffic or multiple workers, we strongly recommend using PostgreSQL or another production-grade database** for true concurrent write performance.
|
||||
|
||||
**Example production configuration:**
|
||||
```bash
|
||||
|
|
|
|||
|
|
@ -181,9 +181,15 @@ class StackRun(Subcommand):
|
|||
except AttributeError as e:
|
||||
self.parser.error(f"failed to parse config file '{config_file}':\n {e}")
|
||||
|
||||
self._uvicorn_run(config_file, args)
|
||||
self._run_server(config_file, args)
|
||||
|
||||
def _uvicorn_run(self, config_file: Path | None, args: argparse.Namespace) -> None:
|
||||
def _run_server(self, config_file: Path | None, args: argparse.Namespace) -> None:
|
||||
"""
|
||||
Run the Llama Stack server using either Gunicorn (on Unix systems) or Uvicorn (on Windows or when disabled).
|
||||
|
||||
On Unix systems (Linux/macOS), defaults to Gunicorn with Uvicorn workers for production-grade multi-process
|
||||
performance. Falls back to single-process Uvicorn on Windows or when LLAMA_STACK_ENABLE_GUNICORN=false.
|
||||
"""
|
||||
if not config_file:
|
||||
self.parser.error("Config file is required")
|
||||
|
||||
|
|
@ -229,27 +235,37 @@ class StackRun(Subcommand):
|
|||
|
||||
logger.info(f"Listening on {host}:{port}")
|
||||
|
||||
# We need to catch KeyboardInterrupt because uvicorn's signal handling
|
||||
# re-raises SIGINT signals using signal.raise_signal(), which Python
|
||||
# converts to KeyboardInterrupt. Without this catch, we'd get a confusing
|
||||
# stack trace when using Ctrl+C or kill -2 (SIGINT).
|
||||
# SIGTERM (kill -15) works fine without this because Python doesn't
|
||||
# have a default handler for it.
|
||||
#
|
||||
# Another approach would be to ignore SIGINT entirely - let uvicorn handle it through its own
|
||||
# signal handling but this is quite intrusive and not worth the effort.
|
||||
# We need to catch KeyboardInterrupt because both Uvicorn and Gunicorn's signal handling
|
||||
# can raise SIGINT signals, which Python converts to KeyboardInterrupt. Without this catch,
|
||||
# we'd get a confusing stack trace when using Ctrl+C or kill -2 (SIGINT).
|
||||
# SIGTERM (kill -15) works fine without this because Python doesn't have a default handler for it.
|
||||
try:
|
||||
# Check if Gunicorn should be disabled (for testing or debugging)
|
||||
disable_gunicorn = os.getenv("LLAMA_STACK_DISABLE_GUNICORN", "false").lower() == "true"
|
||||
# Check if Gunicorn should be enabled
|
||||
# Default to true on Unix systems, can be disabled via environment variable
|
||||
enable_gunicorn = os.getenv("LLAMA_STACK_ENABLE_GUNICORN", "true").lower() == "true" and sys.platform in (
|
||||
"linux",
|
||||
"darwin",
|
||||
)
|
||||
|
||||
if not disable_gunicorn and sys.platform in ("linux", "darwin"):
|
||||
if enable_gunicorn:
|
||||
# On Unix-like systems, use Gunicorn with Uvicorn workers for production-grade performance
|
||||
try:
|
||||
self._run_with_gunicorn(host, port, uvicorn_config)
|
||||
except (FileNotFoundError, subprocess.CalledProcessError) as e:
|
||||
# Gunicorn not available or failed to start - fall back to Uvicorn
|
||||
logger.warning(f"Gunicorn unavailable or failed to start: {e}")
|
||||
logger.info("Falling back to single-process Uvicorn server...")
|
||||
uvicorn.run("llama_stack.core.server.server:create_app", **uvicorn_config) # type: ignore[arg-type]
|
||||
else:
|
||||
# On other systems (e.g., Windows), fall back to Uvicorn directly
|
||||
# Also used when LLAMA_STACK_DISABLE_GUNICORN=true (for tests)
|
||||
if disable_gunicorn:
|
||||
logger.info("Gunicorn disabled via LLAMA_STACK_DISABLE_GUNICORN environment variable")
|
||||
# Fall back to Uvicorn for:
|
||||
# - Windows systems (Gunicorn not supported)
|
||||
# - Unix systems with LLAMA_STACK_ENABLE_GUNICORN=false (for testing/debugging)
|
||||
if sys.platform not in ("linux", "darwin"):
|
||||
logger.info("Using single-process Uvicorn server (Gunicorn not supported on this platform)")
|
||||
else:
|
||||
logger.info(
|
||||
"Using single-process Uvicorn server (Gunicorn disabled via LLAMA_STACK_ENABLE_GUNICORN=false)"
|
||||
)
|
||||
uvicorn.run("llama_stack.core.server.server:create_app", **uvicorn_config) # type: ignore[arg-type]
|
||||
except (KeyboardInterrupt, SystemExit):
|
||||
logger.info("Received interrupt signal, shutting down gracefully...")
|
||||
|
|
@ -359,16 +375,9 @@ class StackRun(Subcommand):
|
|||
logger.info(f"Worker recycling: every {max_requests}±{max_requests_jitter} requests (prevents memory leaks)")
|
||||
logger.info(f"Total concurrent capacity: {num_workers * worker_connections} connections")
|
||||
|
||||
try:
|
||||
# Execute the Gunicorn command
|
||||
# If Gunicorn is not found or fails to start, raise the exception for the caller to handle
|
||||
subprocess.run(gunicorn_command, check=True)
|
||||
except FileNotFoundError:
|
||||
logger.error("Error: 'gunicorn' command not found. Please ensure Gunicorn is installed.")
|
||||
logger.error("Falling back to Uvicorn...")
|
||||
uvicorn.run("llama_stack.core.server.server:create_app", **uvicorn_config) # type: ignore[arg-type]
|
||||
except subprocess.CalledProcessError as e:
|
||||
logger.error(f"Failed to start Gunicorn server. Error: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
def _start_ui_development_server(self, stack_server_port: int):
|
||||
logger.info("Attempting to start UI development server...")
|
||||
|
|
|
|||
|
|
@ -295,8 +295,8 @@ def test_providers_flag_generates_config_with_api_keys():
|
|||
enable_ui=False,
|
||||
)
|
||||
|
||||
# Mock _uvicorn_run to prevent starting a server
|
||||
with patch.object(stack_run, "_uvicorn_run"):
|
||||
# Mock _run_server to prevent starting a server
|
||||
with patch.object(stack_run, "_run_server"):
|
||||
stack_run._run_stack_run_cmd(args)
|
||||
|
||||
# Read the generated config file
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue