feat: Add HTTPS serving option (#1000)

# What does this PR do? Enables HTTPS option for Llama Stack. While doing so, introduces a `ServerConfig` sub-structure to house all server related configuration (port, ssl, etc.) Also simplified the `start_container.sh` entrypoint to simply be `python` instead of a complex bash command line. ## Test Plan Conda: Run: ```bash $ llama stack build --template together $ llama stack run --port 8322 # ensure server starts $ llama-stack-client configure --endpoint http://localhost:8322 $ llama-stack-client models list ``` Create a self-signed SSL key / cert pair. Then, using a local checkout of `llama-stack-client-python`, change https://github.com/meta-llama/llama-stack-client-python/blob/main/src/llama_stack_client/_base_client.py#L759 to add `kwargs.setdefault("verify", False)` so SSL verification is disabled. Then: ```bash $ llama stack run --port 8322 --tls-keyfile <KEYFILE> --tls-certfile <CERTFILE> $ llama-stack-client configure --endpoint https://localhost:8322 # notice the `https` $ llama-stack-client models list ``` Also tested with containers (but of course one needs to make sure the cert and key files are appropriately provided to the container.)
2025-10-05 20:27:35 +00:00 · 2025-02-07 09:39:08 -08:00 · 2025-02-07 09:39:08 -08:00 · f8f2f7f9bb
commit f8f2f7f9bb
parent c97e05f75e
5 changed files with 88 additions and 6 deletions
--- a/llama_stack/distribution/datatypes.py
+++ b/llama_stack/distribution/datatypes.py
@ -117,6 +117,23 @@ class Provider(BaseModel):
    config: Dict[str, Any]


+class ServerConfig(BaseModel):
+    port: int = Field(
+        default=8321,
+        description="Port to listen on",
+        ge=1024,
+        le=65535,
+    )
+    tls_certfile: Optional[str] = Field(
+        default=None,
+        description="Path to TLS certificate file for HTTPS",
+    )
+    tls_keyfile: Optional[str] = Field(
+        default=None,
+        description="Path to TLS key file for HTTPS",
+    )
+
+
 class StackRunConfig(BaseModel):
    version: str = LLAMA_STACK_RUN_CONFIG_VERSION

@ -159,6 +176,11 @@ a default SQLite store will be used.""",
    eval_tasks: List[EvalTaskInput] = Field(default_factory=list)
    tool_groups: List[ToolGroupInput] = Field(default_factory=list)

+    server: ServerConfig = Field(
+        default_factory=ServerConfig,
+        description="Configuration for the HTTP(S) server",
+    )
+

 class BuildConfig(BaseModel):
    version: str = LLAMA_STACK_BUILD_CONFIG_VERSION
--- a/llama_stack/distribution/server/server.py
+++ b/llama_stack/distribution/server/server.py
@ -282,8 +282,19 @@ def main():
        action="append",
        help="Environment variables in KEY=value format. Can be specified multiple times.",
    )
+    parser.add_argument(
+        "--tls-keyfile",
+        help="Path to TLS key file for HTTPS",
+        required="--tls-certfile" in sys.argv,
+    )
+    parser.add_argument(
+        "--tls-certfile",
+        help="Path to TLS certificate file for HTTPS",
+        required="--tls-keyfile" in sys.argv,
+    )

    args = parser.parse_args()
+
    if args.env:
        for env_pair in args.env:
            try:
@ -381,11 +392,36 @@ def main():

    import uvicorn

-    # FYI this does not do hot-reloads
+    # Configure SSL if certificates are provided
+    port = args.port or config.server.port
+
+    ssl_config = None
+    if args.tls_keyfile:
+        keyfile = args.tls_keyfile
+        certfile = args.tls_certfile
+    else:
+        keyfile = config.server.tls_keyfile
+        certfile = config.server.tls_certfile
+
+    if keyfile and certfile:
+        ssl_config = {
+            "ssl_keyfile": keyfile,
+            "ssl_certfile": certfile,
+        }
+        print(f"HTTPS enabled with certificates:\n  Key: {keyfile}\n  Cert: {certfile}")

    listen_host = ["::", "0.0.0.0"] if not args.disable_ipv6 else "0.0.0.0"
-    print(f"Listening on {listen_host}:{args.port}")
-    uvicorn.run(app, host=listen_host, port=args.port)
+    print(f"Listening on {listen_host}:{port}")
+
+    uvicorn_config = {
+        "app": app,
+        "host": listen_host,
+        "port": port,
+    }
+    if ssl_config:
+        uvicorn_config.update(ssl_config)
+
+    uvicorn.run(**uvicorn_config)


 def extract_path_params(route: str) -> List[str]:
--- a/llama_stack/distribution/start_conda_env.sh
+++ b/llama_stack/distribution/start_conda_env.sh
@ -34,6 +34,7 @@ shift

 # Process environment variables from --env arguments
 env_vars=""
+other_args=""
 while [[ $# -gt 0 ]]; do
  case "$1" in
  --env)
@ -48,6 +49,7 @@ while [[ $# -gt 0 ]]; do
    fi
    ;;
  *)
+    other_args="$other_args $1"
    shift
    ;;
  esac
@ -61,4 +63,5 @@ $CONDA_PREFIX/bin/python \
  -m llama_stack.distribution.server.server \
  --yaml-config "$yaml_config" \
  --port "$port" \
-  $env_vars
+  $env_vars \
+  $other_args
--- a/llama_stack/distribution/start_container.sh
+++ b/llama_stack/distribution/start_container.sh
@ -40,8 +40,12 @@ shift
 port="$1"
 shift

+# Initialize other_args
+other_args=""
+
 # Process environment variables from --env arguments
 env_vars=""
+
 while [[ $# -gt 0 ]]; do
    case "$1" in
        --env)
@ -55,6 +59,7 @@ while [[ $# -gt 0 ]]; do
            fi
            ;;
        *)
+            other_args="$other_args $1"
            shift
            ;;
    esac
@ -93,5 +98,8 @@ $CONTAINER_BINARY run $CONTAINER_OPTS -it \
  -v "$yaml_config:/app/config.yaml" \
  $mounts \
  --env LLAMA_STACK_PORT=$port \
-  --entrypoint='["python", "-m", "llama_stack.distribution.server.server", "--yaml-config", "/app/config.yaml"]' \
-  $container_image:$version_tag
+  --entrypoint python \
+  $container_image:$version_tag \
+  -m llama_stack.distribution.server.server \
+  --yaml-config /app/config.yaml \
+  $other_args