chore(ui): use proxy server for backend API calls; simplified k8s deployment (#2350)

# What does this PR do?

- no more CORS middleware needed

## Test Plan

### Local test

llama stack run starter --image-type conda

npm run dev

verify UI works in browser

### Deploy to k8s

temporarily change ui-k8s.yaml.template to load from PR commit

<img width="604" alt="image" src="https://github.com/user-attachments/assets/87fa2e52-1e93-4e32-9e0f-5b283b7a37b3" />

sh ./apply.sh

$ kubectl get services

go to external_ip:8322 and play around with UI

<img width="1690" alt="image" src="https://github.com/user-attachments/assets/5b7ec827-4302-4435-a9eb-df423676d873" />
2025-12-05 18:27:22 +00:00 · 2025-06-03 14:57:10 -07:00 · 2025-06-03 14:57:10 -07:00 · d96f6ec763
commit d96f6ec763
parent 7c1998db25
5 changed files with 109 additions and 17 deletions
--- a/llama_stack/distribution/server/server.py
+++ b/llama_stack/distribution/server/server.py
@@ -26,7 +26,6 @@ from aiohttp import hdrs
 from fastapi import Body, FastAPI, HTTPException, Request
 from fastapi import Path as FastapiPath
 from fastapi.exceptions import RequestValidationError
-from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, StreamingResponse
 from openai import BadRequestError
 from pydantic import BaseModel, ValidationError
@@ -479,17 +478,6 @@ def main(args: argparse.Namespace | None = None):
            window_seconds=window_seconds,
        )

-    # --- CORS middleware for local development ---
-    # TODO: move to reverse proxy
-    ui_port = os.environ.get("LLAMA_STACK_UI_PORT", 8322)
-    app.add_middleware(
-        CORSMiddleware,
-        allow_origins=[f"http://localhost:{ui_port}"],
-        allow_credentials=True,
-        allow_methods=["*"],
-        allow_headers=["*"],
-    )
-
    try:
        impls = asyncio.run(construct_stack(config))
    except InvalidProviderError as e: