mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-27 18:50:41 +00:00
chore(ui): use proxy server for backend API calls; simplified k8s deployment (#2350)
# What does this PR do?

- no more CORS middleware needed

## Test Plan

### Local test

    llama stack run starter --image-type conda
    npm run dev

Verify the UI works in the browser.

### Deploy to k8s

Temporarily change ui-k8s.yaml.template to load from the PR commit.

<img width="604" alt="image" src="https://github.com/user-attachments/assets/87fa2e52-1e93-4e32-9e0f-5b283b7a37b3" />

    sh ./apply.sh
    $ kubectl get services

Go to external_ip:8322 and play around with the UI.

<img width="1690" alt="image" src="https://github.com/user-attachments/assets/5b7ec827-4302-4435-a9eb-df423676d873" />
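A quick way to smoke-test the proxy locally could be a small script along these lines (hypothetical helper, not part of this PR; it assumes the UI dev server on http://localhost:8322, the stack on http://localhost:8321, and uses /v1/models only as a convenient read-only endpoint):

```ts
// check-proxy.ts (hypothetical helper, not part of this PR)
// Assumes: UI dev server on :8322, `llama stack run` on :8321, Node 18+ for global fetch.
const UI_ORIGIN = process.env.UI_ORIGIN ?? "http://localhost:8322";
const BACKEND_ORIGIN = process.env.BACKEND_ORIGIN ?? "http://localhost:8321";

async function main() {
  // Hit the backend directly and through the Next.js proxy route.
  const direct = await fetch(`${BACKEND_ORIGIN}/v1/models`);
  const proxied = await fetch(`${UI_ORIGIN}/api/v1/models`);
  console.log("direct :", direct.status, "proxied:", proxied.status);

  // If the proxy is wired up correctly, both calls return the same payload.
  const same =
    JSON.stringify(await direct.json()) === JSON.stringify(await proxied.json());
  console.log("payloads match:", same);
}

main().catch((err) => {
  console.error("proxy check failed:", err);
  process.exit(1);
});
```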
This commit is contained in:
parent
7c1998db25
commit
d96f6ec763
5 changed files with 109 additions and 17 deletions
@@ -26,7 +26,6 @@ from aiohttp import hdrs
 from fastapi import Body, FastAPI, HTTPException, Request
 from fastapi import Path as FastapiPath
 from fastapi.exceptions import RequestValidationError
-from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, StreamingResponse
 from openai import BadRequestError
 from pydantic import BaseModel, ValidationError

@@ -479,17 +478,6 @@ def main(args: argparse.Namespace | None = None):
             window_seconds=window_seconds,
         )

-    # --- CORS middleware for local development ---
-    # TODO: move to reverse proxy
-    ui_port = os.environ.get("LLAMA_STACK_UI_PORT", 8322)
-    app.add_middleware(
-        CORSMiddleware,
-        allow_origins=[f"http://localhost:{ui_port}"],
-        allow_credentials=True,
-        allow_methods=["*"],
-        allow_headers=["*"],
-    )
-
     try:
         impls = asyncio.run(construct_stack(config))
     except InvalidProviderError as e:
llama_stack/ui/app/api/v1/[...path]/route.ts (new file, 105 additions)

@@ -0,0 +1,105 @@
import { NextRequest, NextResponse } from "next/server";

// Get backend URL from environment variable or default to localhost for development
const BACKEND_URL =
  process.env.LLAMA_STACK_BACKEND_URL ||
  `http://localhost:${process.env.LLAMA_STACK_PORT || 8321}`;

async function proxyRequest(request: NextRequest, method: string) {
  try {
    // Extract the path from the request URL
    const url = new URL(request.url);
    const pathSegments = url.pathname.split("/");

    // Remove /api from the path to get the actual API path
    // /api/v1/models/list -> /v1/models/list
    const apiPath = pathSegments.slice(2).join("/"); // Remove 'api' segment
    const targetUrl = `${BACKEND_URL}/${apiPath}${url.search}`;

    console.log(`Proxying ${method} ${url.pathname} -> ${targetUrl}`);

    // Prepare headers (exclude host and other problematic headers)
    const headers = new Headers();
    request.headers.forEach((value, key) => {
      // Skip headers that might cause issues in proxy
      if (
        !["host", "connection", "content-length"].includes(key.toLowerCase())
      ) {
        headers.set(key, value);
      }
    });

    // Prepare the request options
    const requestOptions: RequestInit = {
      method,
      headers,
    };

    // Add body for methods that support it
    if (["POST", "PUT", "PATCH"].includes(method) && request.body) {
      requestOptions.body = await request.text();
    }

    // Make the request to FastAPI backend
    const response = await fetch(targetUrl, requestOptions);

    // Get response data
    const responseText = await response.text();

    console.log(
      `Response from FastAPI: ${response.status} ${response.statusText}`,
    );

    // Create response with same status and headers
    const proxyResponse = new NextResponse(responseText, {
      status: response.status,
      statusText: response.statusText,
    });

    // Copy response headers (except problematic ones)
    response.headers.forEach((value, key) => {
      if (!["connection", "transfer-encoding"].includes(key.toLowerCase())) {
        proxyResponse.headers.set(key, value);
      }
    });

    return proxyResponse;
  } catch (error) {
    console.error("Proxy request failed:", error);

    return NextResponse.json(
      {
        error: "Proxy request failed",
        message: error instanceof Error ? error.message : "Unknown error",
        backend_url: BACKEND_URL,
        timestamp: new Date().toISOString(),
      },
      { status: 500 },
    );
  }
}

// HTTP method handlers
export async function GET(request: NextRequest) {
  return proxyRequest(request, "GET");
}

export async function POST(request: NextRequest) {
  return proxyRequest(request, "POST");
}

export async function PUT(request: NextRequest) {
  return proxyRequest(request, "PUT");
}

export async function DELETE(request: NextRequest) {
  return proxyRequest(request, "DELETE");
}

export async function PATCH(request: NextRequest) {
  return proxyRequest(request, "PATCH");
}

export async function OPTIONS(request: NextRequest) {
  return proxyRequest(request, "OPTIONS");
}
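For illustration (not part of this commit), a UI-side call through the new route might look like the sketch below; the handler strips the leading /api segment, so a request to /api/v1/models is forwarded to ${BACKEND_URL}/v1/models on the FastAPI backend. The ModelListResponse shape is assumed here purely for the example:

```ts
// Hedged usage sketch: calling the stack through the proxy from UI code.
// "ModelListResponse" is a made-up shape for illustration only.
interface ModelListResponse {
  data: Array<{ identifier: string }>;
}

export async function listModelsViaProxy(): Promise<ModelListResponse> {
  // Same-origin request: the browser sends this to the Next.js server, whose
  // route handler rewrites /api/v1/models -> /v1/models and forwards it.
  const res = await fetch("/api/v1/models");
  if (!res.ok) {
    throw new Error(`proxy call failed: ${res.status} ${res.statusText}`);
  }
  return (await res.json()) as ModelListResponse;
}
```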
@@ -1,5 +1,6 @@
 import LlamaStackClient from "llama-stack-client";

 export const client = new LlamaStackClient({
-  baseURL: process.env.NEXT_PUBLIC_LLAMA_STACK_BASE_URL,
+  baseURL:
+    typeof window !== "undefined" ? `${window.location.origin}/api` : "/api",
 });
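With the origin-relative baseURL, every LlamaStackClient request now goes to the Next.js server itself and is handled by the proxy route added above, which is what makes removing the CORS middleware safe. A minimal sketch of the same resolution logic, factored out for clarity (hypothetical helper, not part of the diff):

```ts
// Hedged sketch of the baseURL selection above as a standalone helper.
// Assumption: `window` is defined only in the browser, so server-side
// rendering falls back to a relative path that hits the same proxy route.
export function resolveBaseURL(): string {
  if (typeof window !== "undefined") {
    // e.g. http://localhost:8322/api when served by `npm run dev`
    return `${window.location.origin}/api`;
  }
  return "/api";
}
```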