From d96f6ec7636a7fdd96ce4754cea2d4d0b42820d2 Mon Sep 17 00:00:00 2001
From: ehhuang
Date: Tue, 3 Jun 2025 14:57:10 -0700
Subject: [PATCH] chore(ui): use proxy server for backend API calls; simplified k8s deployment (#2350)

# What does this PR do?
- no more CORS middleware needed

## Test Plan

### Local test
llama stack run starter --image-type conda
npm run dev
verify UI works in browser

### Deploy to k8s
temporarily change ui-k8s.yaml.template to load from PR commit image

sh ./apply.sh
$ kubectl get services
go to external_ip:8322 and play around with UI

image
---
 docs/source/distributions/k8s/apply.sh        |   2 -
 .../distributions/k8s/ui-k8s.yaml.template    |   4 +-
 llama_stack/distribution/server/server.py     |  12 --
 llama_stack/ui/app/api/v1/[...path]/route.ts  | 105 ++++++++++++++++++
 llama_stack/ui/lib/client.ts                  |   3 +-
 5 files changed, 109 insertions(+), 17 deletions(-)
 create mode 100644 llama_stack/ui/app/api/v1/[...path]/route.ts

diff --git a/docs/source/distributions/k8s/apply.sh b/docs/source/distributions/k8s/apply.sh
index 8a45fc8c6..7ff7d28eb 100755
--- a/docs/source/distributions/k8s/apply.sh
+++ b/docs/source/distributions/k8s/apply.sh
@@ -13,8 +13,6 @@ export POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-llamastack}
 export INFERENCE_MODEL=${INFERENCE_MODEL:-meta-llama/Llama-3.2-3B-Instruct}
 export SAFETY_MODEL=${SAFETY_MODEL:-meta-llama/Llama-Guard-3-1B}
 
-export NEXT_PUBLIC_LLAMA_STACK_BASE_URL=${NEXT_PUBLIC_LLAMA_STACK_BASE_URL:-}
-
 set -euo pipefail
 set -x
 
diff --git a/docs/source/distributions/k8s/ui-k8s.yaml.template b/docs/source/distributions/k8s/ui-k8s.yaml.template
index a87de4bf2..ef1bf0c55 100644
--- a/docs/source/distributions/k8s/ui-k8s.yaml.template
+++ b/docs/source/distributions/k8s/ui-k8s.yaml.template
@@ -22,8 +22,8 @@ spec:
         image: node:18-alpine
         command: ["/bin/sh"]
         env:
-        - name: NEXT_PUBLIC_LLAMA_STACK_BASE_URL
-          value: ${NEXT_PUBLIC_LLAMA_STACK_BASE_URL}
+        - name: LLAMA_STACK_BACKEND_URL
+          value: "http://llama-stack-service:8321"
         - name: LLAMA_STACK_UI_PORT
           value: "8322"
         args:
diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py
index 5fdfba574..4f2427a55 100644
--- a/llama_stack/distribution/server/server.py
+++ b/llama_stack/distribution/server/server.py
@@ -26,7 +26,6 @@ from aiohttp import hdrs
 from fastapi import Body, FastAPI, HTTPException, Request
 from fastapi import Path as FastapiPath
 from fastapi.exceptions import RequestValidationError
-from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, StreamingResponse
 from openai import BadRequestError
 from pydantic import BaseModel, ValidationError
@@ -479,17 +478,6 @@ def main(args: argparse.Namespace | None = None):
         window_seconds=window_seconds,
     )
 
-    # --- CORS middleware for local development ---
-    # TODO: move to reverse proxy
-    ui_port = os.environ.get("LLAMA_STACK_UI_PORT", 8322)
-    app.add_middleware(
-        CORSMiddleware,
-        allow_origins=[f"http://localhost:{ui_port}"],
-        allow_credentials=True,
-        allow_methods=["*"],
-        allow_headers=["*"],
-    )
-
     try:
         impls = asyncio.run(construct_stack(config))
     except InvalidProviderError as e:
diff --git a/llama_stack/ui/app/api/v1/[...path]/route.ts b/llama_stack/ui/app/api/v1/[...path]/route.ts
new file mode 100644
index 000000000..1959f9099
--- /dev/null
+++ b/llama_stack/ui/app/api/v1/[...path]/route.ts
@@ -0,0 +1,105 @@
+import { NextRequest, NextResponse } from "next/server";
+
+// Get backend URL from environment variable or default to localhost for development
+const BACKEND_URL =
+  process.env.LLAMA_STACK_BACKEND_URL ||
+  `http://localhost:${process.env.LLAMA_STACK_PORT || 8321}`;
+
+async function proxyRequest(request: NextRequest, method: string) {
+  try {
+    // Extract the path from the request URL
+    const url = new URL(request.url);
+    const pathSegments = url.pathname.split("/");
+
+    // Remove /api from the path to get the actual API path
+    // /api/v1/models/list -> /v1/models/list
+    const apiPath = pathSegments.slice(2).join("/"); // Remove 'api' segment
+    const targetUrl = `${BACKEND_URL}/${apiPath}${url.search}`;
+
+    console.log(`Proxying ${method} ${url.pathname} -> ${targetUrl}`);
+
+    // Prepare headers (exclude host and other problematic headers)
+    const headers = new Headers();
+    request.headers.forEach((value, key) => {
+      // Skip headers that might cause issues in proxy
+      if (
+        !["host", "connection", "content-length"].includes(key.toLowerCase())
+      ) {
+        headers.set(key, value);
+      }
+    });
+
+    // Prepare the request options
+    const requestOptions: RequestInit = {
+      method,
+      headers,
+    };
+
+    // Add body for methods that support it
+    if (["POST", "PUT", "PATCH"].includes(method) && request.body) {
+      requestOptions.body = await request.text();
+    }
+
+    // Make the request to FastAPI backend
+    const response = await fetch(targetUrl, requestOptions);
+
+    // Get response data
+    const responseText = await response.text();
+
+    console.log(
+      `Response from FastAPI: ${response.status} ${response.statusText}`,
+    );
+
+    // Create response with same status and headers
+    const proxyResponse = new NextResponse(responseText, {
+      status: response.status,
+      statusText: response.statusText,
+    });
+
+    // Copy response headers (except problematic ones)
+    response.headers.forEach((value, key) => {
+      if (!["connection", "transfer-encoding"].includes(key.toLowerCase())) {
+        proxyResponse.headers.set(key, value);
+      }
+    });
+
+    return proxyResponse;
+  } catch (error) {
+    console.error("Proxy request failed:", error);
+
+    return NextResponse.json(
+      {
+        error: "Proxy request failed",
+        message: error instanceof Error ? error.message : "Unknown error",
+        backend_url: BACKEND_URL,
+        timestamp: new Date().toISOString(),
+      },
+      { status: 500 },
+    );
+  }
+}
+
+// HTTP method handlers
+export async function GET(request: NextRequest) {
+  return proxyRequest(request, "GET");
+}
+
+export async function POST(request: NextRequest) {
+  return proxyRequest(request, "POST");
+}
+
+export async function PUT(request: NextRequest) {
+  return proxyRequest(request, "PUT");
+}
+
+export async function DELETE(request: NextRequest) {
+  return proxyRequest(request, "DELETE");
+}
+
+export async function PATCH(request: NextRequest) {
+  return proxyRequest(request, "PATCH");
+}
+
+export async function OPTIONS(request: NextRequest) {
+  return proxyRequest(request, "OPTIONS");
+}
diff --git a/llama_stack/ui/lib/client.ts b/llama_stack/ui/lib/client.ts
index d8dbaf7a3..8492496e2 100644
--- a/llama_stack/ui/lib/client.ts
+++ b/llama_stack/ui/lib/client.ts
@@ -1,5 +1,6 @@
 import LlamaStackClient from "llama-stack-client";
 
 export const client = new LlamaStackClient({
-  baseURL: process.env.NEXT_PUBLIC_LLAMA_STACK_BASE_URL,
+  baseURL:
+    typeof window !== "undefined" ? `${window.location.origin}/api` : "/api",
 });
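
For reference, a minimal smoke-test sketch for the new proxy route (not part of the patch). It assumes Node 18+ for the built-in `fetch`; the file name `proxy-smoke-test.ts` and the `UI_BASE` variable are placeholders for wherever the UI is reachable (e.g. `http://localhost:8322` locally, or `http://external_ip:8322` per the k8s test plan), and the request path mirrors the rewrite example in route.ts (`/api/v1/models/list` -> `/v1/models/list`), so substitute whichever endpoint the stack actually serves:

```ts
// proxy-smoke-test.ts — hypothetical helper, not part of this PR.
// Sends one request through the Next.js proxy route and prints the result,
// confirming that /api/v1/* is forwarded to the llama-stack backend.

const UI_BASE = process.env.UI_BASE || "http://localhost:8322"; // assumed placeholder

async function main() {
  // route.ts strips the leading "api" segment, so this request should be
  // forwarded to `${LLAMA_STACK_BACKEND_URL}/v1/models/list` on the backend.
  const res = await fetch(`${UI_BASE}/api/v1/models/list`);
  console.log(`GET /api/v1/models/list -> ${res.status} ${res.statusText}`);
  console.log((await res.text()).slice(0, 500)); // preview of the response body
}

main().catch((err) => {
  console.error("Proxy smoke test failed:", err);
  process.exit(1);
});
```

A successful run should return the same status and payload as querying the backend directly on port 8321, which is what makes the CORS middleware removed in server.py unnecessary.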