chore(ui): use proxy server for backend API calls; simplified k8s deployment (#2350)

# What does this PR do?
- No more CORS middleware needed: the UI now proxies backend API calls through a Next.js route handler, so browser requests stay same-origin (see the sketch below).
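
A rough sketch of the new request flow (illustrative only; the endpoint path and ports are assumptions, not taken from this PR): the browser makes a same-origin request to `/api/...`, and the Next.js route handler forwards it to the stack server, so no cross-origin request ever leaves the browser.

```ts
// Illustrative sketch of the new flow (not code from this PR).
// Before: the browser called the backend directly, e.g.
//   fetch("http://localhost:8321/v1/models")  // cross-origin -> needed CORS middleware
// After: the browser calls a same-origin path that the proxy route forwards.
async function listModelsViaProxy() {
  // Same origin as the UI (e.g. http://localhost:8322), so no CORS preflight is needed.
  const res = await fetch("/api/v1/models");
  if (!res.ok) {
    throw new Error(`Proxy returned ${res.status} ${res.statusText}`);
  }
  return res.json();
}
```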


## Test Plan
### Local test
llama stack run starter --image-type conda
npm run dev
Verify the UI works in the browser.
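
As an optional extra check, the proxy route can be hit directly. This is a hypothetical smoke test, not part of this PR; it assumes the UI dev server listens on port 8322 and that the stack exposes `GET /v1/models`.

```ts
// check-proxy.ts: hypothetical smoke test (assumes Node 18+ with global fetch).
const UI_ORIGIN = process.env.UI_ORIGIN ?? "http://localhost:8322";

async function main() {
  // Goes through the Next.js proxy route, which forwards to the backend's /v1/models.
  const res = await fetch(`${UI_ORIGIN}/api/v1/models`);
  console.log(`GET /api/v1/models -> ${res.status} ${res.statusText}`);
  console.log(await res.text());
}

main().catch((err) => {
  console.error("Proxy check failed:", err);
  process.exit(1);
});
```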

### Deploy to k8s
Temporarily change ui-k8s.yaml.template to load from the PR commit:
<img width="604" alt="image"
src="https://github.com/user-attachments/assets/87fa2e52-1e93-4e32-9e0f-5b283b7a37b3"
/>

sh ./apply.sh
kubectl get services
Go to external_ip:8322 and play around with the UI.
<img width="1690" alt="image"
src="https://github.com/user-attachments/assets/5b7ec827-4302-4435-a9eb-df423676d873"
/>
Commit d96f6ec763 (parent 7c1998db25) by ehhuang, 2025-06-03 14:57:10 -07:00. 5 changed files with 109 additions and 17 deletions.


```diff
@@ -13,8 +13,6 @@ export POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-llamastack}
 export INFERENCE_MODEL=${INFERENCE_MODEL:-meta-llama/Llama-3.2-3B-Instruct}
 export SAFETY_MODEL=${SAFETY_MODEL:-meta-llama/Llama-Guard-3-1B}
-export NEXT_PUBLIC_LLAMA_STACK_BASE_URL=${NEXT_PUBLIC_LLAMA_STACK_BASE_URL:-}
 
 set -euo pipefail
 set -x
```


```diff
@@ -22,8 +22,8 @@ spec:
         image: node:18-alpine
         command: ["/bin/sh"]
         env:
-        - name: NEXT_PUBLIC_LLAMA_STACK_BASE_URL
-          value: ${NEXT_PUBLIC_LLAMA_STACK_BASE_URL}
+        - name: LLAMA_STACK_BACKEND_URL
+          value: "http://llama-stack-service:8321"
         - name: LLAMA_STACK_UI_PORT
           value: "8322"
         args:
```


```diff
@@ -26,7 +26,6 @@ from aiohttp import hdrs
 from fastapi import Body, FastAPI, HTTPException, Request
 from fastapi import Path as FastapiPath
 from fastapi.exceptions import RequestValidationError
-from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, StreamingResponse
 from openai import BadRequestError
 from pydantic import BaseModel, ValidationError
@@ -479,17 +478,6 @@ def main(args: argparse.Namespace | None = None):
             window_seconds=window_seconds,
         )
 
-    # --- CORS middleware for local development ---
-    # TODO: move to reverse proxy
-    ui_port = os.environ.get("LLAMA_STACK_UI_PORT", 8322)
-    app.add_middleware(
-        CORSMiddleware,
-        allow_origins=[f"http://localhost:{ui_port}"],
-        allow_credentials=True,
-        allow_methods=["*"],
-        allow_headers=["*"],
-    )
-
     try:
         impls = asyncio.run(construct_stack(config))
     except InvalidProviderError as e:
```


@@ -0,0 +1,105 @@
```ts
import { NextRequest, NextResponse } from "next/server";

// Get backend URL from environment variable or default to localhost for development
const BACKEND_URL =
  process.env.LLAMA_STACK_BACKEND_URL ||
  `http://localhost:${process.env.LLAMA_STACK_PORT || 8321}`;

async function proxyRequest(request: NextRequest, method: string) {
  try {
    // Extract the path from the request URL
    const url = new URL(request.url);
    const pathSegments = url.pathname.split("/");

    // Remove /api from the path to get the actual API path
    // /api/v1/models/list -> /v1/models/list
    const apiPath = pathSegments.slice(2).join("/"); // Remove 'api' segment
    const targetUrl = `${BACKEND_URL}/${apiPath}${url.search}`;

    console.log(`Proxying ${method} ${url.pathname} -> ${targetUrl}`);

    // Prepare headers (exclude host and other problematic headers)
    const headers = new Headers();
    request.headers.forEach((value, key) => {
      // Skip headers that might cause issues in proxy
      if (
        !["host", "connection", "content-length"].includes(key.toLowerCase())
      ) {
        headers.set(key, value);
      }
    });

    // Prepare the request options
    const requestOptions: RequestInit = {
      method,
      headers,
    };

    // Add body for methods that support it
    if (["POST", "PUT", "PATCH"].includes(method) && request.body) {
      requestOptions.body = await request.text();
    }

    // Make the request to FastAPI backend
    const response = await fetch(targetUrl, requestOptions);

    // Get response data
    const responseText = await response.text();

    console.log(
      `Response from FastAPI: ${response.status} ${response.statusText}`,
    );

    // Create response with same status and headers
    const proxyResponse = new NextResponse(responseText, {
      status: response.status,
      statusText: response.statusText,
    });

    // Copy response headers (except problematic ones)
    response.headers.forEach((value, key) => {
      if (!["connection", "transfer-encoding"].includes(key.toLowerCase())) {
        proxyResponse.headers.set(key, value);
      }
    });

    return proxyResponse;
  } catch (error) {
    console.error("Proxy request failed:", error);

    return NextResponse.json(
      {
        error: "Proxy request failed",
        message: error instanceof Error ? error.message : "Unknown error",
        backend_url: BACKEND_URL,
        timestamp: new Date().toISOString(),
      },
      { status: 500 },
    );
  }
}

// HTTP method handlers
export async function GET(request: NextRequest) {
  return proxyRequest(request, "GET");
}

export async function POST(request: NextRequest) {
  return proxyRequest(request, "POST");
}

export async function PUT(request: NextRequest) {
  return proxyRequest(request, "PUT");
}

export async function DELETE(request: NextRequest) {
  return proxyRequest(request, "DELETE");
}

export async function PATCH(request: NextRequest) {
  return proxyRequest(request, "PATCH");
}

export async function OPTIONS(request: NextRequest) {
  return proxyRequest(request, "OPTIONS");
}
```
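
For reference, here is a standalone sketch of the path rewrite `proxyRequest` performs; it mirrors the `slice(2)` logic above, and the URLs are example values rather than anything taken from the PR.

```ts
// Standalone illustration of the proxy's path rewrite (example values only).
const BACKEND_URL = "http://llama-stack-service:8321"; // e.g. LLAMA_STACK_BACKEND_URL in k8s

function rewriteToBackend(requestUrl: string): string {
  const url = new URL(requestUrl);
  // "/api/v1/models/list" -> ["", "api", "v1", "models", "list"] -> "v1/models/list"
  const apiPath = url.pathname.split("/").slice(2).join("/");
  return `${BACKEND_URL}/${apiPath}${url.search}`;
}

console.log(rewriteToBackend("http://localhost:8322/api/v1/models/list?limit=10"));
// -> http://llama-stack-service:8321/v1/models/list?limit=10
```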


```diff
@@ -1,5 +1,6 @@
 import LlamaStackClient from "llama-stack-client";
 
 export const client = new LlamaStackClient({
-  baseURL: process.env.NEXT_PUBLIC_LLAMA_STACK_BASE_URL,
+  baseURL:
+    typeof window !== "undefined" ? `${window.location.origin}/api` : "/api",
 });
```
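
A minimal sketch of how the new `baseURL` resolves (example origin; the behavior is read off the ternary above):

```ts
// Sketch: what the new baseURL expression evaluates to.
const baseURL =
  typeof window !== "undefined" ? `${window.location.origin}/api` : "/api";

// In the browser at http://localhost:8322 -> "http://localhost:8322/api"
// On the server (no window, e.g. during SSR) -> "/api"
// Either way the client talks to the Next.js proxy route, not the backend directly.
```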