From d96f6ec7636a7fdd96ce4754cea2d4d0b42820d2 Mon Sep 17 00:00:00 2001
From: ehhuang
Date: Tue, 3 Jun 2025 14:57:10 -0700
Subject: [PATCH] chore(ui): use proxy server for backend API calls; simplified k8s deployment (#2350)

# What does this PR do?
- no more CORS middleware needed

## Test Plan

### Local test
llama stack run starter --image-type conda
npm run dev
verify UI works in browser

### Deploy to k8s
temporarily change ui-k8s.yaml.template to load from PR commit image

sh ./apply.sh
$ kubectl get services
go to external_ip:8322 and play around with UI

image
---
 docs/source/distributions/k8s/apply.sh        |   2 -
 .../distributions/k8s/ui-k8s.yaml.template    |   4 +-
 llama_stack/distribution/server/server.py     |  12 --
 llama_stack/ui/app/api/v1/[...path]/route.ts  | 105 ++++++++++++++++++
 llama_stack/ui/lib/client.ts                  |   3 +-
 5 files changed, 109 insertions(+), 17 deletions(-)
 create mode 100644 llama_stack/ui/app/api/v1/[...path]/route.ts

diff --git a/docs/source/distributions/k8s/apply.sh b/docs/source/distributions/k8s/apply.sh
index 8a45fc8c6..7ff7d28eb 100755
--- a/docs/source/distributions/k8s/apply.sh
+++ b/docs/source/distributions/k8s/apply.sh
@@ -13,8 +13,6 @@ export POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-llamastack}
 export INFERENCE_MODEL=${INFERENCE_MODEL:-meta-llama/Llama-3.2-3B-Instruct}
 export SAFETY_MODEL=${SAFETY_MODEL:-meta-llama/Llama-Guard-3-1B}
 
-export NEXT_PUBLIC_LLAMA_STACK_BASE_URL=${NEXT_PUBLIC_LLAMA_STACK_BASE_URL:-}
-
 set -euo pipefail
 set -x
 
diff --git a/docs/source/distributions/k8s/ui-k8s.yaml.template b/docs/source/distributions/k8s/ui-k8s.yaml.template
index a87de4bf2..ef1bf0c55 100644
--- a/docs/source/distributions/k8s/ui-k8s.yaml.template
+++ b/docs/source/distributions/k8s/ui-k8s.yaml.template
@@ -22,8 +22,8 @@ spec:
         image: node:18-alpine
         command: ["/bin/sh"]
         env:
-        - name: NEXT_PUBLIC_LLAMA_STACK_BASE_URL
-          value: ${NEXT_PUBLIC_LLAMA_STACK_BASE_URL}
+        - name: LLAMA_STACK_BACKEND_URL
+          value: "http://llama-stack-service:8321"
         - name: LLAMA_STACK_UI_PORT
           value: "8322"
         args:
diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py
index 5fdfba574..4f2427a55 100644
--- a/llama_stack/distribution/server/server.py
+++ b/llama_stack/distribution/server/server.py
@@ -26,7 +26,6 @@ from aiohttp import hdrs
 from fastapi import Body, FastAPI, HTTPException, Request
 from fastapi import Path as FastapiPath
 from fastapi.exceptions import RequestValidationError
-from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, StreamingResponse
 from openai import BadRequestError
 from pydantic import BaseModel, ValidationError
@@ -479,17 +478,6 @@ def main(args: argparse.Namespace | None = None):
         window_seconds=window_seconds,
     )
 
-    # --- CORS middleware for local development ---
-    # TODO: move to reverse proxy
-    ui_port = os.environ.get("LLAMA_STACK_UI_PORT", 8322)
-    app.add_middleware(
-        CORSMiddleware,
-        allow_origins=[f"http://localhost:{ui_port}"],
-        allow_credentials=True,
-        allow_methods=["*"],
-        allow_headers=["*"],
-    )
-
     try:
         impls = asyncio.run(construct_stack(config))
     except InvalidProviderError as e:
diff --git a/llama_stack/ui/app/api/v1/[...path]/route.ts b/llama_stack/ui/app/api/v1/[...path]/route.ts
new file mode 100644
index 000000000..1959f9099
--- /dev/null
+++ b/llama_stack/ui/app/api/v1/[...path]/route.ts
@@ -0,0 +1,105 @@
+import { NextRequest, NextResponse } from "next/server";
+
+// Get backend URL from environment variable or default to localhost for development
+const BACKEND_URL =
+  process.env.LLAMA_STACK_BACKEND_URL ||
+  `http://localhost:${process.env.LLAMA_STACK_PORT || 8321}`;
+
+async function proxyRequest(request: NextRequest, method: string) {
+  try {
+    // Extract the path from the request URL
+    const url = new URL(request.url);
+    const pathSegments = url.pathname.split("/");
+
+    // Remove /api from the path to get the actual API path
+    // /api/v1/models/list -> /v1/models/list
+    const apiPath = pathSegments.slice(2).join("/"); // Remove 'api' segment
+    const targetUrl = `${BACKEND_URL}/${apiPath}${url.search}`;
+
+    console.log(`Proxying ${method} ${url.pathname} -> ${targetUrl}`);
+
+    // Prepare headers (exclude host and other problematic headers)
+    const headers = new Headers();
+    request.headers.forEach((value, key) => {
+      // Skip headers that might cause issues in proxy
+      if (
+        !["host", "connection", "content-length"].includes(key.toLowerCase())
+      ) {
+        headers.set(key, value);
+      }
+    });
+
+    // Prepare the request options
+    const requestOptions: RequestInit = {
+      method,
+      headers,
+    };
+
+    // Add body for methods that support it
+    if (["POST", "PUT", "PATCH"].includes(method) && request.body) {
+      requestOptions.body = await request.text();
+    }
+
+    // Make the request to FastAPI backend
+    const response = await fetch(targetUrl, requestOptions);
+
+    // Get response data
+    const responseText = await response.text();
+
+    console.log(
+      `Response from FastAPI: ${response.status} ${response.statusText}`,
+    );
+
+    // Create response with same status and headers
+    const proxyResponse = new NextResponse(responseText, {
+      status: response.status,
+      statusText: response.statusText,
+    });
+
+    // Copy response headers (except problematic ones)
+    response.headers.forEach((value, key) => {
+      if (!["connection", "transfer-encoding"].includes(key.toLowerCase())) {
+        proxyResponse.headers.set(key, value);
+      }
+    });
+
+    return proxyResponse;
+  } catch (error) {
+    console.error("Proxy request failed:", error);
+
+    return NextResponse.json(
+      {
+        error: "Proxy request failed",
+        message: error instanceof Error ? error.message : "Unknown error",
+        backend_url: BACKEND_URL,
+        timestamp: new Date().toISOString(),
+      },
+      { status: 500 },
+    );
+  }
+}
+
+// HTTP method handlers
+export async function GET(request: NextRequest) {
+  return proxyRequest(request, "GET");
+}
+
+export async function POST(request: NextRequest) {
+  return proxyRequest(request, "POST");
+}
+
+export async function PUT(request: NextRequest) {
+  return proxyRequest(request, "PUT");
+}
+
+export async function DELETE(request: NextRequest) {
+  return proxyRequest(request, "DELETE");
+}
+
+export async function PATCH(request: NextRequest) {
+  return proxyRequest(request, "PATCH");
+}
+
+export async function OPTIONS(request: NextRequest) {
+  return proxyRequest(request, "OPTIONS");
+}
diff --git a/llama_stack/ui/lib/client.ts b/llama_stack/ui/lib/client.ts
index d8dbaf7a3..8492496e2 100644
--- a/llama_stack/ui/lib/client.ts
+++ b/llama_stack/ui/lib/client.ts
@@ -1,5 +1,6 @@
 import LlamaStackClient from "llama-stack-client";
 
 export const client = new LlamaStackClient({
-  baseURL: process.env.NEXT_PUBLIC_LLAMA_STACK_BASE_URL,
+  baseURL:
+    typeof window !== "undefined" ? `${window.location.origin}/api` : "/api",
 });
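
For reference, a minimal smoke-test sketch for the new proxy route (not part of the patch). It assumes Node 18+ for the built-in `fetch`; the file name `proxy-smoke-test.ts` and the `UI_BASE` variable are placeholders for wherever the UI is reachable (e.g. `http://localhost:8322` locally, or `http://external_ip:8322` per the k8s test plan), and the request path mirrors the rewrite example in route.ts (`/api/v1/models/list` -> `/v1/models/list`), so substitute whichever endpoint the stack actually serves:

```ts
// proxy-smoke-test.ts — hypothetical helper, not part of this PR.
// Sends one request through the Next.js proxy route and prints the result,
// confirming that /api/v1/* is forwarded to the llama-stack backend.

const UI_BASE = process.env.UI_BASE || "http://localhost:8322"; // assumed placeholder

async function main() {
  // route.ts strips the leading "api" segment, so this request should be
  // forwarded to `${LLAMA_STACK_BACKEND_URL}/v1/models/list` on the backend.
  const res = await fetch(`${UI_BASE}/api/v1/models/list`);
  console.log(`GET /api/v1/models/list -> ${res.status} ${res.statusText}`);
  console.log((await res.text()).slice(0, 500)); // preview of the response body
}

main().catch((err) => {
  console.error("Proxy smoke test failed:", err);
  process.exit(1);
});
```

A successful run should return the same status and payload as querying the backend directly on port 8321, which is what makes the CORS middleware removed in server.py unnecessary.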