chore(ui): use proxy server for backend API calls; simplified k8s deployment (#2350)

# What does this PR do?
- No more CORS middleware needed: the UI now proxies backend API calls through a Next.js route handler, so browser requests stay same-origin (see the sketch below).
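
A rough sketch of the new request flow (illustrative only; the endpoint path and ports are assumptions, not taken from this PR): the browser makes a same-origin request to `/api/...`, and the Next.js route handler forwards it to the stack server, so no cross-origin request ever leaves the browser.

```ts
// Illustrative sketch of the new flow (not code from this PR).
// Before: the browser called the backend directly, e.g.
//   fetch("http://localhost:8321/v1/models")  // cross-origin -> needed CORS middleware
// After: the browser calls a same-origin path that the proxy route forwards.
async function listModelsViaProxy() {
  // Same origin as the UI (e.g. http://localhost:8322), so no CORS preflight is needed.
  const res = await fetch("/api/v1/models");
  if (!res.ok) {
    throw new Error(`Proxy returned ${res.status} ${res.statusText}`);
  }
  return res.json();
}
```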


## Test Plan
### Local test
llama stack run starter --image-type conda
npm run dev
Verify the UI works in the browser.
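
As an optional extra check, the proxy route can be hit directly. This is a hypothetical smoke test, not part of this PR; it assumes the UI dev server listens on port 8322 and that the stack exposes `GET /v1/models`.

```ts
// check-proxy.ts: hypothetical smoke test (assumes Node 18+ with global fetch).
const UI_ORIGIN = process.env.UI_ORIGIN ?? "http://localhost:8322";

async function main() {
  // Goes through the Next.js proxy route, which forwards to the backend's /v1/models.
  const res = await fetch(`${UI_ORIGIN}/api/v1/models`);
  console.log(`GET /api/v1/models -> ${res.status} ${res.statusText}`);
  console.log(await res.text());
}

main().catch((err) => {
  console.error("Proxy check failed:", err);
  process.exit(1);
});
```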

### Deploy to k8s
Temporarily change ui-k8s.yaml.template to load from the PR commit:
<img width="604" alt="image"
src="https://github.com/user-attachments/assets/87fa2e52-1e93-4e32-9e0f-5b283b7a37b3"
/>

sh ./apply.sh
kubectl get services
Go to external_ip:8322 and play around with the UI.
<img width="1690" alt="image"
src="https://github.com/user-attachments/assets/5b7ec827-4302-4435-a9eb-df423676d873"
/>
Commit d96f6ec763 (parent 7c1998db25) by ehhuang, 2025-06-03 14:57:10 -07:00. 5 changed files with 109 additions and 17 deletions.


```diff
@@ -13,8 +13,6 @@ export POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-llamastack}
 export INFERENCE_MODEL=${INFERENCE_MODEL:-meta-llama/Llama-3.2-3B-Instruct}
 export SAFETY_MODEL=${SAFETY_MODEL:-meta-llama/Llama-Guard-3-1B}
-export NEXT_PUBLIC_LLAMA_STACK_BASE_URL=${NEXT_PUBLIC_LLAMA_STACK_BASE_URL:-}
 
 set -euo pipefail
 set -x
```


```diff
@@ -22,8 +22,8 @@ spec:
         image: node:18-alpine
         command: ["/bin/sh"]
         env:
-        - name: NEXT_PUBLIC_LLAMA_STACK_BASE_URL
-          value: ${NEXT_PUBLIC_LLAMA_STACK_BASE_URL}
+        - name: LLAMA_STACK_BACKEND_URL
+          value: "http://llama-stack-service:8321"
         - name: LLAMA_STACK_UI_PORT
           value: "8322"
         args:
```


```diff
@@ -26,7 +26,6 @@ from aiohttp import hdrs
 from fastapi import Body, FastAPI, HTTPException, Request
 from fastapi import Path as FastapiPath
 from fastapi.exceptions import RequestValidationError
-from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, StreamingResponse
 from openai import BadRequestError
 from pydantic import BaseModel, ValidationError
@@ -479,17 +478,6 @@ def main(args: argparse.Namespace | None = None):
             window_seconds=window_seconds,
         )
 
-    # --- CORS middleware for local development ---
-    # TODO: move to reverse proxy
-    ui_port = os.environ.get("LLAMA_STACK_UI_PORT", 8322)
-    app.add_middleware(
-        CORSMiddleware,
-        allow_origins=[f"http://localhost:{ui_port}"],
-        allow_credentials=True,
-        allow_methods=["*"],
-        allow_headers=["*"],
-    )
-
     try:
         impls = asyncio.run(construct_stack(config))
     except InvalidProviderError as e:
```


@@ -0,0 +1,105 @@
```ts
import { NextRequest, NextResponse } from "next/server";

// Get backend URL from environment variable or default to localhost for development
const BACKEND_URL =
  process.env.LLAMA_STACK_BACKEND_URL ||
  `http://localhost:${process.env.LLAMA_STACK_PORT || 8321}`;

async function proxyRequest(request: NextRequest, method: string) {
  try {
    // Extract the path from the request URL
    const url = new URL(request.url);
    const pathSegments = url.pathname.split("/");

    // Remove /api from the path to get the actual API path
    // /api/v1/models/list -> /v1/models/list
    const apiPath = pathSegments.slice(2).join("/"); // Remove 'api' segment
    const targetUrl = `${BACKEND_URL}/${apiPath}${url.search}`;

    console.log(`Proxying ${method} ${url.pathname} -> ${targetUrl}`);

    // Prepare headers (exclude host and other problematic headers)
    const headers = new Headers();
    request.headers.forEach((value, key) => {
      // Skip headers that might cause issues in proxy
      if (
        !["host", "connection", "content-length"].includes(key.toLowerCase())
      ) {
        headers.set(key, value);
      }
    });

    // Prepare the request options
    const requestOptions: RequestInit = {
      method,
      headers,
    };

    // Add body for methods that support it
    if (["POST", "PUT", "PATCH"].includes(method) && request.body) {
      requestOptions.body = await request.text();
    }

    // Make the request to FastAPI backend
    const response = await fetch(targetUrl, requestOptions);

    // Get response data
    const responseText = await response.text();

    console.log(
      `Response from FastAPI: ${response.status} ${response.statusText}`,
    );

    // Create response with same status and headers
    const proxyResponse = new NextResponse(responseText, {
      status: response.status,
      statusText: response.statusText,
    });

    // Copy response headers (except problematic ones)
    response.headers.forEach((value, key) => {
      if (!["connection", "transfer-encoding"].includes(key.toLowerCase())) {
        proxyResponse.headers.set(key, value);
      }
    });

    return proxyResponse;
  } catch (error) {
    console.error("Proxy request failed:", error);

    return NextResponse.json(
      {
        error: "Proxy request failed",
        message: error instanceof Error ? error.message : "Unknown error",
        backend_url: BACKEND_URL,
        timestamp: new Date().toISOString(),
      },
      { status: 500 },
    );
  }
}

// HTTP method handlers
export async function GET(request: NextRequest) {
  return proxyRequest(request, "GET");
}

export async function POST(request: NextRequest) {
  return proxyRequest(request, "POST");
}

export async function PUT(request: NextRequest) {
  return proxyRequest(request, "PUT");
}

export async function DELETE(request: NextRequest) {
  return proxyRequest(request, "DELETE");
}

export async function PATCH(request: NextRequest) {
  return proxyRequest(request, "PATCH");
}

export async function OPTIONS(request: NextRequest) {
  return proxyRequest(request, "OPTIONS");
}
```
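
For reference, here is a standalone sketch of the path rewrite `proxyRequest` performs; it mirrors the `slice(2)` logic above, and the URLs are example values rather than anything taken from the PR.

```ts
// Standalone illustration of the proxy's path rewrite (example values only).
const BACKEND_URL = "http://llama-stack-service:8321"; // e.g. LLAMA_STACK_BACKEND_URL in k8s

function rewriteToBackend(requestUrl: string): string {
  const url = new URL(requestUrl);
  // "/api/v1/models/list" -> ["", "api", "v1", "models", "list"] -> "v1/models/list"
  const apiPath = url.pathname.split("/").slice(2).join("/");
  return `${BACKEND_URL}/${apiPath}${url.search}`;
}

console.log(rewriteToBackend("http://localhost:8322/api/v1/models/list?limit=10"));
// -> http://llama-stack-service:8321/v1/models/list?limit=10
```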


```diff
@@ -1,5 +1,6 @@
 import LlamaStackClient from "llama-stack-client";
 
 export const client = new LlamaStackClient({
-  baseURL: process.env.NEXT_PUBLIC_LLAMA_STACK_BASE_URL,
+  baseURL:
+    typeof window !== "undefined" ? `${window.location.origin}/api` : "/api",
 });
```
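
A minimal sketch of how the new `baseURL` resolves (example origin; the behavior is read off the ternary above):

```ts
// Sketch: what the new baseURL expression evaluates to.
const baseURL =
  typeof window !== "undefined" ? `${window.location.origin}/api` : "/api";

// In the browser at http://localhost:8322 -> "http://localhost:8322/api"
// On the server (no window, e.g. during SSR) -> "/api"
// Either way the client talks to the Next.js proxy route, not the backend directly.
```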