mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-27 18:50:41 +00:00
chore(ui): use proxy server for backend API calls; simplified k8s deployment (#2350)
# What does this PR do?

- no more CORS middleware needed

## Test Plan

### Local test

    llama stack run starter --image-type conda
    npm run dev

Verify the UI works in the browser.

### Deploy to k8s

Temporarily change ui-k8s.yaml.template to load from the PR commit.

<img width="604" alt="image" src="https://github.com/user-attachments/assets/87fa2e52-1e93-4e32-9e0f-5b283b7a37b3" />

    sh ./apply.sh
    $ kubectl get services

Go to external_ip:8322 and play around with the UI.

<img width="1690" alt="image" src="https://github.com/user-attachments/assets/5b7ec827-4302-4435-a9eb-df423676d873" />
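A quick way to smoke-test the proxy locally could be a small script along these lines (hypothetical helper, not part of this PR; it assumes the UI dev server on http://localhost:8322, the stack on http://localhost:8321, and uses /v1/models only as a convenient read-only endpoint):

```ts
// check-proxy.ts (hypothetical helper, not part of this PR)
// Assumes: UI dev server on :8322, `llama stack run` on :8321, Node 18+ for global fetch.
const UI_ORIGIN = process.env.UI_ORIGIN ?? "http://localhost:8322";
const BACKEND_ORIGIN = process.env.BACKEND_ORIGIN ?? "http://localhost:8321";

async function main() {
  // Hit the backend directly and through the Next.js proxy route.
  const direct = await fetch(`${BACKEND_ORIGIN}/v1/models`);
  const proxied = await fetch(`${UI_ORIGIN}/api/v1/models`);
  console.log("direct :", direct.status, "proxied:", proxied.status);

  // If the proxy is wired up correctly, both calls return the same payload.
  const same =
    JSON.stringify(await direct.json()) === JSON.stringify(await proxied.json());
  console.log("payloads match:", same);
}

main().catch((err) => {
  console.error("proxy check failed:", err);
  process.exit(1);
});
```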
This commit is contained in:
parent
7c1998db25
commit
d96f6ec763
5 changed files with 109 additions and 17 deletions
@@ -26,7 +26,6 @@ from aiohttp import hdrs
 from fastapi import Body, FastAPI, HTTPException, Request
 from fastapi import Path as FastapiPath
 from fastapi.exceptions import RequestValidationError
-from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, StreamingResponse
 from openai import BadRequestError
 from pydantic import BaseModel, ValidationError

@@ -479,17 +478,6 @@ def main(args: argparse.Namespace | None = None):
             window_seconds=window_seconds,
         )

-    # --- CORS middleware for local development ---
-    # TODO: move to reverse proxy
-    ui_port = os.environ.get("LLAMA_STACK_UI_PORT", 8322)
-    app.add_middleware(
-        CORSMiddleware,
-        allow_origins=[f"http://localhost:{ui_port}"],
-        allow_credentials=True,
-        allow_methods=["*"],
-        allow_headers=["*"],
-    )
-
     try:
         impls = asyncio.run(construct_stack(config))
     except InvalidProviderError as e:
llama_stack/ui/app/api/v1/[...path]/route.ts (new file, 105 additions)

@@ -0,0 +1,105 @@
import { NextRequest, NextResponse } from "next/server";

// Get backend URL from environment variable or default to localhost for development
const BACKEND_URL =
  process.env.LLAMA_STACK_BACKEND_URL ||
  `http://localhost:${process.env.LLAMA_STACK_PORT || 8321}`;

async function proxyRequest(request: NextRequest, method: string) {
  try {
    // Extract the path from the request URL
    const url = new URL(request.url);
    const pathSegments = url.pathname.split("/");

    // Remove /api from the path to get the actual API path
    // /api/v1/models/list -> /v1/models/list
    const apiPath = pathSegments.slice(2).join("/"); // Remove 'api' segment
    const targetUrl = `${BACKEND_URL}/${apiPath}${url.search}`;

    console.log(`Proxying ${method} ${url.pathname} -> ${targetUrl}`);

    // Prepare headers (exclude host and other problematic headers)
    const headers = new Headers();
    request.headers.forEach((value, key) => {
      // Skip headers that might cause issues in proxy
      if (
        !["host", "connection", "content-length"].includes(key.toLowerCase())
      ) {
        headers.set(key, value);
      }
    });

    // Prepare the request options
    const requestOptions: RequestInit = {
      method,
      headers,
    };

    // Add body for methods that support it
    if (["POST", "PUT", "PATCH"].includes(method) && request.body) {
      requestOptions.body = await request.text();
    }

    // Make the request to FastAPI backend
    const response = await fetch(targetUrl, requestOptions);

    // Get response data
    const responseText = await response.text();

    console.log(
      `Response from FastAPI: ${response.status} ${response.statusText}`,
    );

    // Create response with same status and headers
    const proxyResponse = new NextResponse(responseText, {
      status: response.status,
      statusText: response.statusText,
    });

    // Copy response headers (except problematic ones)
    response.headers.forEach((value, key) => {
      if (!["connection", "transfer-encoding"].includes(key.toLowerCase())) {
        proxyResponse.headers.set(key, value);
      }
    });

    return proxyResponse;
  } catch (error) {
    console.error("Proxy request failed:", error);

    return NextResponse.json(
      {
        error: "Proxy request failed",
        message: error instanceof Error ? error.message : "Unknown error",
        backend_url: BACKEND_URL,
        timestamp: new Date().toISOString(),
      },
      { status: 500 },
    );
  }
}

// HTTP method handlers
export async function GET(request: NextRequest) {
  return proxyRequest(request, "GET");
}

export async function POST(request: NextRequest) {
  return proxyRequest(request, "POST");
}

export async function PUT(request: NextRequest) {
  return proxyRequest(request, "PUT");
}

export async function DELETE(request: NextRequest) {
  return proxyRequest(request, "DELETE");
}

export async function PATCH(request: NextRequest) {
  return proxyRequest(request, "PATCH");
}

export async function OPTIONS(request: NextRequest) {
  return proxyRequest(request, "OPTIONS");
}
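For illustration (not part of this commit), a UI-side call through the new route might look like the sketch below; the handler strips the leading /api segment, so a request to /api/v1/models is forwarded to ${BACKEND_URL}/v1/models on the FastAPI backend. The ModelListResponse shape is assumed here purely for the example:

```ts
// Hedged usage sketch: calling the stack through the proxy from UI code.
// "ModelListResponse" is a made-up shape for illustration only.
interface ModelListResponse {
  data: Array<{ identifier: string }>;
}

export async function listModelsViaProxy(): Promise<ModelListResponse> {
  // Same-origin request: the browser sends this to the Next.js server, whose
  // route handler rewrites /api/v1/models -> /v1/models and forwards it.
  const res = await fetch("/api/v1/models");
  if (!res.ok) {
    throw new Error(`proxy call failed: ${res.status} ${res.statusText}`);
  }
  return (await res.json()) as ModelListResponse;
}
```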
@@ -1,5 +1,6 @@
 import LlamaStackClient from "llama-stack-client";

 export const client = new LlamaStackClient({
-  baseURL: process.env.NEXT_PUBLIC_LLAMA_STACK_BASE_URL,
+  baseURL:
+    typeof window !== "undefined" ? `${window.location.origin}/api` : "/api",
 });
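With the origin-relative baseURL, every LlamaStackClient request now goes to the Next.js server itself and is handled by the proxy route added above, which is what makes removing the CORS middleware safe. A minimal sketch of the same resolution logic, factored out for clarity (hypothetical helper, not part of the diff):

```ts
// Hedged sketch of the baseURL selection above as a standalone helper.
// Assumption: `window` is defined only in the browser, so server-side
// rendering falls back to a relative path that hits the same proxy route.
export function resolveBaseURL(): string {
  if (typeof window !== "undefined") {
    // e.g. http://localhost:8322/api when served by `npm run dev`
    return `${window.location.origin}/api`;
  }
  return "/api";
}
```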