Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-06-27 18:50:41 +00:00
chore(ui): use proxy server for backend API calls; simplified k8s deployment (#2350)
# What does this PR do?

- no more CORS middleware needed

## Test Plan

### Local test

```
llama stack run starter --image-type conda
npm run dev
```

verify UI works in browser

### Deploy to k8s

temporarily change ui-k8s.yaml.template to load from PR commit

<img width="604" alt="image" src="https://github.com/user-attachments/assets/87fa2e52-1e93-4e32-9e0f-5b283b7a37b3" />

```
sh ./apply.sh
$ kubectl get services
```

go to external_ip:8322 and play around with UI

<img width="1690" alt="image" src="https://github.com/user-attachments/assets/5b7ec827-4302-4435-a9eb-df423676d873" />
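For illustration only (not part of the diff), here is a minimal sketch of the new request flow from the UI's point of view, assuming a hypothetical `listModels` helper and the `/v1/models/list` path that the proxy route below uses as its own example. The browser only ever talks to the UI's origin under `/api`, which is why the backend no longer needs CORS middleware.

```ts
// Hypothetical helper illustrating the same-origin flow introduced by this PR:
// the browser requests /api/v1/models/list on the UI's own origin, and the
// Next.js route handler added below forwards it to the llama-stack backend.
export async function listModels(): Promise<unknown> {
  const res = await fetch("/api/v1/models/list");
  if (!res.ok) {
    throw new Error(`Backend request failed: ${res.status}`);
  }
  return res.json();
}
```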
parent 7c1998db25
commit d96f6ec763

5 changed files with 109 additions and 17 deletions
```diff
@@ -13,8 +13,6 @@ export POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-llamastack}
 export INFERENCE_MODEL=${INFERENCE_MODEL:-meta-llama/Llama-3.2-3B-Instruct}
 export SAFETY_MODEL=${SAFETY_MODEL:-meta-llama/Llama-Guard-3-1B}
 
-export NEXT_PUBLIC_LLAMA_STACK_BASE_URL=${NEXT_PUBLIC_LLAMA_STACK_BASE_URL:-}
-
 set -euo pipefail
 set -x
 
```
```diff
@@ -22,8 +22,8 @@ spec:
         image: node:18-alpine
         command: ["/bin/sh"]
         env:
-        - name: NEXT_PUBLIC_LLAMA_STACK_BASE_URL
-          value: ${NEXT_PUBLIC_LLAMA_STACK_BASE_URL}
+        - name: LLAMA_STACK_BACKEND_URL
+          value: "http://llama-stack-service:8321"
         - name: LLAMA_STACK_UI_PORT
           value: "8322"
         args:
```
```diff
@@ -26,7 +26,6 @@ from aiohttp import hdrs
 from fastapi import Body, FastAPI, HTTPException, Request
 from fastapi import Path as FastapiPath
 from fastapi.exceptions import RequestValidationError
-from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, StreamingResponse
 from openai import BadRequestError
 from pydantic import BaseModel, ValidationError
@@ -479,17 +478,6 @@ def main(args: argparse.Namespace | None = None):
             window_seconds=window_seconds,
         )
 
-    # --- CORS middleware for local development ---
-    # TODO: move to reverse proxy
-    ui_port = os.environ.get("LLAMA_STACK_UI_PORT", 8322)
-    app.add_middleware(
-        CORSMiddleware,
-        allow_origins=[f"http://localhost:{ui_port}"],
-        allow_credentials=True,
-        allow_methods=["*"],
-        allow_headers=["*"],
-    )
-
     try:
         impls = asyncio.run(construct_stack(config))
     except InvalidProviderError as e:
```
llama_stack/ui/app/api/v1/[...path]/route.ts (new file, 105 lines)

```ts
import { NextRequest, NextResponse } from "next/server";

// Get backend URL from environment variable or default to localhost for development
const BACKEND_URL =
  process.env.LLAMA_STACK_BACKEND_URL ||
  `http://localhost:${process.env.LLAMA_STACK_PORT || 8321}`;

async function proxyRequest(request: NextRequest, method: string) {
  try {
    // Extract the path from the request URL
    const url = new URL(request.url);
    const pathSegments = url.pathname.split("/");

    // Remove /api from the path to get the actual API path
    // /api/v1/models/list -> /v1/models/list
    const apiPath = pathSegments.slice(2).join("/"); // Remove 'api' segment
    const targetUrl = `${BACKEND_URL}/${apiPath}${url.search}`;

    console.log(`Proxying ${method} ${url.pathname} -> ${targetUrl}`);

    // Prepare headers (exclude host and other problematic headers)
    const headers = new Headers();
    request.headers.forEach((value, key) => {
      // Skip headers that might cause issues in proxy
      if (
        !["host", "connection", "content-length"].includes(key.toLowerCase())
      ) {
        headers.set(key, value);
      }
    });

    // Prepare the request options
    const requestOptions: RequestInit = {
      method,
      headers,
    };

    // Add body for methods that support it
    if (["POST", "PUT", "PATCH"].includes(method) && request.body) {
      requestOptions.body = await request.text();
    }

    // Make the request to FastAPI backend
    const response = await fetch(targetUrl, requestOptions);

    // Get response data
    const responseText = await response.text();

    console.log(
      `Response from FastAPI: ${response.status} ${response.statusText}`,
    );

    // Create response with same status and headers
    const proxyResponse = new NextResponse(responseText, {
      status: response.status,
      statusText: response.statusText,
    });

    // Copy response headers (except problematic ones)
    response.headers.forEach((value, key) => {
      if (!["connection", "transfer-encoding"].includes(key.toLowerCase())) {
        proxyResponse.headers.set(key, value);
      }
    });

    return proxyResponse;
  } catch (error) {
    console.error("Proxy request failed:", error);

    return NextResponse.json(
      {
        error: "Proxy request failed",
        message: error instanceof Error ? error.message : "Unknown error",
        backend_url: BACKEND_URL,
        timestamp: new Date().toISOString(),
      },
      { status: 500 },
    );
  }
}

// HTTP method handlers
export async function GET(request: NextRequest) {
  return proxyRequest(request, "GET");
}

export async function POST(request: NextRequest) {
  return proxyRequest(request, "POST");
}

export async function PUT(request: NextRequest) {
  return proxyRequest(request, "PUT");
}

export async function DELETE(request: NextRequest) {
  return proxyRequest(request, "DELETE");
}

export async function PATCH(request: NextRequest) {
  return proxyRequest(request, "PATCH");
}

export async function OPTIONS(request: NextRequest) {
  return proxyRequest(request, "OPTIONS");
}
```
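As a quick sanity check of the rewrite logic above, here is a standalone sketch (illustrative URLs; the backend address is the default from the k8s template) of how `proxyRequest` maps an incoming UI path to the backend target:

```ts
// Standalone sketch of the path rewrite performed inside proxyRequest.
// The UI (port 8322) receives /api/v1/models/list; dropping the leading ""
// and "api" segments leaves the path that the backend expects.
const url = new URL("http://localhost:8322/api/v1/models/list?limit=10");
const apiPath = url.pathname.split("/").slice(2).join("/"); // "v1/models/list"
const targetUrl = `http://llama-stack-service:8321/${apiPath}${url.search}`;
console.log(targetUrl); // http://llama-stack-service:8321/v1/models/list?limit=10
```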
```diff
@@ -1,5 +1,6 @@
 import LlamaStackClient from "llama-stack-client";
 
 export const client = new LlamaStackClient({
-  baseURL: process.env.NEXT_PUBLIC_LLAMA_STACK_BASE_URL,
+  baseURL:
+    typeof window !== "undefined" ? `${window.location.origin}/api` : "/api",
 });
```
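Design note: in the browser the client's `baseURL` now resolves to `${window.location.origin}/api`, and to the relative `/api` during server-side rendering, so every SDK call stays same-origin and is forwarded by the proxy route above. A hedged usage sketch, assuming the UI's `@/lib/client` import alias and the `models.list()` method of the `llama-stack-client` SDK:

```ts
import { client } from "@/lib/client";

// Hypothetical UI-side call: the SDK sends the request to the UI's own origin
// under /api, and the Next.js proxy forwards it to LLAMA_STACK_BACKEND_URL.
export async function loadModels() {
  return client.models.list();
}
```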