From 9e78f2da96e43bbbfab390725b9020871a2820d5 Mon Sep 17 00:00:00 2001 From: Jiayi Ni Date: Fri, 8 Aug 2025 02:27:55 -0700 Subject: [PATCH 1/6] docs: fix the docs for NVIDIA Inference Provider (#3055) # What does this PR do? Fix the NVIDIA inference docs by updating API methods, model IDs, and embedding example. ## Test Plan N/A --- .../distributions/self_hosted_distro/nvidia.md | 2 +- llama_stack/distributions/nvidia/doc_template.md | 2 +- .../providers/remote/inference/nvidia/NVIDIA.md | 16 +++++++++------- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/docs/source/distributions/self_hosted_distro/nvidia.md b/docs/source/distributions/self_hosted_distro/nvidia.md index 6e399e6ce..e845c3c48 100644 --- a/docs/source/distributions/self_hosted_distro/nvidia.md +++ b/docs/source/distributions/self_hosted_distro/nvidia.md @@ -157,7 +157,7 @@ docker run \ If you've set up your local development environment, you can also build the image using your local virtual environment. ```bash -INFERENCE_MODEL=meta-llama/Llama-3.1-8b-Instruct +INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct llama stack build --distro nvidia --image-type venv llama stack run ./run.yaml \ --port 8321 \ diff --git a/llama_stack/distributions/nvidia/doc_template.md b/llama_stack/distributions/nvidia/doc_template.md index 3884e6b51..56e99e523 100644 --- a/llama_stack/distributions/nvidia/doc_template.md +++ b/llama_stack/distributions/nvidia/doc_template.md @@ -129,7 +129,7 @@ docker run \ If you've set up your local development environment, you can also build the image using your local virtual environment. ```bash -INFERENCE_MODEL=meta-llama/Llama-3.1-8b-Instruct +INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct llama stack build --distro nvidia --image-type venv llama stack run ./run.yaml \ --port 8321 \ diff --git a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md index 2505718e0..4a072215c 100644 --- a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md +++ b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md @@ -42,8 +42,8 @@ client.initialize() ### Create Completion ```python -response = client.completion( - model_id="meta-llama/Llama-3.1-8b-Instruct", +response = client.inference.completion( + model_id="meta-llama/Llama-3.1-8B-Instruct", content="Complete the sentence using one word: Roses are red, violets are :", stream=False, sampling_params={ @@ -56,8 +56,8 @@ print(f"Response: {response.content}") ### Create Chat Completion ```python -response = client.chat_completion( - model_id="meta-llama/Llama-3.1-8b-Instruct", +response = client.inference.chat_completion( + model_id="meta-llama/Llama-3.1-8B-Instruct", messages=[ { "role": "system", @@ -78,8 +78,10 @@ print(f"Response: {response.completion_message.content}") ### Create Embeddings ```python -response = client.embeddings( - model_id="meta-llama/Llama-3.1-8b-Instruct", contents=["foo", "bar", "baz"] +response = client.inference.embeddings( + model_id="nvidia/llama-3.2-nv-embedqa-1b-v2", + contents=["What is the capital of France?"], + task_type="query", ) print(f"Embeddings: {response.embeddings}") -``` +``` \ No newline at end of file From 9b70bb9d4b41272b81fd860641d143a7ebc30ebe Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Fri, 8 Aug 2025 08:44:06 -0600 Subject: [PATCH 2/6] feat(ui): Adding Vector Store Files to Admin UI (#3041) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? 
This PR updates the UI to create three new pages:

1. `/files/{file_id}`
2. `files/{file_id}/contents`
3. `files/{file_id}/contents/{content_id}`

Each file in the list is clickable and brings the user to the File Detail page. The File Detail page shows all of the file's content, and the content detail page shows the individual chunk/content that was parsed.

These only use our existing OpenAI-compatible APIs. I have a separate branch where I expose the embedding and the portal is correctly populated; I included the FE rendering code for that in this PR. A short sketch of the data flow follows the screenshots below.

1. `vector-stores/{vector_store_id}/files/{file_id}`

   [Screenshot 2025-08-06 at 10 20 12 PM]

2. `vector-stores/{vector_store_id}/files/{file_id}/contents`

   [Screenshot 2025-08-06 at 10 21 23 PM]

3. `vector-stores/{vector_store_id}/files/{file_id}/contents/{content_id}`

   [Screenshot 2025-08-06 at 10 21 45 PM]
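For reviewers, the data flow behind the new pages is roughly the following. This is a minimal sketch: `loadFileDetail` is a hypothetical helper and the `llama-stack-client` import style is an assumption, but the `vectorStores` and `ContentsAPI` calls match the code in this PR.

```ts
// Hypothetical helper illustrating how the new pages load their data.
import LlamaStackClient from "llama-stack-client";
import { ContentsAPI } from "@/lib/contents-api";

async function loadFileDetail(
  client: LlamaStackClient,
  vectorStoreId: string,
  fileId: string,
) {
  // Store and file metadata come from the existing OpenAI-compatible endpoints.
  const [store, file] = await Promise.all([
    client.vectorStores.retrieve(vectorStoreId),
    client.vectorStores.files.retrieve(vectorStoreId, fileId),
  ]);

  // Chunk-level contents go through the new ContentsAPI helper.
  const contents = await new ContentsAPI(client).listContents(vectorStoreId, fileId);

  return { store, file, chunks: contents.data };
}
```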
## Test Plan
I tested this locally and reviewed the code. I generated a significant share of the code with Claude, with some manual intervention. After this, I'll begin adding tests to the UI.

---------

Signed-off-by: Francisco Javier Arceo
---
 .../[fileId]/contents/[contentId]/page.tsx       | 383 ++++++++++++++++++
 .../[id]/files/[fileId]/contents/page.tsx        | 297 ++++++++++++++
 .../[id]/files/[fileId]/page.tsx                 | 258 ++++++++++++
 llama_stack/ui/app/logs/vector-stores/layout.tsx |  25 +-
 llama_stack/ui/app/logs/vector-stores/page.tsx   | 148 ++++---
 .../vector-stores/vector-store-detail.tsx        |  17 +-
 llama_stack/ui/lib/contents-api.ts               | 112 +++++
 llama_stack/ui/package-lock.json                 |  10 +-
 8 files changed, 1175 insertions(+), 75 deletions(-)
 create mode 100644 llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx
 create mode 100644 llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.tsx
 create mode 100644 llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/page.tsx
 create mode 100644 llama_stack/ui/lib/contents-api.ts

diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx b/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx
new file mode 100644
index 000000000..6896b992a
--- /dev/null
+++ b/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx
@@ -0,0 +1,383 @@
+"use client";
+
+import { useEffect, useState } from "react";
+import { useParams, useRouter } from "next/navigation";
+import { useAuthClient } from "@/hooks/use-auth-client";
+import { ContentsAPI, VectorStoreContentItem } from "@/lib/contents-api";
+import type { VectorStore } from "llama-stack-client/resources/vector-stores/vector-stores";
+import type { VectorStoreFile } from "llama-stack-client/resources/vector-stores/files";
+import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { Button } from "@/components/ui/button";
+import { Input } from "@/components/ui/input";
+import { Edit, Save, X, Trash2 } from "lucide-react";
+import {
+  DetailLoadingView,
+  DetailErrorView,
+  DetailNotFoundView,
+  DetailLayout,
+  PropertiesCard,
+  PropertyItem,
+} from "@/components/layout/detail-layout";
+import { PageBreadcrumb, BreadcrumbSegment } from "@/components/layout/page-breadcrumb";
+
+export default function ContentDetailPage() {
+  const params = useParams();
+  const router = useRouter();
+  const vectorStoreId = params.id as string;
+  const fileId = params.fileId as string;
+  const contentId = params.contentId as string;
+  const client = useAuthClient();
+
+  // Extract the plain-text body from a content item, which may be a bare
+  // string or a typed object of the form { type: "text", text: string }.
+  const getTextFromContent = (content: any): string => {
+    if (typeof content === 'string') {
+      return content;
+    } else if (content && content.type === 'text') {
+      return content.text;
+    }
+    return '';
+  };
+
+  const [store, setStore] = useState<VectorStore | null>(null);
+  const [file, setFile] = useState<VectorStoreFile | null>(null);
+  const [content, setContent] = useState<VectorStoreContentItem | null>(null);
+  const [isLoading, setIsLoading] = useState(true);
+  const [error, setError] = useState<Error | null>(null);
+  const [isEditing, setIsEditing] = useState(false);
+  const [editedContent, setEditedContent] = useState("");
+  const [editedMetadata, setEditedMetadata] = useState<Record<string, any>>({});
+  const [isEditingEmbedding, setIsEditingEmbedding] = useState(false);
+  const [editedEmbedding, setEditedEmbedding] = useState<number[]>([]);
+
+  useEffect(() => {
+    if (!vectorStoreId || !fileId || !contentId) return;
+
+    const fetchData = async () => {
+      setIsLoading(true);
+      setError(null);
+      try {
+        const [storeResponse, fileResponse] = await Promise.all([
+          client.vectorStores.retrieve(vectorStoreId),
+          client.vectorStores.files.retrieve(vectorStoreId, fileId),
+        ]);
+
+        setStore(storeResponse as VectorStore);
+        setFile(fileResponse as VectorStoreFile);
+
+        const contentsAPI = new ContentsAPI(client);
+        const contentsResponse = await contentsAPI.listContents(vectorStoreId, fileId);
+        const targetContent = contentsResponse.data.find(c => c.id === contentId);
+
+        if (targetContent) {
+          setContent(targetContent);
+          setEditedContent(getTextFromContent(targetContent.content));
+          setEditedMetadata({ ...targetContent.metadata });
+          setEditedEmbedding(targetContent.embedding || []);
+        } else {
+          throw new Error(`Content ${contentId} not found`);
+        }
+      } catch (err) {
+        setError(err instanceof Error ? err : new Error("Failed to load content."));
+      } finally {
+        setIsLoading(false);
+      }
+    };
+    fetchData();
+  }, [vectorStoreId, fileId, contentId, client]);
+
+  const handleSave = async () => {
+    if (!content) return;
+
+    try {
+      const updates: { content?: string; metadata?: Record<string, any> } = {};
+
+      if (editedContent !== getTextFromContent(content.content)) {
+        updates.content = editedContent;
+      }
+
+      if (JSON.stringify(editedMetadata) !== JSON.stringify(content.metadata)) {
+        updates.metadata = editedMetadata;
+      }
+
+      // Only issue an update request if something actually changed.
+      if (Object.keys(updates).length > 0) {
+        const contentsAPI = new ContentsAPI(client);
+        const updatedContent = await contentsAPI.updateContent(vectorStoreId, fileId, contentId, updates);
+        setContent(updatedContent);
+      }
+
+      setIsEditing(false);
+    } catch (err) {
+      console.error('Failed to update content:', err);
+    }
+  };
+
+  const handleDelete = async () => {
+    if (!confirm('Are you sure you want to delete this content?')) return;
+
+    try {
+      const contentsAPI = new ContentsAPI(client);
+      await contentsAPI.deleteContent(vectorStoreId, fileId, contentId);
+      router.push(`/logs/vector-stores/${vectorStoreId}/files/${fileId}/contents`);
+    } catch (err) {
+      console.error('Failed to delete content:', err);
+    }
+  };
+
+  const handleCancel = () => {
+    setEditedContent(content ? getTextFromContent(content.content) : "");
+    setEditedMetadata({ ...content?.metadata });
+    setEditedEmbedding(content?.embedding || []);
+    setIsEditing(false);
+    setIsEditingEmbedding(false);
+  };
+
+  const title = `Content: ${contentId}`;
+
+  const breadcrumbSegments: BreadcrumbSegment[] = [
+    { label: "Vector Stores", href: "/logs/vector-stores" },
+    { label: store?.name || vectorStoreId, href: `/logs/vector-stores/${vectorStoreId}` },
+    { label: "Files", href: `/logs/vector-stores/${vectorStoreId}` },
+    { label: fileId, href: `/logs/vector-stores/${vectorStoreId}/files/${fileId}` },
+    { label: "Contents", href: `/logs/vector-stores/${vectorStoreId}/files/${fileId}/contents` },
+    { label: contentId },
+  ];
+
+  if (error) {
+    return <DetailErrorView title={title} id={contentId} error={error} />;
+  }
+  if (isLoading) {
+    return <DetailLoadingView title={title} />;
+  }
+  if (!content) {
+    return <DetailNotFoundView title={title} id={contentId} />;
+  }
+
+  const mainContent = (
+    <>
+      <Card>
+        <CardHeader className="flex flex-row items-center justify-between">
+          <CardTitle>Content</CardTitle>
+          <div className="flex gap-2">
+            {isEditing ? (
+              <>
+                <Button size="sm" onClick={handleSave}><Save className="h-4 w-4" /></Button>
+                <Button size="sm" variant="outline" onClick={handleCancel}><X className="h-4 w-4" /></Button>
+              </>
+            ) : (
+              <>
+                <Button size="sm" onClick={() => setIsEditing(true)}><Edit className="h-4 w-4" /></Button>
+                <Button size="sm" variant="destructive" onClick={handleDelete}><Trash2 className="h-4 w-4" /></Button>
+              </>
+            )}
+          </div>
+        </CardHeader>
+        <CardContent>
+          {isEditing ? (