llama-stack-mirror/llama_stack/ui/hooks/use-audio-recording.ts
Francisco Arceo f3d5459647
feat(UI): adding MVP playground UI (#2828)
# What does this PR do?
I've been tinkering with a simple chat playground in the UI, so I'm
opening this PR as a work in progress.

If you look at the first commit, you'll see it contains the bulk of the
changes. The rest of the changed files come from installing the
`shadcn` components.

Note this is missing a lot; e.g.,
- sessions
- document upload
- audio (the shadcn chat components from
https://shadcn-chatbot-kit.vercel.app/docs/components/chat install audio
support by default)

I still need to wire up a lot more to make it fully functional, but it
already does basic chat using the LS TypeScript Client.
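
For reference, roughly the shape of that chat call through the `llama-stack-client` TypeScript package. This is a hedged sketch, not the exact code in the PR: the `inference.chatCompletion` method and response field names are my reading of the client, and the base-URL env var and model id are made up for illustration.

```ts
import LlamaStackClient from "llama-stack-client"

// Hypothetical wiring; the playground may configure the client differently.
const client = new LlamaStackClient({
  baseURL: process.env.NEXT_PUBLIC_LLAMA_STACK_URL, // assumed env var
})

export async function sendChatMessage(userText: string) {
  // Non-streaming chat completion against the Llama Stack inference API.
  const response = await client.inference.chatCompletion({
    model_id: "meta-llama/Llama-3.2-3B-Instruct", // example model id
    messages: [{ role: "user", content: userText }],
  })
  // The assistant reply lives on completion_message; its content may be a
  // plain string or a list of content items depending on the model.
  return response.completion_message?.content
}
```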

Basic demo: 

<img width="1329" height="1430" alt="Image"
src="https://github.com/user-attachments/assets/917a2096-36d4-4925-b83b-f1f2cda98698"
/>

<img width="1319" height="1424" alt="Image"
src="https://github.com/user-attachments/assets/fab1583b-1c72-4bf3-baf2-405aee13c6bb"
/>



## Test Plan

---------

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
2025-07-30 19:44:16 -07:00


import { useEffect, useRef, useState } from "react"

import { recordAudio } from "@/lib/audio-utils"

interface UseAudioRecordingOptions {
  transcribeAudio?: (blob: Blob) => Promise<string>
  onTranscriptionComplete?: (text: string) => void
}

export function useAudioRecording({
  transcribeAudio,
  onTranscriptionComplete,
}: UseAudioRecordingOptions) {
  const [isListening, setIsListening] = useState(false)
  const [isSpeechSupported, setIsSpeechSupported] = useState(!!transcribeAudio)
  const [isRecording, setIsRecording] = useState(false)
  const [isTranscribing, setIsTranscribing] = useState(false)
  const [audioStream, setAudioStream] = useState<MediaStream | null>(null)
  const activeRecordingRef = useRef<any>(null)

  useEffect(() => {
    const checkSpeechSupport = async () => {
      const hasMediaDevices = !!(
        navigator.mediaDevices && navigator.mediaDevices.getUserMedia
      )
      setIsSpeechSupported(hasMediaDevices && !!transcribeAudio)
    }

    checkSpeechSupport()
  }, [transcribeAudio])

  const stopRecording = async () => {
    setIsRecording(false)
    setIsTranscribing(true)
    try {
      // First stop the recording to get the final blob
      recordAudio.stop()

      // Wait for the recording promise to resolve with the final blob
      const recording = await activeRecordingRef.current

      if (transcribeAudio) {
        const text = await transcribeAudio(recording)
        onTranscriptionComplete?.(text)
      }
    } catch (error) {
      console.error("Error transcribing audio:", error)
    } finally {
      setIsTranscribing(false)
      setIsListening(false)
      if (audioStream) {
        audioStream.getTracks().forEach((track) => track.stop())
        setAudioStream(null)
      }
      activeRecordingRef.current = null
    }
  }

  const toggleListening = async () => {
    if (!isListening) {
      try {
        setIsListening(true)
        setIsRecording(true)

        // Get audio stream first
        const stream = await navigator.mediaDevices.getUserMedia({
          audio: true,
        })
        setAudioStream(stream)

        // Start recording with the stream
        activeRecordingRef.current = recordAudio(stream)
      } catch (error) {
        console.error("Error recording audio:", error)
        setIsListening(false)
        setIsRecording(false)
        if (audioStream) {
          audioStream.getTracks().forEach((track) => track.stop())
          setAudioStream(null)
        }
      }
    } else {
      await stopRecording()
    }
  }

  return {
    isListening,
    isSpeechSupported,
    isRecording,
    isTranscribing,
    audioStream,
    toggleListening,
    stopRecording,
  }
}
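
For context, a minimal hypothetical consumer of this hook. The `transcribeAudio` helper and `/api/transcribe` route below are illustrative assumptions, not part of this PR; the hook only needs some `(blob: Blob) => Promise<string>`, and the import path is inferred from the file's location under `ui/hooks`.

```tsx
"use client"

import { useAudioRecording } from "@/hooks/use-audio-recording"

// Hypothetical transcription helper: post the recorded blob somewhere and
// get text back. Any (blob: Blob) => Promise<string> works here.
async function transcribeAudio(blob: Blob): Promise<string> {
  const body = new FormData()
  body.append("file", blob, "recording.webm")
  const res = await fetch("/api/transcribe", { method: "POST", body }) // assumed route
  const { text } = await res.json()
  return text
}

export function MicButton({ onText }: { onText: (text: string) => void }) {
  const { isSpeechSupported, isRecording, isTranscribing, toggleListening } =
    useAudioRecording({
      transcribeAudio,
      onTranscriptionComplete: onText,
    })

  // The hook reports speech as unsupported when getUserMedia is missing
  // or no transcribeAudio callback was provided.
  if (!isSpeechSupported) return null

  return (
    <button type="button" onClick={toggleListening} disabled={isTranscribing}>
      {isRecording ? "Stop" : isTranscribing ? "Transcribing..." : "Record"}
    </button>
  )
}
```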