Overview
The Unmute frontend is a Next.js application that provides the user interface for voice conversations. It handles microphone input, Opus audio encoding, WebSocket communication, and real-time audio playback.
Technology Stack:
- Framework: Next.js 14 (React 18, TypeScript)
- Audio: Web Audio API, Opus codec (opus-recorder)
- WebSocket: Native WebSocket API
- Styling: Tailwind CSS
- State Management: React hooks
Application Structure
Location: frontend/src/app/
frontend/src/app/
├── page.tsx # Root page (renders Unmute component)
├── layout.tsx # App layout
├── Unmute.tsx # Main conversation component
├── UnmuteConfigurator.tsx # Voice/character selection
├── UnmuteHeader.tsx # Header with controls
├── audioUtil.ts # Audio processing utilities
├── useAudioProcessor.ts # Audio input hook
├── useBackendServerUrl.ts # Server URL logic
└── ...
Main Component
File: frontend/src/app/Unmute.tsx
export default function Unmute() {
  const serverUrl = useBackendServerUrl();

  // WebSocket connection
  const [ws, setWs] = useState<WebSocket | null>(null);
  const [connectionState, setConnectionState] =
    useState<ConnectionState>('disconnected');

  // Voice selection
  const [selectedVoice, setSelectedVoice] = useState<Voice | null>(null);

  // Audio state
  const [isMuted, setIsMuted] = useState(false);
  const [audioContext, setAudioContext] = useState<AudioContext | null>(null);

  // Subtitles
  const [userTranscript, setUserTranscript] = useState('');
  const [botTranscript, setBotTranscript] = useState('');

  // Connect/disconnect (sendSessionUpdate and handleServerMessage are shown
  // in the WebSocket Communication section below)
  const connect = useCallback(() => {
    const websocket = new WebSocket(serverUrl, 'realtime');
    websocket.onopen = () => {
      setConnectionState('connected');
      sendSessionUpdate(websocket, selectedVoice);
    };
    websocket.onmessage = (event) => {
      handleServerMessage(JSON.parse(event.data));
    };
    websocket.onerror = () => {
      setConnectionState('error');
    };
    websocket.onclose = () => {
      setConnectionState('disconnected');
    };
    setWs(websocket);
  }, [selectedVoice, serverUrl]);

  return (
    <div>
      <UnmuteHeader onConnect={connect} />
      {connectionState === 'connected' ? (
        <ConversationView />
      ) : (
        <UnmuteConfigurator onSelectVoice={setSelectedVoice} />
      )}
    </div>
  );
}
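The ConnectionState type isn't defined in this excerpt; judging from the values assigned above, it is presumably a string union along these lines (the real type may include more states):

// Inferred from the setConnectionState calls above, not copied from the
// Unmute source.
type ConnectionState = 'disconnected' | 'connected' | 'error';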
WebSocket Communication
Connection Setup
const serverUrl = useBackendServerUrl();
const websocket = new WebSocket(serverUrl, 'realtime');

websocket.onopen = () => {
  console.log('Connected to backend');

  // Send initial session configuration
  const sessionUpdate = {
    type: 'session.update',
    event_id: generateEventId(),
    session: {
      instructions: selectedVoice.instructions,
      voice: selectedVoice.name,
      allow_recording: allowRecording,
    },
  };
  websocket.send(JSON.stringify(sessionUpdate));
};
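generateEventId is used here and in the audio path below but isn't shown in this section. Event IDs only need to be unique per connection, so a plausible implementation (an assumption, not the actual Unmute helper) is:

// Hypothetical helper: crypto.randomUUID() is available in all modern
// browsers in secure contexts (HTTPS or localhost).
function generateEventId(): string {
  return `event_${crypto.randomUUID()}`;
}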
Server URL Logic
File: frontend/src/app/useBackendServerUrl.ts
export function useBackendServerUrl(): string {
  // In development: use localhost
  if (process.env.NODE_ENV === 'development') {
    return 'ws://localhost:8000/v1/realtime';
  }

  // In production: use same host, different protocol
  const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
  const host = window.location.host;
  return `${protocol}//${host}/api/v1/realtime`;
}
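Note that this hook reads window during render, which only works in client components after hydration. If server rendering ever becomes a concern, a deferred variant along these lines (a sketch, not the Unmute implementation) avoids touching window on the server:

import { useEffect, useState } from 'react';

// SSR-safe sketch: window is only read inside useEffect, which never runs
// on the server. Callers must handle the initial null value.
export function useBackendServerUrlDeferred(): string | null {
  const [url, setUrl] = useState<string | null>(null);
  useEffect(() => {
    if (process.env.NODE_ENV === 'development') {
      setUrl('ws://localhost:8000/v1/realtime');
      return;
    }
    const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
    setUrl(`${protocol}//${window.location.host}/api/v1/realtime`);
  }, []);
  return url;
}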
The /api/ prefix matters: in production, Traefik routes /api/* requests to the backend and strips the prefix before forwarding.
Message Handling
websocket.onmessage = (event) => {
  const message = JSON.parse(event.data);

  switch (message.type) {
    case 'session.updated':
      console.log('Session configured:', message.session);
      break;
    case 'conversation.item.input_audio_transcription.delta':
      // User speech transcription
      setUserTranscript(prev => prev + message.delta);
      break;
    case 'input_audio_buffer.speech_stopped':
      // User stopped speaking
      setUserTranscript('');
      break;
    case 'response.created':
      // Bot started responding
      setBotTranscript('');
      break;
    case 'response.text.delta':
      // Bot speech transcription
      setBotTranscript(prev => prev + message.delta);
      break;
    case 'response.audio.delta':
      // Bot audio (base64 Opus)
      playAudio(message.delta);
      break;
    case 'response.audio.done':
      // Bot finished speaking
      setBotTranscript('');
      break;
    case 'error':
      console.error('Server error:', message.error);
      showError(message.error.message);
      break;
  }
};
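The events handled above can be modeled as a discriminated union so that the switch narrows message types. A minimal sketch covering only the cases shown here (field shapes are assumptions based on this handler, not the full protocol):

// Discriminated union for the messages handled above; `type` is the
// discriminant, so each case body gets the correctly narrowed shape.
type ServerMessage =
  | { type: 'session.updated'; session: unknown }
  | { type: 'conversation.item.input_audio_transcription.delta'; delta: string }
  | { type: 'input_audio_buffer.speech_stopped' }
  | { type: 'response.created' }
  | { type: 'response.text.delta'; delta: string }
  | { type: 'response.audio.delta'; delta: string } // base64-encoded Opus
  | { type: 'response.audio.done' }
  | { type: 'error'; error: { type: string; message: string } };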
Audio Input
Microphone Capture
File: frontend/src/app/useAudioProcessor.ts
import OpusRecorder from 'opus-recorder'; // default export is the Recorder class

export function useAudioProcessor(
  ws: WebSocket | null,
  audioContext: AudioContext | null, // created by the caller (see Unmute.tsx)
  isMuted: boolean
) {
  const [stream, setStream] = useState<MediaStream | null>(null);
  const recorderRef = useRef<OpusRecorder | null>(null);

  useEffect(() => {
    if (!ws || !audioContext || isMuted) return;
    let mediaStream: MediaStream | null = null;

    // Request microphone access
    navigator.mediaDevices.getUserMedia({ audio: true })
      .then((ms) => {
        mediaStream = ms;
        setStream(ms);

        // Create Opus recorder
        const recorder = new OpusRecorder({
          encoderPath: '/opus-encoder.js',
          sourceNode: audioContext.createMediaStreamSource(ms),
          encoderSampleRate: 24000,
          encoderApplication: 2048, // VOIP (optimized for speech)
          streamPages: true,        // emit Ogg pages as soon as encoded
          numberOfChannels: 1,
        });

        // Send each encoded page to the backend as a base64 payload
        recorder.ondataavailable = (typedArray: Uint8Array) => {
          const base64 = arrayBufferToBase64(typedArray.buffer);
          ws.send(JSON.stringify({
            type: 'input_audio_buffer.append',
            event_id: generateEventId(),
            audio: base64,
          }));
        };

        recorder.start();
        recorderRef.current = recorder;
      })
      .catch((error) => {
        console.error('Microphone access denied:', error);
      });

    return () => {
      recorderRef.current?.stop();
      // Use the local mediaStream (not the state value, which is stale in
      // this closure) so the tracks are actually released.
      mediaStream?.getTracks().forEach((track) => track.stop());
    };
  }, [ws, audioContext, isMuted]);

  return stream; // exposed so callers (e.g. the visualizer) can use it
}
Opus Encoding
Library: opus-recorder (WebAssembly Opus encoder)
Configuration:
- Sample rate: 24kHz
- Channels: Mono
- Application: VOIP (optimized for speech)
- Streaming: Pages sent as soon as encoded
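The application value 2048 corresponds to libopus's OPUS_APPLICATION_VOIP constant. Naming the magic numbers makes the configuration self-documenting (a sketch, not from the Unmute source):

// libopus application modes (see opus_defines.h). VOIP tunes the encoder for
// speech intelligibility; AUDIO favors fidelity; LOWDELAY minimizes latency.
const OPUS_APPLICATION_VOIP = 2048;
const OPUS_APPLICATION_AUDIO = 2049;
const OPUS_APPLICATION_RESTRICTED_LOWDELAY = 2051;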
Base64 Encoding
File: frontend/src/app/audioUtil.ts
export function arrayBufferToBase64(buffer: ArrayBuffer): string {
  const bytes = new Uint8Array(buffer);
  let binary = '';
  for (let i = 0; i < bytes.byteLength; i++) {
    binary += String.fromCharCode(bytes[i]);
  }
  return btoa(binary);
}
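The playback path below uses the inverse helper, base64ToArrayBuffer, which isn't shown in this section; a matching implementation would be:

// Inverse of arrayBufferToBase64 above; assumed to live alongside it in
// audioUtil.ts.
export function base64ToArrayBuffer(base64: string): ArrayBuffer {
  const binary = atob(base64);
  const bytes = new Uint8Array(binary.length);
  for (let i = 0; i < binary.length; i++) {
    bytes[i] = binary.charCodeAt(i);
  }
  return bytes.buffer;
}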
Audio Output
Opus Decoding
import OpusDecoder from 'opus-decoder';

const decoder = new OpusDecoder({
  sampleRate: 24000,
  channels: 1,
});

async function playAudio(base64Opus: string) {
  // 1. Decode base64
  const opusBytes = base64ToArrayBuffer(base64Opus);

  // 2. Decode Opus to PCM
  const pcmData = await decoder.decode(new Uint8Array(opusBytes));

  // 3. Create AudioBuffer
  const audioBuffer = audioContext.createBuffer(
    1,              // channels
    pcmData.length, // length
    24000           // sample rate
  );
  audioBuffer.getChannelData(0).set(pcmData);

  // 4. Play
  const source = audioContext.createBufferSource();
  source.buffer = audioBuffer;
  source.connect(audioContext.destination);
  source.start();
}
Audio Queue Management
Audio deltas can arrive faster than real time, and starting each buffer immediately would overlap them. A FIFO queue serializes playback, starting the next buffer only when the current one ends:
class AudioQueue {
  private queue: AudioBuffer[] = [];
  private isPlaying = false;

  // The shared AudioContext is injected rather than read from a global.
  constructor(private audioContext: AudioContext) {}

  enqueue(buffer: AudioBuffer) {
    this.queue.push(buffer);
    this.playNext();
  }

  private playNext() {
    if (this.isPlaying || this.queue.length === 0) return;
    this.isPlaying = true;

    const buffer = this.queue.shift()!;
    const source = this.audioContext.createBufferSource();
    source.buffer = buffer;
    source.connect(this.audioContext.destination);
    // Chain playback: when one buffer ends, start the next queued one.
    source.onended = () => {
      this.isPlaying = false;
      this.playNext();
    };
    source.start();
  }

  clear() {
    this.queue = [];
  }
}
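Wiring the queue into playback replaces the direct source.start() in playAudio with an enqueue call. A sketch, assuming the queue and decoder share the single 24 kHz AudioContext:

// Hypothetical integration: decoded chunks play back-to-back instead of
// overlapping when deltas arrive faster than real time; clear() drops any
// unplayed audio when the bot is interrupted.
const audioContext = new AudioContext({ sampleRate: 24000 });
const audioQueue = new AudioQueue(audioContext);

function playDecodedChunk(pcmData: Float32Array) {
  const audioBuffer = audioContext.createBuffer(1, pcmData.length, 24000);
  audioBuffer.getChannelData(0).set(pcmData);
  audioQueue.enqueue(audioBuffer);
}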
Voice Configuration
UnmuteConfigurator Component
File: frontend/src/app/UnmuteConfigurator.tsx
export function UnmuteConfigurator({
  onSelectVoice,
}: {
  onSelectVoice: (voice: Voice) => void;
}) {
  const [voices, setVoices] = useState<Voice[]>([]);
  const [loading, setLoading] = useState(true);

  useEffect(() => {
    // Fetch available voices
    fetch('/api/v1/voices')
      .then(res => res.json())
      .then(data => {
        setVoices(data);
        setLoading(false);
      });
  }, []);

  return (
    <div className="grid grid-cols-2 gap-4">
      {voices.map(voice => (
        <VoiceCard
          key={voice.name}
          voice={voice}
          onClick={() => onSelectVoice(voice)}
        />
      ))}
    </div>
  );
}
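The Voice type isn't defined in this section. From its uses above (name as the React key and session voice, instructions as the system prompt), a minimal shape would be (the real type likely carries more metadata):

// Inferred from usage in this section only; the actual type in the Unmute
// source probably includes fields like a description or avatar.
interface Voice {
  name: string;         // sent as session.voice; also used as the React key
  instructions: string; // sent as session.instructions
}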
Voice Upload
File: frontend/src/app/VoiceUpload.tsx
export function VoiceUpload({
  onVoiceCreated,
}: {
  onVoiceCreated: (voiceName: string) => void;
}) {
  const handleFileUpload = async (file: File) => {
    const formData = new FormData();
    formData.append('file', file);

    const response = await fetch('/api/v1/voices', {
      method: 'POST',
      body: formData,
    });
    const data = await response.json();
    onVoiceCreated(data.name); // e.g. "custom:abc123"
  };

  return (
    <input
      type="file"
      accept="audio/*"
      onChange={(e) => {
        const file = e.target.files?.[0];
        if (file) handleFileUpload(file);
      }}
    />
  );
}
Subtitles
SingleRoleSubtitles Component
File: frontend/src/app/SingleRoleSubtitles.tsx
export function SingleRoleSubtitles({
  role,
  text,
}: {
  role: 'user' | 'assistant';
  text: string;
}) {
  return (
    <div
      className={`
        subtitle
        ${role === 'user' ? 'user-subtitle' : 'bot-subtitle'}
      `}
    >
      <span className="role">{role === 'user' ? 'You' : 'Bot'}:</span>
      <span className="text">{text}</span>
    </div>
  );
}
Subtitle Display
<div className="subtitles-container">
  {userTranscript && (
    <SingleRoleSubtitles role="user" text={userTranscript} />
  )}
  {botTranscript && (
    <SingleRoleSubtitles role="assistant" text={botTranscript} />
  )}
</div>
Audio Visualization
useAudioVisualizerCircle Hook
File: frontend/src/app/useAudioVisualizerCircle.ts
export function useAudioVisualizerCircle(
  audioContext: AudioContext | null,
  stream: MediaStream | null
) {
  const canvasRef = useRef<HTMLCanvasElement>(null);

  useEffect(() => {
    if (!audioContext || !stream) return;

    const analyser = audioContext.createAnalyser();
    const source = audioContext.createMediaStreamSource(stream);
    source.connect(analyser);
    analyser.fftSize = 256;

    const bufferLength = analyser.frequencyBinCount;
    const dataArray = new Uint8Array(bufferLength);

    const canvas = canvasRef.current!;
    const ctx = canvas.getContext('2d')!;
    let frameId: number;

    function draw() {
      frameId = requestAnimationFrame(draw);
      analyser.getByteFrequencyData(dataArray);

      // Draw a circle whose radius tracks the average volume
      const average = dataArray.reduce((a, b) => a + b) / bufferLength;
      const radius = 50 + (average / 255) * 50;

      ctx.clearRect(0, 0, canvas.width, canvas.height);
      ctx.beginPath();
      ctx.arc(canvas.width / 2, canvas.height / 2, radius, 0, 2 * Math.PI);
      ctx.fillStyle = '#4CAF50';
      ctx.fill();
    }
    draw();

    // Stop the animation loop and detach the analyser on cleanup
    return () => {
      cancelAnimationFrame(frameId);
      source.disconnect();
    };
  }, [audioContext, stream]);

  return canvasRef;
}
Keyboard Shortcuts
File: frontend/src/app/useKeyboardShortcuts.ts
const ALLOW_DEV_MODE = false; // Set to true for debugging

export function useKeyboardShortcuts({
  onToggleSubtitles,
  onToggleDevMode,
}: {
  onToggleSubtitles: () => void;
  onToggleDevMode: () => void;
}) {
  useEffect(() => {
    const handleKeyPress = (event: KeyboardEvent) => {
      if (event.key === 's' || event.key === 'S') {
        onToggleSubtitles();
      }
      if (ALLOW_DEV_MODE && (event.key === 'd' || event.key === 'D')) {
        onToggleDevMode();
      }
    };
    window.addEventListener('keydown', handleKeyPress);
    return () => window.removeEventListener('keydown', handleKeyPress);
  }, [onToggleSubtitles, onToggleDevMode]);
}
- S: Toggle subtitles
- D: Toggle dev mode (if enabled)
Wake Lock
File: frontend/src/app/useWakeLock.ts
Prevents the screen from sleeping during a conversation:
export function useWakeLock(isConnected: boolean) {
  const wakeLockRef = useRef<WakeLockSentinel | null>(null);

  useEffect(() => {
    if (!isConnected) return;

    if ('wakeLock' in navigator) {
      navigator.wakeLock.request('screen')
        .then((wakeLock) => {
          wakeLockRef.current = wakeLock;
        })
        .catch((err) => {
          console.error('Wake lock failed:', err);
        });
    }

    return () => {
      if (wakeLockRef.current) {
        wakeLockRef.current.release();
      }
    };
  }, [isConnected]);
}
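Browsers release wake locks automatically whenever the tab is hidden, so a more robust version re-requests the lock on visibilitychange. A hedged extension of the hook above (added effect body only):

// Sketch: re-acquire the wake lock when the page becomes visible again.
// Assumes the same wakeLockRef and isConnected as in useWakeLock above.
useEffect(() => {
  if (!isConnected) return;
  const onVisibilityChange = () => {
    if (document.visibilityState === 'visible' && 'wakeLock' in navigator) {
      navigator.wakeLock.request('screen')
        .then((lock) => { wakeLockRef.current = lock; })
        .catch(() => { /* best-effort; ignore failures */ });
    }
  };
  document.addEventListener('visibilitychange', onVisibilityChange);
  return () => document.removeEventListener('visibilitychange', onVisibilityChange);
}, [isConnected]);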
Deployment
Docker
File: frontend/hot-reloading.Dockerfile
FROM node:20-alpine
WORKDIR /app
# Install pnpm
RUN npm install -g pnpm
# Install dependencies
COPY package.json pnpm-lock.yaml ./
RUN pnpm install
# Copy source
COPY . .
# Development server (hot reload)
CMD ["pnpm", "dev"]
Docker Compose
File: docker-compose.yml:18
frontend:
  image: unmute-frontend:latest
  build:
    context: frontend/
    dockerfile: hot-reloading.Dockerfile
  volumes:
    - ./frontend/src:/app/src  # Hot reload in dev
  labels:
    - "traefik.enable=true"
    - "traefik.http.routers.frontend.rule=PathPrefix(`/`)"
    - "traefik.http.services.frontend.loadbalancer.server.port=3000"
Running Locally
cd frontend
# Install dependencies
pnpm install
# Development server
pnpm dev
# Production build
pnpm build
pnpm start
Error Handling
Connection Errors
websocket.onerror = (error) => {
  console.error('WebSocket error:', error);
  setConnectionState('error');
  setErrorMessage('Failed to connect to server');
};

websocket.onclose = (event) => {
  // 1011 = "internal server error" (RFC 6455 close code)
  if (event.code === 1011) {
    setErrorMessage('Server error: ' + event.reason);
  } else {
    setErrorMessage('Connection closed');
  }
  setConnectionState('disconnected');
};
Server Errors
if (message.type === 'error') {
  if (message.error.type === 'fatal') {
    // Fatal error: disconnect
    setErrorMessage(message.error.message);
    ws.close();
  } else if (message.error.type === 'warning') {
    // Warning: show but continue
    showWarning(message.error.message);
  } else {
    // Validation error: log only
    console.error('Server error:', message.error);
  }
}