Skip to main content

Overview

The Unmute frontend is a Next.js application that provides the user interface for voice conversations. It handles microphone input, Opus audio encoding, WebSocket communication, and real-time audio playback. Technology Stack:
  • Framework: Next.js 14 (React 18, TypeScript)
  • Audio: Web Audio API, Opus codec (opus-recorder)
  • WebSocket: Native WebSocket API
  • Styling: Tailwind CSS
  • State Management: React hooks

Application Structure

Location: frontend/src/app/
frontend/src/app/
├── page.tsx              # Root page (renders Unmute component)
├── layout.tsx            # App layout
├── Unmute.tsx            # Main conversation component
├── UnmuteConfigurator.tsx # Voice/character selection
├── UnmuteHeader.tsx      # Header with controls
├── audioUtil.ts          # Audio processing utilities
├── useAudioProcessor.ts  # Audio input hook
├── useBackendServerUrl.ts # Server URL logic
└── ...

Main Component

File: frontend/src/app/Unmute.tsx
/**
 * Top-level conversation component: owns the WebSocket connection,
 * voice selection, mute state, and the live transcripts for both sides.
 *
 * NOTE(review): `serverUrl`, `sendSessionUpdate`, `handleServerMessage`,
 * `ConnectionState`, `Voice`, and `ConversationView` are defined outside
 * this excerpt — confirm against the full file.
 */
export default function Unmute() {
  // WebSocket connection
  const [ws, setWs] = useState<WebSocket | null>(null);
  const [connectionState, setConnectionState] = useState<ConnectionState>('disconnected');
  
  // Voice selection
  const [selectedVoice, setSelectedVoice] = useState<Voice | null>(null);
  
  // Audio state
  const [isMuted, setIsMuted] = useState(false);
  const [audioContext, setAudioContext] = useState<AudioContext | null>(null);
  
  // Subtitles
  const [userTranscript, setUserTranscript] = useState('');
  const [botTranscript, setBotTranscript] = useState('');
  
  // Connect/disconnect. Opens the realtime socket with the 'realtime'
  // subprotocol and pushes the selected voice to the backend on open.
  // NOTE(review): an existing socket is not closed before reconnecting —
  // verify that callers guard against double-connect.
  const connect = useCallback(() => {
    const websocket = new WebSocket(serverUrl, 'realtime');
    
    websocket.onopen = () => {
      setConnectionState('connected');
      sendSessionUpdate(websocket, selectedVoice);
    };
    
    websocket.onmessage = (event) => {
      handleServerMessage(JSON.parse(event.data));
    };
    
    websocket.onerror = () => {
      setConnectionState('error');
    };
    
    websocket.onclose = () => {
      setConnectionState('disconnected');
    };
    
    setWs(websocket);
  }, [selectedVoice, serverUrl]);
  
  // While connected, show the conversation; otherwise show the
  // voice/character picker.
  return (
    <div>
      <UnmuteHeader onConnect={connect} />
      {connectionState === 'connected' ? (
        <ConversationView />
      ) : (
        <UnmuteConfigurator onSelectVoice={setSelectedVoice} />
      )}
    </div>
  );
}

WebSocket Communication

Connection Setup

// Resolve the backend URL (dev vs. production — see useBackendServerUrl
// below) and open the socket with the 'realtime' subprotocol.
const serverUrl = useBackendServerUrl();
const websocket = new WebSocket(serverUrl, 'realtime');

websocket.onopen = () => {
  console.log('Connected to backend');
  
  // Send initial session configuration: which voice/persona to use and
  // whether the user consented to recording.
  // NOTE(review): `generateEventId`, `selectedVoice`, and
  // `allowRecording` come from outside this excerpt.
  const sessionUpdate = {
    type: 'session.update',
    event_id: generateEventId(),
    session: {
      instructions: selectedVoice.instructions,
      voice: selectedVoice.name,
      allow_recording: allowRecording,
    },
  };
  
  websocket.send(JSON.stringify(sessionUpdate));
};

Server URL Logic

File: frontend/src/app/useBackendServerUrl.ts
/**
 * Returns the WebSocket URL of the backend realtime endpoint.
 *
 * Development talks straight to the local backend; production derives
 * the URL from the current origin, matching ws/wss to http/https.
 */
export function useBackendServerUrl(): string {
  if (process.env.NODE_ENV === 'development') {
    return 'ws://localhost:8000/v1/realtime';
  }

  // Same host as the page; secure pages must use secure WebSockets.
  const { protocol, host } = window.location;
  const wsProtocol = protocol === 'https:' ? 'wss:' : 'ws:';

  return `${wsProtocol}//${host}/api/v1/realtime`;
}
Why the /api/ prefix: Traefik routes /api/* to the backend and strips the prefix before forwarding.

Message Handling

// Dispatch on the server's event type. Unknown types are silently
// ignored. NOTE(review): the transcript/error setters are defined
// outside this excerpt.
websocket.onmessage = (event) => {
  const message = JSON.parse(event.data);
  
  switch (message.type) {
    case 'session.updated':
      console.log('Session configured:', message.session);
      break;
    
    case 'conversation.item.input_audio_transcription.delta':
      // User speech transcription — appended incrementally.
      setUserTranscript(prev => prev + message.delta);
      break;
    
    case 'input_audio_buffer.speech_stopped':
      // User stopped speaking — clear their subtitle.
      setUserTranscript('');
      break;
    
    case 'response.created':
      // Bot started responding — reset its subtitle.
      setBotTranscript('');
      break;
    
    case 'response.text.delta':
      // Bot speech transcription — appended incrementally.
      setBotTranscript(prev => prev + message.delta);
      break;
    
    case 'response.audio.delta':
      // Bot audio chunk (base64-encoded Opus).
      playAudio(message.delta);
      break;
    
    case 'response.audio.done':
      // Bot finished speaking — clear its subtitle.
      setBotTranscript('');
      break;
    
    case 'error':
      console.error('Server error:', message.error);
      showError(message.error.message);
      break;
  }
};

Audio Input

Microphone Capture

File: frontend/src/app/useAudioProcessor.ts
/**
 * Captures microphone audio, Opus-encodes it, and streams it to the
 * backend over the given WebSocket as base64 `input_audio_buffer.append`
 * events. Tears everything down when muted or disconnected.
 */
export function useAudioProcessor(
  ws: WebSocket | null,
  isMuted: boolean
) {
  const [stream, setStream] = useState<MediaStream | null>(null);
  const recorderRef = useRef<OpusRecorder | null>(null);
  
  useEffect(() => {
    if (!ws || isMuted) return;
    
    // BUG FIX: the previous cleanup read the `stream` state variable,
    // but the closure captures the value from the render this effect ran
    // in — null on first run — so the mic tracks were never stopped.
    // Track the live stream in a local instead.
    let activeStream: MediaStream | null = null;
    let cancelled = false;
    
    // Request microphone access
    navigator.mediaDevices.getUserMedia({ audio: true })
      .then((mediaStream) => {
        if (cancelled) {
          // Cleanup already ran (unmute toggled / unmounted) while the
          // permission prompt was open: release the mic immediately.
          mediaStream.getTracks().forEach((track) => track.stop());
          return;
        }
        activeStream = mediaStream;
        setStream(mediaStream);
        
        // Create Opus recorder: 24kHz mono, VOIP profile, pages
        // streamed as soon as they are encoded.
        // NOTE(review): `audioContext` is not defined in this excerpt —
        // confirm where it comes from in the full file.
        const recorder = new OpusRecorder({
          encoderPath: '/opus-encoder.js',
          sourceNode: audioContext.createMediaStreamSource(mediaStream),
          encoderSampleRate: 24000,
          encoderApplication: 2048, // VOIP
          streamPages: true,
          numberOfChannels: 1,
        });
        
        // Forward each encoded page to the backend as a base64 chunk.
        recorder.ondataavailable = (typedArray: Uint8Array) => {
          const base64 = arrayBufferToBase64(typedArray.buffer);
          
          ws.send(JSON.stringify({
            type: 'input_audio_buffer.append',
            event_id: generateEventId(),
            audio: base64,
          }));
        };
        
        recorder.start();
        recorderRef.current = recorder;
      })
      .catch((error) => {
        console.error('Microphone access denied:', error);
      });
    
    return () => {
      cancelled = true;
      if (recorderRef.current) {
        recorderRef.current.stop();
        recorderRef.current = null;
      }
      // Stop the tracks we actually acquired (not the stale state).
      activeStream?.getTracks().forEach((track) => track.stop());
    };
  }, [ws, isMuted]);
}

Opus Encoding

Library: opus-recorder (a WebAssembly Opus encoder). Configuration:
  • Sample rate: 24kHz
  • Channels: Mono
  • Application: VOIP (optimized for speech)
  • Streaming: Pages sent as soon as encoded
Frame Size: ~20ms (determined by Opus encoder)

Base64 Encoding

File: frontend/src/app/audioUtil.ts
/**
 * Base64-encodes a raw buffer.
 *
 * `btoa` expects a "binary string" (one character per byte, code points
 * 0-255), so we first map each byte to its character.
 */
export function arrayBufferToBase64(buffer: ArrayBuffer): string {
  const bytes = new Uint8Array(buffer);
  const chars: string[] = [];

  for (const byte of bytes) {
    chars.push(String.fromCharCode(byte));
  }

  return btoa(chars.join(''));
}

Audio Output

Opus Decoding

import OpusDecoder from 'opus-decoder';

// Decoder configured to match the input encoder settings (24kHz mono).
const decoder = new OpusDecoder({
  sampleRate: 24000,
  channels: 1,
});

// Decode one base64 Opus chunk from the server and play it.
// NOTE(review): `audioContext` and `base64ToArrayBuffer` come from
// outside this excerpt. This starts playback immediately; confirm
// whether the AudioQueue below is the path actually in use.
async function playAudio(base64Opus: string) {
  // 1. Decode base64
  const opusBytes = base64ToArrayBuffer(base64Opus);
  
  // 2. Decode Opus to PCM
  const pcmData = await decoder.decode(new Uint8Array(opusBytes));
  
  // 3. Create AudioBuffer
  const audioBuffer = audioContext.createBuffer(
    1,                    // channels
    pcmData.length,       // length
    24000                 // sample rate
  );
  
  audioBuffer.getChannelData(0).set(pcmData);
  
  // 4. Play
  const source = audioContext.createBufferSource();
  source.buffer = audioBuffer;
  source.connect(audioContext.destination);
  source.start();
}

Audio Queue Management

/**
 * Serializes playback of decoded AudioBuffers so audio chunks play
 * back-to-back instead of overlapping.
 *
 * NOTE(review): relies on an `audioContext` defined outside this
 * excerpt.
 */
class AudioQueue {
  private pending: AudioBuffer[] = [];
  private busy = false;

  /** Add a chunk and kick off playback if nothing is playing. */
  enqueue(buffer: AudioBuffer) {
    this.pending.push(buffer);
    this.playNext();
  }

  /** Start the next queued chunk, if idle and one is available. */
  private playNext() {
    if (this.busy) return;
    const next = this.pending.shift();
    if (next === undefined) return;

    this.busy = true;
    const source = audioContext.createBufferSource();
    source.buffer = next;
    source.connect(audioContext.destination);

    // Chain playback: when this chunk ends, start the next one.
    source.onended = () => {
      this.busy = false;
      this.playNext();
    };

    source.start();
  }

  /** Drop queued chunks (does not stop the currently playing one). */
  clear() {
    this.pending = [];
  }
}

Voice Configuration

UnmuteConfigurator Component

File: frontend/src/app/UnmuteConfigurator.tsx
/**
 * Voice/character picker. Loads the available voices from the backend
 * once on mount and renders one card per voice.
 */
export function UnmuteConfigurator({
  onSelectVoice,
}: {
  onSelectVoice: (voice: Voice) => void;
}) {
  const [voices, setVoices] = useState<Voice[]>([]);
  const [loading, setLoading] = useState(true);
  
  useEffect(() => {
    // Fetch available voices. Previously a failed request was an
    // unhandled promise rejection and left `loading` stuck at true.
    fetch('/api/v1/voices')
      .then((res) => {
        if (!res.ok) {
          throw new Error(`Failed to load voices: ${res.status}`);
        }
        return res.json();
      })
      .then((data) => {
        setVoices(data);
      })
      .catch((error) => {
        console.error('Could not fetch voices:', error);
      })
      .finally(() => {
        setLoading(false);
      });
  }, []);
  
  return (
    <div className="grid grid-cols-2 gap-4">
      {voices.map(voice => (
        <VoiceCard
          key={voice.name}
          voice={voice}
          onClick={() => onSelectVoice(voice)}
        />
      ))}
    </div>
  );
}

Voice Upload

File: frontend/src/app/VoiceUpload.tsx
/**
 * File input that uploads a voice sample to the backend and reports the
 * newly created voice name (e.g. "custom:abc123") to the parent.
 */
export function VoiceUpload({
  onVoiceCreated,
}: {
  onVoiceCreated: (voiceName: string) => void;
}) {
  // POST the audio file as multipart form data. Previously a network
  // error or non-2xx response surfaced as an unhandled rejection (or
  // parsed an error body as if it were a voice).
  const handleFileUpload = async (file: File) => {
    const formData = new FormData();
    formData.append('file', file);
    
    try {
      const response = await fetch('/api/v1/voices', {
        method: 'POST',
        body: formData,
      });
      if (!response.ok) {
        console.error('Voice upload failed with status:', response.status);
        return;
      }
      const data = await response.json();
      onVoiceCreated(data.name); // "custom:abc123"
    } catch (error) {
      console.error('Voice upload failed:', error);
    }
  };
  
  return (
    <input
      type="file"
      accept="audio/*"
      onChange={(e) => {
        const file = e.target.files?.[0];
        // Fire-and-forget: errors are handled inside handleFileUpload.
        if (file) void handleFileUpload(file);
      }}
    />
  );
}

Subtitles

SingleRoleSubtitles Component

File: frontend/src/app/SingleRoleSubtitles.tsx
/**
 * Renders one speaker's subtitle line: a role label ("You" / "Bot")
 * followed by the current transcript text.
 */
export function SingleRoleSubtitles({
  role,
  text,
}: {
  role: 'user' | 'assistant';
  text: string;
}) {
  // Label and modifier class both derive from the speaker role.
  const isUser = role === 'user';
  const label = isUser ? 'You' : 'Bot';
  const roleClass = isUser ? 'user-subtitle' : 'bot-subtitle';

  return (
    <div
      className={`
        subtitle
        ${roleClass}
      `}
    >
      <span className="role">{label}:</span>
      <span className="text">{text}</span>
    </div>
  );
}

Subtitle Display

<div className="subtitles-container">
  {/* Each side's subtitle renders only while its transcript is non-empty. */}
  {userTranscript && (
    <SingleRoleSubtitles role="user" text={userTranscript} />
  )}
  {botTranscript && (
    <SingleRoleSubtitles role="assistant" text={botTranscript} />
  )}
</div>

Audio Visualization

useAudioVisualizerCircle Hook

File: frontend/src/app/useAudioVisualizerCircle.ts
/**
 * Draws a pulsing circle on a canvas whose radius tracks the stream's
 * current volume (average of the FFT frequency bins).
 *
 * Returns a ref to attach to the <canvas> element.
 */
export function useAudioVisualizerCircle(
  audioContext: AudioContext | null,
  stream: MediaStream | null
) {
  const canvasRef = useRef<HTMLCanvasElement>(null);
  
  useEffect(() => {
    if (!audioContext || !stream) return;
    
    // Guard instead of non-null assertions: the ref may not be
    // attached yet on this render.
    const canvas = canvasRef.current;
    if (!canvas) return;
    const ctx = canvas.getContext('2d');
    if (!ctx) return;
    
    const analyser = audioContext.createAnalyser();
    const source = audioContext.createMediaStreamSource(stream);
    source.connect(analyser);
    
    analyser.fftSize = 256;
    const bufferLength = analyser.frequencyBinCount;
    const dataArray = new Uint8Array(bufferLength);
    
    // BUG FIX: the animation loop was never cancelled, so every effect
    // re-run (new stream/context) stacked another infinite rAF loop.
    let rafId = 0;
    
    function draw() {
      rafId = requestAnimationFrame(draw);
      
      analyser.getByteFrequencyData(dataArray);
      
      // Radius 50-100px, scaled by average bin magnitude (0-255).
      const average = dataArray.reduce((a, b) => a + b, 0) / bufferLength;
      const radius = 50 + (average / 255) * 50;
      
      ctx.clearRect(0, 0, canvas.width, canvas.height);
      ctx.beginPath();
      ctx.arc(
        canvas.width / 2,
        canvas.height / 2,
        radius,
        0,
        2 * Math.PI
      );
      ctx.fillStyle = '#4CAF50';
      ctx.fill();
    }
    
    draw();
    
    return () => {
      cancelAnimationFrame(rafId);
      source.disconnect();
    };
  }, [audioContext, stream]);
  
  return canvasRef;
}

Keyboard Shortcuts

File: frontend/src/app/useKeyboardShortcuts.ts
const ALLOW_DEV_MODE = false; // Set to true for debugging

/**
 * Global keyboard shortcuts: 'S' toggles subtitles; 'D' toggles dev
 * mode when ALLOW_DEV_MODE is enabled.
 */
export function useKeyboardShortcuts({
  onToggleSubtitles,
  onToggleDevMode,
}: {
  onToggleSubtitles: () => void;
  onToggleDevMode: () => void;
}) {
  useEffect(() => {
    const handleKeyPress = (event: KeyboardEvent) => {
      // Case-insensitive match so the shortcuts work with Shift/CapsLock.
      const key = event.key.toLowerCase();
      if (key === 's') {
        onToggleSubtitles();
      }
      if (ALLOW_DEV_MODE && key === 'd') {
        onToggleDevMode();
      }
    };
    
    window.addEventListener('keydown', handleKeyPress);
    return () => window.removeEventListener('keydown', handleKeyPress);
  }, [onToggleSubtitles, onToggleDevMode]);
}
Shortcuts:
  • S: Toggle subtitles
  • D: Toggle dev mode (if enabled)

Wake Lock

File: frontend/src/app/useWakeLock.ts. This hook prevents the screen from sleeping during a conversation:
/**
 * Keeps the screen awake while a conversation is active, via the
 * Screen Wake Lock API (no-op in browsers that lack it).
 */
export function useWakeLock(isConnected: boolean) {
  const wakeLockRef = useRef<WakeLockSentinel | null>(null);
  
  useEffect(() => {
    if (!isConnected) return;
    if (!('wakeLock' in navigator)) return;
    
    // BUG FIX: the request is async. If we disconnect before it
    // resolves, the cleanup has already run and the sentinel used to be
    // stored without ever being released. Release it immediately in
    // that case.
    let cancelled = false;
    
    navigator.wakeLock.request('screen')
      .then((wakeLock) => {
        if (cancelled) {
          void wakeLock.release();
        } else {
          wakeLockRef.current = wakeLock;
        }
      })
      .catch((err) => {
        console.error('Wake lock failed:', err);
      });
    
    return () => {
      cancelled = true;
      if (wakeLockRef.current) {
        void wakeLockRef.current.release();
        wakeLockRef.current = null;
      }
    };
  }, [isConnected]);
}

Deployment

Docker

File: frontend/hot-reloading.Dockerfile
# Development image: runs the Next.js dev server so bind-mounted source
# hot-reloads in the browser.
FROM node:20-alpine

WORKDIR /app

# Install pnpm (the package manager used by the frontend)
RUN npm install -g pnpm

# Install dependencies first so Docker layer caching skips this step
# when only source files change.
COPY package.json pnpm-lock.yaml ./
RUN pnpm install

# Copy source
COPY . .

# Development server (hot reload)
CMD ["pnpm", "dev"]

Docker Compose

File: docker-compose.yml:18
# Frontend service: built from the hot-reloading dev image and exposed
# through Traefik as the catch-all route.
frontend:
  image: unmute-frontend:latest
  build:
    context: frontend/
    dockerfile: hot-reloading.Dockerfile
  volumes:
    - ./frontend/src:/app/src  # Hot reload in dev
  labels:
    # Traefik routes every path not claimed by a more specific router
    # (e.g. /api/*) to the Next.js server on port 3000.
    - "traefik.enable=true"
    - "traefik.http.routers.frontend.rule=PathPrefix(`/`)"
    - "traefik.http.services.frontend.loadbalancer.server.port=3000"

Running Locally

# All commands run from the frontend directory.
cd frontend

# Install dependencies
pnpm install

# Development server (hot reload)
pnpm dev

# Production build, then serve the built app
pnpm build
pnpm start

Error Handling

Connection Errors

// Transport-level failures: the browser exposes little detail on the
// 'error' event, so the UI just flips to an error state.
websocket.onerror = (error) => {
  console.error('WebSocket error:', error);
  setConnectionState('error');
  setErrorMessage('Failed to connect to server');
};

// Close code 1011 = "internal server error" (RFC 6455); the backend
// puts a human-readable explanation in `reason`.
websocket.onclose = (event) => {
  if (event.code === 1011) {
    setErrorMessage('Server error: ' + event.reason);
  } else {
    setErrorMessage('Connection closed');
  }
  setConnectionState('disconnected');
};

Server Errors

// Application-level errors arrive as 'error' messages; severity is
// carried in `message.error.type` (fatal / warning / anything else).
if (message.type === 'error') {
  if (message.error.type === 'fatal') {
    // Fatal error - disconnect
    setErrorMessage(message.error.message);
    ws.close();
  } else if (message.error.type === 'warning') {
    // Warning - show but continue
    showWarning(message.error.message);
  } else {
    // Validation error - log
    console.error('Server error:', message.error);
  }
}

Next Steps

Build docs developers (and LLMs) love