Skip to main content
The Recall AI SDK provides powerful transcription capabilities with speaker identification, timing information, and real-time streaming.

Getting transcripts

Retrieve the complete transcript for a recorded meeting:
// Fetch the complete transcript for a finished recording,
// with enhanced speaker diarization enabled.
const transcriptRequest = {
  id: 'bot-id',
  enhanced_diarization: true
};

const transcript = await recall.bot.getTranscript(transcriptRequest);
console.log(transcript);

Enhanced diarization

Diarization identifies who said what in a conversation. Enhanced diarization provides more accurate speaker identification.

When to use enhanced diarization

Multiple speakers

Meetings with 3+ participants benefit most from enhanced accuracy.

Similar voices

When speakers have similar vocal characteristics or accents.

Overlapping speech

When participants frequently talk over each other.

Critical accuracy

Legal, medical, or compliance scenarios requiring precise attribution.

Transcript structure

Transcripts contain detailed timing and speaker information:
// Fetch a diarized transcript for the given bot.
const transcript = await recall.bot.getTranscript({
  id: 'bot-id',
  enhanced_diarization: true
});

// Each entry in transcript.words carries the word text, a speaker label,
// start/end times in seconds, and a confidence score, e.g.:
// {
//   words: [
//     { word: 'Hello',    speaker: 'speaker_0', start_time: 0.5, end_time: 0.8, confidence: 0.98 },
//     { word: 'everyone', speaker: 'speaker_0', start_time: 0.9, end_time: 1.3, confidence: 0.95 },
//     ...
//   ]
// }

// Print one timestamped line per word.
for (const { start_time, speaker, word } of transcript.words) {
  console.log(`[${start_time}s] ${speaker}: ${word}`);
}

Configuring transcription providers

Choose your transcription provider when creating a bot:
// Select the transcription provider at bot-creation time.
const bot = await recall.bot.create({
  meeting_url: 'https://zoom.us/j/123456789',
  transcription_options: { provider: 'assembly_ai' }
});

Real-time transcription

Stream transcripts as the meeting happens:

Configure real-time streaming

// Stream transcripts to a webhook while the meeting is in progress.
const realTimeConfig = {
  destination_url: 'https://yourapp.com/webhooks/transcript',
  partial_results: true,
  enhanced_diarization: true
};

const bot = await recall.bot.create({
  meeting_url: 'https://zoom.us/j/123456789',
  real_time_transcription: realTimeConfig
});

Real-time transcription parameters

destination_url
string
required
Webhook URL where transcripts will be sent in real-time.
partial_results
boolean
If true, receive interim transcripts before they’re finalized. Useful for live captioning. Default: false
enhanced_diarization
boolean
Enable enhanced speaker identification in real-time streams. Default: false

Handling real-time webhooks

import express from 'express';

const app = express();
app.use(express.json());

// Webhook receiver for real-time transcript segments.
// Final segments are persisted; partial ones drive live captions.
function handleTranscriptWebhook(req, res) {
  const { bot_id, words, is_final } = req.body;

  if (is_final) {
    // Final transcript segment
    console.log(`[${bot_id}] Final:`, words);

    // Store in database
    saveTranscript(bot_id, words);
  } else {
    // Partial/interim result
    console.log(`[${bot_id}] Partial:`, words);

    // Send to live captioning system
    updateLiveCaptions(words);
  }

  res.sendStatus(200);
}

app.post('/webhooks/transcript', handleTranscriptWebhook);

app.listen(3000);

Processing transcripts

Group by speaker

/**
 * Group a word-level transcript by speaker label.
 *
 * @param {{words: Array<{speaker: string, word: string}>}} transcript
 * @returns {Array<{speaker: string, text: string, word_count: number}>}
 *   One entry per speaker, in order of first appearance.
 */
function groupBySpeaker(transcript) {
  // Collect each speaker's words; Map preserves first-seen order,
  // matching plain-object key insertion order for these string keys.
  const wordsBySpeaker = new Map();

  for (const { speaker, word } of transcript.words) {
    if (!wordsBySpeaker.has(speaker)) {
      wordsBySpeaker.set(speaker, []);
    }
    wordsBySpeaker.get(speaker).push(word);
  }

  // Convert to readable format
  return [...wordsBySpeaker.entries()].map(([speaker, words]) => ({
    speaker,
    text: words.join(' '),
    word_count: words.length
  }));
}

// Fetch a diarized transcript and summarize it per speaker.
const transcript = await recall.bot.getTranscript({
  id: 'bot-id',
  enhanced_diarization: true
});

console.log(groupBySpeaker(transcript));
// Example output:
// [
//   { speaker: 'speaker_0', text: 'Hello everyone...', word_count: 45 },
//   { speaker: 'speaker_1', text: 'Thanks for joining...', word_count: 38 }
// ]

Format as dialogue

/**
 * Collapse a word-level transcript into consecutive speaker turns.
 *
 * @param {{words: Array<{speaker: string, word: string}>}} transcript
 * @returns {Array<{speaker: string, text: string}>} Ordered dialogue turns.
 */
function formatAsDialogue(transcript) {
  const dialogue = [];
  let activeSpeaker = null;
  let buffer = [];

  // Push the buffered turn, if any, onto the dialogue.
  const flush = () => {
    if (buffer.length > 0) {
      dialogue.push({ speaker: activeSpeaker, text: buffer.join(' ') });
    }
  };

  for (const word of transcript.words) {
    if (word.speaker !== activeSpeaker) {
      // Speaker changed: close out the previous turn and start a new one.
      flush();
      activeSpeaker = word.speaker;
      buffer = [word.word];
    } else {
      buffer.push(word.word);
    }
  }

  // Close out the final turn.
  flush();

  return dialogue;
}

// Print the transcript as alternating speaker turns.
const dialogue = formatAsDialogue(transcript);
for (const turn of dialogue) {
  console.log(`${turn.speaker}: ${turn.text}`);
}

Extract timestamps

/**
 * Split a word-level transcript into per-speaker segments carrying
 * start/end times.
 *
 * @param {{words: Array<{speaker: string, word: string,
 *          start_time: number, end_time: number}>}} transcript
 * @returns {Array<{start_time: number, end_time: number, speaker: string,
 *          words: string[], text: string}>}
 */
function getTimestampedTranscript(transcript) {
  const segments = [];
  let segment = {
    start_time: null,
    end_time: null,
    speaker: null,
    words: []
  };

  // Finish the in-progress segment (skipped when it holds no words).
  const commit = () => {
    if (segment.words.length > 0) {
      segments.push({ ...segment, text: segment.words.join(' ') });
    }
  };

  for (const word of transcript.words) {
    if (word.speaker === segment.speaker) {
      // Same speaker: extend the current segment.
      segment.words.push(word.word);
      segment.end_time = word.end_time;
    } else {
      // Speaker changed: commit the old segment and open a new one.
      commit();
      segment = {
        start_time: word.start_time,
        end_time: word.end_time,
        speaker: word.speaker,
        words: [word.word]
      };
    }
  }

  // Commit the trailing segment.
  commit();

  return segments;
}

// Print each speaker segment with its start/end timestamps.
const timestamped = getTimestampedTranscript(transcript);
for (const segment of timestamped) {
  const start = formatTime(segment.start_time);
  const end = formatTime(segment.end_time);
  console.log(`[${start} - ${end}] ${segment.speaker}: ${segment.text}`);
}

/**
 * Format a duration in seconds as "m:ss" (seconds zero-padded to two digits).
 *
 * @param {number} seconds - Duration in seconds (may be fractional).
 * @returns {string} e.g. 65 -> "1:05"
 */
function formatTime(seconds) {
  const whole = Math.floor(seconds);
  const mins = Math.floor(whole / 60);
  const secs = whole % 60;
  return `${mins}:${String(secs).padStart(2, '0')}`;
}

Search transcripts

/**
 * Case-insensitive substring search over transcript words.
 *
 * @param {{words: Array<{word: string, speaker: string, start_time: number}>}} transcript
 * @param {string} query - Text to look for inside individual words.
 * @returns {Array<{timestamp: number, speaker: string, context: string,
 *          matched_word: string}>} One result per matching word.
 */
function searchTranscript(transcript, query) {
  const needle = query.toLowerCase();
  const allWords = transcript.words;
  const matches = [];

  allWords.forEach((entry, index) => {
    if (!entry.word.toLowerCase().includes(needle)) {
      return;
    }

    // Context window: up to 5 words on each side of the match.
    const lo = Math.max(0, index - 5);
    const hi = Math.min(allWords.length, index + 6);
    const context = allWords.slice(lo, hi).map(w => w.word).join(' ');

    matches.push({
      timestamp: entry.start_time,
      speaker: entry.speaker,
      context,
      matched_word: entry.word
    });
  });

  return matches;
}

// Search the transcript for mentions of "pricing" and print each hit
// with its surrounding context.
const transcript = await recall.bot.getTranscript({ id: 'bot-id' });

for (const result of searchTranscript(transcript, 'pricing')) {
  console.log(`[${formatTime(result.timestamp)}] ${result.speaker}:`);
  console.log(`  "...${result.context}..."\n`);
}

Calculate speaker metrics

/**
 * Compute per-speaker participation metrics from a word-level transcript.
 *
 * @param {{words: Array<{speaker: string, start_time: number, end_time: number}>}} transcript
 * @returns {Array<{speaker: string, word_count: number, word_percentage: string,
 *          speaking_time: string, time_percentage: string}>}
 *   One entry per speaker; percentages and durations are fixed-point strings.
 */
function calculateSpeakerMetrics(transcript) {
  const words = transcript.words;

  // Fix: the original read `.end_time` of `undefined` (TypeError) when the
  // transcript had no words. An empty transcript now yields no metrics.
  if (words.length === 0) {
    return [];
  }

  const metrics = {};

  words.forEach(word => {
    if (!metrics[word.speaker]) {
      // NOTE: the original also tracked a `segments` counter here, but it was
      // never incremented nor returned — dead state, removed.
      metrics[word.speaker] = {
        word_count: 0,
        total_duration: 0
      };
    }

    metrics[word.speaker].word_count++;
    metrics[word.speaker].total_duration += (word.end_time - word.start_time);
  });

  // Calculate percentages and format. Meeting length is approximated by the
  // last word's end time.
  const totalWords = words.length;
  const totalDuration = words[words.length - 1].end_time;

  return Object.entries(metrics).map(([speaker, data]) => ({
    speaker,
    word_count: data.word_count,
    word_percentage: ((data.word_count / totalWords) * 100).toFixed(1),
    speaking_time: data.total_duration.toFixed(1),
    // Guard against "NaN" output when the transcript has zero duration.
    time_percentage: totalDuration > 0
      ? ((data.total_duration / totalDuration) * 100).toFixed(1)
      : '0.0'
  }));
}

// Report how much each participant spoke.
const metrics = calculateSpeakerMetrics(transcript);
console.log('Speaker Metrics:');
for (const m of metrics) {
  console.log(`${m.speaker}:`);
  console.log(`  Words: ${m.word_count} (${m.word_percentage}%)`);
  console.log(`  Time: ${m.speaking_time}s (${m.time_percentage}%)`);
}

Best practices

Use enhanced diarization

Enable enhanced diarization for better speaker identification accuracy.

Handle partial results

When using real-time transcription, distinguish between partial and final results.

Cache transcripts

Store processed transcripts to avoid repeated API calls.

Validate webhooks

Always verify webhook signatures for real-time transcription endpoints.

Complete example

import { Recall } from '@recall-ai/sdk';
import express from 'express';

// SDK client: API key is read from the environment; region is fixed here.
const recall = new Recall({
  apiKey: process.env.RECALL_API_KEY,
  region: 'us-west-2'
});

const app = express();
app.use(express.json());

// Store real-time transcripts
// Keyed by bot id; each value is an array of finalized transcript segments
// appended by the /webhooks/transcript handler below.
const liveTranscripts = new Map();

/**
 * Launch a recording bot with real-time transcription enabled and
 * register an empty live-transcript buffer for it.
 *
 * @param {string} meetingUrl - URL of the meeting to join.
 * @returns {Promise<string>} The new bot's id.
 */
async function startRecording(meetingUrl) {
  const botConfig = {
    meeting_url: meetingUrl,
    bot_name: 'Transcription Bot',
    transcription_options: {
      provider: 'assembly_ai'
    },
    real_time_transcription: {
      destination_url: 'https://yourapp.com/webhooks/transcript',
      partial_results: true,
      enhanced_diarization: true
    }
  };

  const bot = await recall.bot.create(botConfig);
  liveTranscripts.set(bot.id, []);
  return bot.id;
}

// Handle real-time transcript webhooks: persist finalized segments into
// the in-memory buffer and log them.
app.post('/webhooks/transcript', (req, res) => {
  const { bot_id, words, is_final, speaker } = req.body;

  if (!is_final) {
    // Partial results are ignored here; only finalized segments are stored.
    res.sendStatus(200);
    return;
  }

  const history = liveTranscripts.get(bot_id) || [];
  history.push({ speaker, words, timestamp: Date.now() });
  liveTranscripts.set(bot_id, history);

  const line = words.map(w => w.word).join(' ');
  console.log(`[${bot_id}] ${speaker}: ${line}`);

  res.sendStatus(200);
});

/**
 * Fetch the finished transcript for a bot and derive a speaker dialogue
 * plus per-speaker metrics from it.
 *
 * @param {string} botId - Id of the bot whose meeting has ended.
 * @returns {Promise<{dialogue: object[], metrics: object[]}>}
 */
async function getCompleteTranscript(botId) {
  const transcript = await recall.bot.getTranscript({
    id: botId,
    enhanced_diarization: true
  });

  return {
    dialogue: formatAsDialogue(transcript),
    metrics: calculateSpeakerMetrics(transcript)
  };
}

// Start the HTTP server that receives the transcript webhooks.
const PORT = 3000;
app.listen(PORT, () => {
  console.log('Transcription server running on port 3000');
});

Build docs developers (and LLMs) love