The Recall AI SDK provides powerful transcription capabilities with speaker identification, timing information, and real-time streaming.
Getting transcripts
Retrieve the complete transcript for a recorded meeting:
// Fetch the finished transcript for a completed bot recording.
const transcript = await recall . bot . getTranscript ({
id: 'bot-id' ,
enhanced_diarization: true
});
console . log ( transcript );
Enhanced diarization
Diarization identifies who said what in a conversation. Enhanced diarization provides more accurate speaker identification:
Enhanced (Recommended)
Standard
// Enhanced diarization: higher-accuracy speaker labels (recommended).
const transcript = await recall . bot . getTranscript ({
id: 'bot-id' ,
enhanced_diarization: true
});
Enhanced diarization uses advanced AI models to better distinguish between speakers, especially in meetings with similar-sounding voices or overlapping speech.
// Standard diarization: faster, but speaker labels may be less accurate.
const transcript = await recall . bot . getTranscript ({
id: 'bot-id' ,
enhanced_diarization: false
});
Standard diarization is faster but may be less accurate with speaker identification.
When to use enhanced diarization
Multiple speakers Meetings with 3+ participants benefit most from enhanced accuracy.
Similar voices When speakers have similar vocal characteristics or accents.
Overlapping speech When participants frequently talk over each other.
Critical accuracy Legal, medical, or compliance scenarios requiring precise attribution.
Transcript structure
Transcripts contain detailed timing and speaker information:
// Retrieve a transcript, then walk its word-level entries.
const transcript = await recall . bot . getTranscript ({
id: 'bot-id' ,
enhanced_diarization: true
});
// Example transcript structure:
// {
// words: [
// {
// word: 'Hello',
// speaker: 'speaker_0',
// start_time: 0.5,
// end_time: 0.8,
// confidence: 0.98
// },
// {
// word: 'everyone',
// speaker: 'speaker_0',
// start_time: 0.9,
// end_time: 1.3,
// confidence: 0.95
// },
// ...
// ]
// }
// NOTE(review): these examples suggest times are in seconds and confidence is 0-1 — confirm against the current API reference.
// Process transcript
// Print each word with its start time and speaker label.
transcript . words . forEach ( word => {
console . log ( `[ ${ word . start_time } s] ${ word . speaker } : ${ word . word } ` );
});
Configuring transcription providers
Choose your transcription provider when creating a bot:
Assembly AI
Deepgram
Custom settings
// Create a bot that transcribes with AssemblyAI.
const bot = await recall . bot . create ({
meeting_url: 'https://zoom.us/j/123456789' ,
transcription_options: {
provider: 'assembly_ai'
}
});
// Create a bot that transcribes with Deepgram.
const bot = await recall . bot . create ({
meeting_url: 'https://zoom.us/j/123456789' ,
transcription_options: {
provider: 'deepgram'
}
});
// Custom provider settings: language selection and domain vocabulary boosting.
const bot = await recall . bot . create ({
meeting_url: 'https://zoom.us/j/123456789' ,
transcription_options: {
provider: 'assembly_ai' ,
language_code: 'es' , // Spanish
custom_vocabulary: [ 'API' , 'SDK' , 'webhook' ]
}
});
Real-time transcription
Stream transcripts as the meeting happens:
// Stream transcript segments to your webhook while the meeting is live.
const bot = await recall . bot . create ({
meeting_url: 'https://zoom.us/j/123456789' ,
real_time_transcription: {
destination_url: 'https://yourapp.com/webhooks/transcript' ,
partial_results: true ,
enhanced_diarization: true
}
});
Real-time transcription parameters
destination_url (string, required): Webhook URL where transcripts will be sent in real-time.
partial_results (boolean): If true, receive interim transcripts before they're finalized. Useful for live captioning. Default: false
enhanced_diarization (boolean): Enable enhanced speaker identification in real-time streams. Default: false
Handling real-time webhooks
Express.js
Next.js API Route
Validation
import express from 'express' ;
const app = express ();
app . use ( express . json ());
// Webhook receiver: Recall POSTs transcript segments here during the meeting.
// `saveTranscript` and `updateLiveCaptions` are app-specific helpers (not shown on this page).
app . post ( '/webhooks/transcript' , ( req , res ) => {
const { bot_id , words , is_final } = req . body ;
if ( is_final ) {
// Final transcript segment
console . log ( `[ ${ bot_id } ] Final:` , words );
// Store in database
saveTranscript ( bot_id , words );
} else {
// Partial/interim result
console . log ( `[ ${ bot_id } ] Partial:` , words );
// Send to live captioning system
updateLiveCaptions ( words );
}
// Acknowledge quickly so the sender does not retry the delivery.
res . sendStatus ( 200 );
});
app . listen ( 3000 );
// pages/api/webhooks/transcript.js
// Next.js API route handling the same webhook contract as the Express example.
// `db` and `broadcastTranscript` are app-specific (not shown on this page).
export default async function handler ( req , res ) {
if ( req . method !== 'POST' ) {
return res . status ( 405 ). end ();
}
const { bot_id , words , is_final , speaker } = req . body ;
// Process transcript
// Persist only finalized segments; partial results are broadcast but not stored.
if ( is_final ) {
await db . transcripts . create ({
bot_id ,
speaker ,
words ,
timestamp: new Date ()
});
}
// Broadcast to connected clients via WebSocket
broadcastTranscript ({ bot_id , words , speaker });
res . status ( 200 ). json ({ received: true });
}
import crypto from 'crypto' ;
/**
 * Verify that a webhook request was signed with our shared secret.
 *
 * Recomputes the HMAC-SHA256 of the request body and compares it against the
 * `x-recall-signature` header using a constant-time comparison. A plain `===`
 * comparison leaks timing information that lets an attacker recover the
 * signature byte-by-byte, so `crypto.timingSafeEqual` is used instead.
 *
 * NOTE(review): this signs `JSON.stringify(req.body)`, i.e. the re-serialized
 * body. If the sender signs the raw request bytes, capture the raw body (e.g.
 * via `express.json({ verify })`) and sign that instead — confirm against the
 * provider's webhook docs.
 *
 * @param {object} req - Incoming request with `headers` and parsed `body`.
 * @param {string} secret - Shared webhook signing secret.
 * @returns {boolean} true only if the signature header matches.
 */
function verifyWebhookSignature ( req , secret ) {
const signature = req . headers [ 'x-recall-signature' ];
// Missing or non-string header can never match.
if ( typeof signature !== 'string' ) {
return false ;
}
const body = JSON . stringify ( req . body );
const expectedSignature = crypto
. createHmac ( 'sha256' , secret )
. update ( body )
. digest ( 'hex' );
const provided = Buffer . from ( signature );
const expected = Buffer . from ( expectedSignature );
// timingSafeEqual throws on length mismatch, so check length first;
// length is not secret, so this early return leaks nothing useful.
if ( provided . length !== expected . length ) {
return false ;
}
return crypto . timingSafeEqual ( provided , expected );
}
// Reject any request whose HMAC signature does not verify, before doing any work.
app . post ( '/webhooks/transcript' , ( req , res ) => {
// Verify webhook authenticity
if ( ! verifyWebhookSignature ( req , process . env . WEBHOOK_SECRET )) {
return res . status ( 401 ). send ( 'Invalid signature' );
}
// Process transcript...
const { bot_id , words } = req . body ;
res . sendStatus ( 200 );
});
Processing transcripts
Group by speaker
/**
 * Collapse a word-level transcript into one entry per speaker.
 *
 * @param {{words: Array<{word: string, speaker: string}>}} transcript
 * @returns {Array<{speaker: string, text: string, word_count: number}>}
 *   One entry per speaker, in order of first appearance.
 */
function groupBySpeaker ( transcript ) {
  // Map preserves first-seen speaker order, matching the original object-key order.
  const bySpeaker = new Map ();
  for ( const { speaker , word } of transcript . words ) {
    if ( ! bySpeaker . has ( speaker )) {
      bySpeaker . set ( speaker , []);
    }
    bySpeaker . get ( speaker ). push ( word );
  }
  // Convert to readable format
  return [ ... bySpeaker . entries ()]. map (([ speaker , spoken ]) => ({
    speaker ,
    text: spoken . join ( ' ' ),
    word_count: spoken . length
  }));
}
// Fetch a transcript and collapse it into one entry per speaker.
const transcript = await recall . bot . getTranscript ({
id: 'bot-id' ,
enhanced_diarization: true
});
const grouped = groupBySpeaker ( transcript );
console . log ( grouped );
// [
// { speaker: 'speaker_0', text: 'Hello everyone...', word_count: 45 },
// { speaker: 'speaker_1', text: 'Thanks for joining...', word_count: 38 }
// ]
/**
 * Turn a word-level transcript into dialogue turns: consecutive words by the
 * same speaker are merged into a single { speaker, text } entry.
 *
 * @param {{words: Array<{word: string, speaker: string}>}} transcript
 * @returns {Array<{speaker: string, text: string}>} Ordered speaker turns.
 */
function formatAsDialogue ( transcript ) {
  const turns = [];
  for ( const { speaker , word } of transcript . words ) {
    const last = turns [ turns . length - 1 ];
    if ( last && last . speaker === speaker ) {
      // Same speaker still talking: extend the current turn.
      last . parts . push ( word );
    } else {
      // Speaker changed (or first word): open a new turn.
      turns . push ({ speaker , parts: [ word ] });
    }
  }
  // Join each turn's words into a single text string.
  return turns . map (({ speaker , parts }) => ({
    speaker ,
    text: parts . join ( ' ' )
  }));
}
// Print the conversation as alternating speaker turns.
const dialogue = formatAsDialogue ( transcript );
dialogue . forEach ( turn => {
console . log ( ` ${ turn . speaker } : ${ turn . text } ` );
});
/**
 * Split a word-level transcript into per-speaker segments with timing.
 * Each segment covers a run of consecutive words by one speaker and carries
 * the run's start/end times, the raw word list, and the joined text.
 *
 * @param {{words: Array<{word: string, speaker: string, start_time: number, end_time: number}>}} transcript
 * @returns {Array<{start_time: number, end_time: number, speaker: string, words: string[], text: string}>}
 */
function getTimestampedTranscript ( transcript ) {
  const segments = [];
  let open = null ; // segment currently being accumulated, or null
  // Flush the in-progress segment (if any) onto the result list.
  const flush = () => {
    if ( open && open . words . length > 0 ) {
      segments . push ({ ... open , text: open . words . join ( ' ' ) });
    }
  };
  for ( const w of transcript . words ) {
    if ( ! open || w . speaker !== open . speaker ) {
      // Speaker changed: close the previous run and start a new one.
      flush ();
      open = {
        start_time: w . start_time ,
        end_time: w . end_time ,
        speaker: w . speaker ,
        words: [ w . word ]
      };
    } else {
      // Same speaker: extend the run and advance its end time.
      open . words . push ( w . word );
      open . end_time = w . end_time ;
    }
  }
  flush (); // don't lose the final run
  return segments ;
}
// Render each segment as "[start - end] speaker: text".
const timestamped = getTimestampedTranscript ( transcript );
timestamped . forEach ( segment => {
const start = formatTime ( segment . start_time );
const end = formatTime ( segment . end_time );
console . log ( `[ ${ start } - ${ end } ] ${ segment . speaker } : ${ segment . text } ` );
});
/**
 * Format a duration in seconds as "m:ss" (e.g. 65 -> "1:05").
 *
 * The extraction-mangled original embedded literal spaces inside the template
 * literal, producing " 1 : 05 " instead of the intended compact "1:05".
 *
 * @param {number} seconds - Non-negative duration in seconds.
 * @returns {string} Minutes and zero-padded seconds, colon-separated.
 */
function formatTime ( seconds ) {
  const mins = Math . floor ( seconds / 60 );
  const secs = Math . floor ( seconds % 60 );
  return `${ mins }:${ String ( secs ). padStart ( 2 , '0' ) }` ;
}
Search transcripts
/**
 * Case-insensitively search a transcript for a substring and return each hit
 * with its timestamp, speaker, and a context window of nearby words.
 *
 * @param {{words: Array<{word: string, speaker: string, start_time: number}>}} transcript
 * @param {string} query - Substring to match against each word.
 * @returns {Array<{timestamp: number, speaker: string, context: string, matched_word: string}>}
 */
function searchTranscript ( transcript , query ) {
  const allWords = transcript . words ;
  const needle = query . toLowerCase ();
  return allWords . reduce (( hits , entry , idx ) => {
    if ( entry . word . toLowerCase (). includes ( needle )) {
      // Context window: up to 5 words before and 5 after the match.
      const windowStart = Math . max ( 0 , idx - 5 );
      const windowEnd = Math . min ( allWords . length , idx + 6 );
      const window = allWords . slice ( windowStart , windowEnd );
      hits . push ({
        timestamp: entry . start_time ,
        speaker: entry . speaker ,
        context: window . map (( w ) => w . word ). join ( ' ' ),
        matched_word: entry . word
      });
    }
    return hits ;
  }, []);
}
// Find every mention of "pricing" along with its surrounding context.
const transcript = await recall . bot . getTranscript ({ id: 'bot-id' });
const results = searchTranscript ( transcript , 'pricing' );
results . forEach ( result => {
console . log ( `[ ${ formatTime ( result . timestamp ) } ] ${ result . speaker } :` );
console . log ( ` "... ${ result . context } ..." \n ` );
});
Calculate speaker metrics
/**
 * Compute per-speaker talk metrics from a word-level transcript.
 *
 * Fixes over the original:
 *  - returns [] for an empty transcript instead of crashing on
 *    `words[words.length - 1].end_time` (reading `end_time` of undefined);
 *  - guards a zero total duration instead of emitting "Infinity" percentages;
 *  - drops the dead `segments` counter, which was initialized but never
 *    incremented and never included in the returned objects.
 *
 * @param {{words: Array<{speaker: string, start_time: number, end_time: number}>}} transcript
 * @returns {Array<{speaker: string, word_count: number, word_percentage: string, speaking_time: string, time_percentage: string}>}
 *   One entry per speaker; percentage/time fields are `toFixed(1)` strings,
 *   matching the original output format.
 */
function calculateSpeakerMetrics ( transcript ) {
  const words = transcript . words ;
  if ( words . length === 0 ) {
    return [];
  }
  const metrics = {};
  for ( const word of words ) {
    if ( ! metrics [ word . speaker ]) {
      metrics [ word . speaker ] = { word_count: 0 , total_duration: 0 };
    }
    metrics [ word . speaker ]. word_count ++ ;
    metrics [ word . speaker ]. total_duration += ( word . end_time - word . start_time );
  }
  // Calculate percentages and format
  const totalWords = words . length ;
  // NOTE(review): assumes the meeting starts at t=0, so the last word's
  // end_time is the total duration — same assumption as the original.
  const totalDuration = words [ words . length - 1 ]. end_time ;
  return Object . entries ( metrics ). map (([ speaker , data ]) => ({
    speaker ,
    word_count: data . word_count ,
    word_percentage: (( data . word_count / totalWords ) * 100 ). toFixed ( 1 ),
    speaking_time: data . total_duration . toFixed ( 1 ),
    time_percentage: totalDuration > 0
      ? (( data . total_duration / totalDuration ) * 100 ). toFixed ( 1 )
      : '0.0'
  }));
}
// Summarize per-speaker word counts and talk time.
const metrics = calculateSpeakerMetrics ( transcript );
console . log ( 'Speaker Metrics:' );
metrics . forEach ( m => {
console . log ( ` ${ m . speaker } :` );
console . log ( ` Words: ${ m . word_count } ( ${ m . word_percentage } %)` );
console . log ( ` Time: ${ m . speaking_time } s ( ${ m . time_percentage } %)` );
});
Best practices
Use enhanced diarization Enable enhanced diarization for better speaker identification accuracy.
Handle partial results When using real-time transcription, distinguish between partial and final results.
Cache transcripts Store processed transcripts to avoid repeated API calls.
Validate webhooks Always verify webhook signatures for real-time transcription endpoints.
Complete example
import { Recall } from '@recall-ai/sdk' ;
import express from 'express' ;
// SDK client; the API key comes from the environment, never hard-coded.
const recall = new Recall ({
apiKey: process . env . RECALL_API_KEY ,
region: 'us-west-2'
});
const app = express ();
app . use ( express . json ());
// Store real-time transcripts
// Keyed by bot id; each value is the list of final segments received so far.
const liveTranscripts = new Map ();
// Create bot with real-time transcription
/**
 * Create a recording bot for the given meeting, wire it up for real-time
 * transcription, and register an empty live-transcript buffer for it.
 *
 * @param {string} meetingUrl - URL of the meeting to join.
 * @returns {Promise<string>} The id of the newly created bot.
 */
async function startRecording ( meetingUrl ) {
  const botConfig = {
    meeting_url: meetingUrl ,
    bot_name: 'Transcription Bot' ,
    transcription_options: {
      provider: 'assembly_ai'
    },
    real_time_transcription: {
      destination_url: 'https://yourapp.com/webhooks/transcript' ,
      partial_results: true ,
      enhanced_diarization: true
    }
  };
  const bot = await recall . bot . create ( botConfig );
  // Seed the in-memory buffer so webhook segments have somewhere to land.
  liveTranscripts . set ( bot . id , []);
  return bot . id ;
}
// Handle real-time transcript webhooks
// Accumulate only finalized segments; partial/interim results are ignored here.
app . post ( '/webhooks/transcript' , ( req , res ) => {
const { bot_id , words , is_final , speaker } = req . body ;
if ( is_final ) {
const transcripts = liveTranscripts . get ( bot_id ) || [];
transcripts . push ({ speaker , words , timestamp: Date . now () });
liveTranscripts . set ( bot_id , transcripts );
console . log ( `[ ${ bot_id } ] ${ speaker } : ${ words . map ( w => w . word ). join ( ' ' ) } ` );
}
res . sendStatus ( 200 );
});
// Get complete transcript after meeting
/**
 * Fetch the final transcript for a finished bot and post-process it.
 *
 * @param {string} botId - Id of the bot whose transcript to fetch.
 * @returns {Promise<{dialogue: Array, metrics: Array}>} Speaker-turn dialogue
 *   (formatAsDialogue) and per-speaker metrics (calculateSpeakerMetrics).
 */
async function getCompleteTranscript ( botId ) {
  const transcript = await recall . bot . getTranscript ({
    id: botId ,
    enhanced_diarization: true
  });
  // Format as dialogue turns and compute per-speaker metrics in one pass out.
  return {
    dialogue: formatAsDialogue ( transcript ),
    metrics: calculateSpeakerMetrics ( transcript )
  };
}
// Start the webhook server.
app . listen ( 3000 , () => {
console . log ( 'Transcription server running on port 3000' );
});