The Podcast Agent provides a comprehensive suite of tools for processing podcast videos, including automatic transcription with speaker identification, AI-assisted video editing, and semantic knowledge base creation from podcast content.
# Transcribe one podcast video; the transcript is written to disk as JSON.
from podcast_agent.geminivideo import process_video

# Process a single video
output_path = process_video("path/to/podcast.mp4")
print(f"Transcript saved to: {output_path}")
The transcription tool uses visual analysis to identify speakers:
podcast_agent/geminivideo.py
# Speaker-identification prompt sent with the video. The physical descriptions
# let the model match on-screen faces to host names; anyone else is a guest.
# NOTE: this is a plain string, not an f-string — the braces below are literal
# JSON braces, so no {{ }} escaping is needed.
prompt = """The name of this podcast is The Rollup. There are two hosts:
- Andy: Light blonde, curly hair, longer on top with wave, light complexion.
- Rob: Short dark hair, slightly receding, light to medium skin, short beard.
Any other speakers are guests.
Transcribe this interview, identify speakers, and return JSON format:
[
  {
    "speaker": "Speaker Name",
    "content": "What they said"
  }
]"""
Customize the speaker descriptions in the prompt to match your podcast’s hosts and guests for accurate identification.
Transcripts are saved as JSON files in the jsonoutputs/ directory:
[
  { "speaker": "Andy", "content": "Welcome to The Rollup! Today we're discussing..." },
  { "speaker": "Rob", "content": "Thanks for having me. I'm excited to talk about..." },
  { "speaker": "Guest", "content": "It's great to be here. Let me share some insights on..." }
]
# Batch-transcribe every video in a directory, continuing past failures.
# Bug fix: the snippet imported `main` but called `process_video`, which
# would raise NameError — import the function that is actually used.
import os

from podcast_agent.geminivideo import process_video

# Process all videos in a directory
video_dir = "split_videos"
video_files = [
    os.path.join(video_dir, f)
    for f in os.listdir(video_dir)
    if f.lower().endswith(('.mov', '.mp4', '.avi', '.mkv', '.webm'))
]

for video_path in video_files:
    try:
        process_video(video_path)
    except Exception as e:
        # Best-effort batch: report the failure and move on to the next file.
        print(f"Failed to process {video_path}: {str(e)}")
# AI-assisted edit of one episode, steered by natural-language instructions.
from podcast_agent.aiagenteditor import process_video

# Process video with custom instructions
output_path = process_video(
    videopath="podcast_episode.mp4",
    custom_instructions="Focus on technical discussions, remove pauses longer than 2 seconds",
)

# `process_video` may return a falsy value when editing fails.
if output_path:
    print(f"Edited video saved to: {output_path}")
# Three flavors of custom editing instructions.

# Example 1: Technical content focus
process_video(
    "tech_podcast.mp4",
    custom_instructions="Keep all technical discussions, remove casual banter",
)

# Example 2: Pacing improvement
process_video(
    "interview.mp4",
    custom_instructions="Remove pauses longer than 3 seconds, keep all Q&A segments",
)

# Example 3: Highlight reel
process_video(
    "long_episode.mp4",
    custom_instructions="Extract only the most insightful moments and key takeaways",
)
# Build a semantic knowledge base from every saved transcript.
from podcast_agent.podcast_knowledge_base import PodcastKnowledgeBase

# Initialize with persistent storage
kb = PodcastKnowledgeBase(collection_name="podcast_knowledge")

# Process all JSON transcripts
kb.process_all_json_files(directory="jsonoutputs/")
# Query the knowledge base
results = kb.query_knowledge_base(
    query="What did they say about machine learning?",
    n_results=5,
)

# Format and display results
formatted = kb.format_query_results(results)
print(formatted)
# Complex semantic search: run several queries and print the top hits,
# showing speaker, relevance score, and a 200-character content preview.
queries = [
    "What are the challenges in blockchain scalability?",
    "How does NFT technology work?",
    "What did guests say about DeFi adoption?",
]

for query in queries:
    print(f"\n=== Query: {query} ===")
    results = kb.query_knowledge_base(query, n_results=3)
    for i, result in enumerate(results, 1):
        print(f"\n{i}. [{result['metadata']['speaker']}] "
              f"(Score: {result['relevance_score']:.2f})")
        print(f" {result['content'][:200]}...")
# Rebuild the knowledge base from scratch and report its size.

# Clear the entire collection
kb.clear_collection()

# Re-process all files
kb.process_all_json_files()

# Get processed file list
processed_files = kb.get_processed_files()
print(f"Knowledge base contains {len(processed_files)} transcripts")