yt-dlp makes the best effort to be a good command-line program and should be callable from any programming language.
Your program should avoid parsing the normal stdout since it may change in future versions. Instead, use options such as -J, --print, --progress-template, --exec, etc. to create console output that you can reliably reproduce and parse.
If you are porting code from youtube-dl to yt-dlp, note that we do not guarantee the return value of YoutubeDL.extract_info to be JSON serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through YoutubeDL.sanitize_info as shown in the examples below.
# Example: extract a video's metadata without downloading it and print it as JSON.
import json

import yt_dlp

URL = 'https://www.youtube.com/watch?v=BaW_jenozKc'

# ℹ️ See help(yt_dlp.YoutubeDL) for a list of available options and public functions
ydl_opts = {}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info(URL, download=False)

    # ℹ️ ydl.sanitize_info makes the info json-serializable
    serializable_info = ydl.sanitize_info(info)
    print(json.dumps(serializable_info))
# Example: download the audio track and convert it to m4a with a postprocessor.
import yt_dlp

URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']

# Extract audio using ffmpeg
extract_audio_pp = {
    'key': 'FFmpegExtractAudio',
    'preferredcodec': 'm4a',
}

ydl_opts = {
    'format': 'm4a/bestaudio/best',
    # ℹ️ See help(yt_dlp.postprocessor) for a list of available Postprocessors and their arguments
    'postprocessors': [extract_audio_pp],
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    error_code = ydl.download(URLS)
Download only videos that match specific criteria:
# Example: skip videos that fail a custom predicate passed as 'match_filter'.
import yt_dlp

URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']


def longer_than_a_minute(info, *, incomplete):
    """Download only videos longer than a minute (or with unknown duration)

    Returns a rejection message for videos shorter than 60 seconds and
    None otherwise. `incomplete` is accepted but not used here.
    """
    seconds = info.get('duration')
    # A missing or zero duration is treated as unknown, so the video is not rejected
    is_too_short = bool(seconds) and seconds < 60
    return 'The video is too short' if is_too_short else None


ydl_opts = {
    'match_filter': longer_than_a_minute,
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    error_code = ydl.download(URLS)
# Example: plug in a custom logger and a progress hook.
import yt_dlp

URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']


class MyLogger:
    """Logger that prints error messages and discards everything else."""

    def debug(self, msg):
        # For compatibility with youtube-dl, both debug and info are passed into debug
        # You can distinguish them by the prefix '[debug] '
        if not msg.startswith('[debug] '):
            self.info(msg)

    def info(self, msg):
        pass

    def warning(self, msg):
        pass

    def error(self, msg):
        print(msg)


# ℹ️ See "progress_hooks" in help(yt_dlp.YoutubeDL)
def my_hook(d):
    """Print a notice once the reported status is 'finished'."""
    if d['status'] != 'finished':
        return
    print('Done downloading, now post-processing ...')


ydl_opts = {
    'logger': MyLogger(),
    'progress_hooks': [my_hook],
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(URLS)
# Example: register a custom postprocessor on a YoutubeDL instance.
import yt_dlp

URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']


# ℹ️ See help(yt_dlp.postprocessor.PostProcessor)
class MyCustomPP(yt_dlp.postprocessor.PostProcessor):
    """Minimal postprocessor that prints a message and passes the info through."""

    def run(self, info):
        self.to_screen('Doing stuff')
        # NOTE(review): the first element is presumably the list of files to
        # delete, per the PostProcessor.run contract — confirm against its docs
        files_to_delete = []
        return files_to_delete, info


with yt_dlp.YoutubeDL() as ydl:
    # ℹ️ "when" can take any value in yt_dlp.utils.POSTPROCESS_WHEN
    custom_pp = MyCustomPP()
    ydl.add_post_processor(custom_pp, when='pre_process')
    ydl.download(URLS)
# Example: pass a callable as 'format' to pick formats programmatically.
import yt_dlp

URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']


def format_selector(ctx):
    """ Select the best video and the best audio that won't result in an mkv.
    NOTE: This is just an example and does not handle all cases

    Yields a single merged-format dict. Nothing is yielded when there is no
    video-only format, when the best video's container has no known
    compatible audio extension, or when no matching audio-only format exists.
    """
    # Video container -> audio extension that can be merged into it
    compatible_audio_ext = {'mp4': 'm4a', 'webm': 'webm'}

    # formats are already sorted worst to best
    formats = (ctx.get('formats') or [])[::-1]

    # acodec='none' means there is no audio
    # A default is passed to next() because a StopIteration escaping a
    # generator body is converted into RuntimeError (PEP 479)
    best_video = next(
        (f for f in formats if f['vcodec'] != 'none' and f['acodec'] == 'none'),
        None)
    if best_video is None:
        return

    # find compatible audio extension
    audio_ext = compatible_audio_ext.get(best_video['ext'])
    if audio_ext is None:
        return

    # vcodec='none' means there is no video
    best_audio = next(
        (f for f in formats if (
            f['acodec'] != 'none' and f['vcodec'] == 'none' and f['ext'] == audio_ext)),
        None)
    if best_audio is None:
        return

    # These are the minimum required fields for a merged format
    yield {
        'format_id': f'{best_video["format_id"]}+{best_audio["format_id"]}',
        'ext': best_video['ext'],
        'requested_formats': [best_video, best_audio],
        # Must be + separated list of protocols
        'protocol': f'{best_video["protocol"]}+{best_audio["protocol"]}',
    }


ydl_opts = {
    'format': format_selector,
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(URLS)
The return value of YoutubeDL.extract_info is not guaranteed to be JSON serializable or a standard dictionary in yt-dlp. Always use sanitize_info() if you need serializable output.