Je cherchais un moyen de télécharger les vidéos d'une liste de lecture (playlist) Youtube.
N'ayant rien trouvé d'intéressant, j'ai pondu ce super script : yepisodes.py ©
Requiert Python 3.6+ ainsi que les modules requests et youtube-dl.
"""
File: yepisodes.py
Download a complete Youtube playlist.
Python 3.6+, requires:
- requests
- youtube-dl
Example:
python yepisodes.py \
"PLBvbiyw33QLlycUu2NL8sk52tBzc1WwV_" \
"pyconfr2013-conf{number:02d}.mkv"
Set up a proxy if videos are blocked in your country.
Some proxies here: http://www.xroxy.com/proxylist.htm
export HTTPS_PROXY="socks4://203.91.118.71:1080"
"""
import os
import pathlib
import re
import sys
import time
from typing import Dict, Generator, Tuple
import requests
import youtube_dl
# Version of this script.
__version__ = "1.0.1"
# URL template of the embedded playlist page; "{}" is filled with the playlist ID.
YT_URL_LIST = "https://www.youtube.com/embed/videoseries?list={}"
# URL template of a single video page; "{}" is filled with the video ID.
YT_URL_VIDEO = "https://www.youtube.com/watch?v={}"
def download(uid: str, output: pathlib.Path) -> None:
    """Download a single video using youtube-dl.

    The best video and audio streams are requested and merged into the
    container named by the suffix of *output*. On success the average
    throughput is printed; on a download error the option names are printed
    and the function returns without raising.

    Args:
        uid: The YouTube video ID.
        output: Destination file; its suffix (without the dot) is used as
            the merge container format.
    """
    options = {
        "quiet": True,
        "format": "bestvideo+bestaudio",
        "merge_output_format": output.suffix[1:],
        # Keep the parent directory in the template: ``output.stem`` would
        # drop it, the file would land in the current directory and the
        # size lookup below would fail for any path with a folder part.
        "outtmpl": str(output.with_suffix("")),
    }
    with youtube_dl.YoutubeDL(options) as ydl:
        start = time.time()
        try:
            ydl.download([YT_URL_VIDEO.format(uid)])
        except youtube_dl.DownloadError:
            print("✗", 'options="' + " ".join(options) + '"')
            return
        elapsed = time.time() - start

    size_mio = output.stat().st_size / 1024 / 1024
    # The wall clock may not advance (or may step backwards); avoid dividing
    # by zero or reporting a negative speed.
    mio = size_mio / elapsed if elapsed > 0 else 0.0
    print(f"✓ @ {mio:.2f} Mio/s")
def download_all(playlist: str, fmt: str) -> None:
    """Fetch every video of a playlist, skipping files that already exist.

    Args:
        playlist: The YouTube playlist ID.
        fmt: Output file name template, formatted with the keyword
            ``number`` for each video.
    """
    for position, video_id in find_video_ids(playlist):
        target = pathlib.Path(fmt.format(number=position))
        if not target.exists():
            # Show the file name right away; the result mark is appended on
            # the same line by download().
            print(target.name, end=" ", flush=True)
            download(video_id, target)
        else:
            print(target.name, "✓")
def find_video_ids(playlist: str) -> Generator[Tuple[int, str], None, None]:
    """Yield ``(number, video_id)`` pairs for the distinct videos of a playlist.

    IDs are yielded in order of first appearance in the page source and
    numbered from 1.

    Args:
        playlist: The YouTube playlist ID.
    """
    html = get_html(playlist)
    # The pattern matches IDs written with backslash-escaped quotes.
    matches = re.findall(r'\\"videoId\\":\\"([^"]+)\\"', html)
    # dict.fromkeys drops repeated IDs while keeping first-seen order.
    yield from enumerate(dict.fromkeys(matches), start=1)
def get_html(uid: str) -> str:
    """Return the HTML source of the embed page of a given playlist.

    Args:
        uid: The YouTube playlist ID.

    Returns:
        The decoded body of the HTTP response.
    """
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) "
            "Gecko/20180326 Firefox/59.0.1"
        )
    }
    url = YT_URL_LIST.format(uid)
    # Without a timeout, requests can wait forever on an unresponsive
    # server or proxy.
    with requests.get(url, headers=headers, timeout=30) as req:
        return req.text
def _print_usage() -> None:
    """Print the required arguments and an example command line."""
    print("Required arguments: the playlist ID and the output format")
    print(
        "Example:",
        os.path.basename(sys.executable),
        __file__,
        '"PLBvbiyw33QLlycUu2NL8sk52tBzc1WwV_"',
        '"video-{number:02}.mkv"',
    )


def main(*args: str) -> int:
    """Download the playlist described by the command-line arguments.

    Args:
        *args: The playlist ID followed by the output file name template.
            Any further arguments are ignored.

    Returns:
        0 on success, 1 when an argument is missing or a ValueError is
        raised while downloading (e.g. an invalid format specification in
        the output template).
    """
    # Missing arguments would surface as a TypeError from the call below,
    # which the ValueError handler does not cover, so check them explicitly.
    if len(args) < 2:
        _print_usage()
        return 1

    try:
        download_all(*args[:2])
    except ValueError:
        _print_usage()
        return 1
    return 0
if __name__ == "__main__":
    # sys.exit is always available, whereas the bare exit() helper is
    # injected by the site module and is missing e.g. under ``python -S``.
    sys.exit(main(*sys.argv[1:]))
Un exemple afin d'illustrer une utilisation :
$ python yepisodes.py "PLBvbiyw33QLlycUu2NL8sk52tBzc1WwV_" "pyconfr2013-conf_{number:02}.mkv"
pyconfr2013-conf_01.mkv ✓ @ 3.25 Mio/s
pyconfr2013-conf_02.mkv ...
Toutes les vidéos de la PyConFR 2013 seront téléchargées dans le dossier courant. Simple comme bonjour !
Historique
- 2021-07-11 : Correction de la gestion des arguments (remplacement du `ValueError` par `TypeError` dans `main()`).
- 2021-05-12 : Plus besoin de `PhantomJS` ni du module `objectpath`.