Épisodes Youtube

Je cherchais un moyen de télécharger les vidéos d'une liste de lecture (playlist) Youtube.
N'ayant rien trouvé d'intéressant, j'ai pondu ce super script : yepisodes.py ^©

Requiert Python 3.6+ ainsi que les modules requests et youtube-dl.

"""
File: yepisodes.py

Download a complete Youtube playlist.
Python 3.6+, requires:
    - requests
    - youtube-dl

Example:
    python yepisodes.py \
        "PLBvbiyw33QLlycUu2NL8sk52tBzc1WwV_" \
        "pyconfr2013-conf{number:02d}.mkv"

Set up a proxy if videos are blocked in your country.
Some proxies here: http://www.xroxy.com/proxylist.htm
    export HTTPS_PROXY="socks4://203.91.118.71:1080"
"""

import os
import pathlib
import re
import sys
import time
from typing import Dict, Generator, Tuple

import requests
import youtube_dl

# Version of this script.
__version__ = "1.0.1"

# Embed page of a playlist; "{}" receives the playlist ID (see get_html()).
YT_URL_LIST = "https://www.youtube.com/embed/videoseries?list={}"
# Watch page of a single video; "{}" receives the video ID (see download()).
YT_URL_VIDEO = "https://www.youtube.com/watch?v={}"


def download(uid: str, output: pathlib.Path) -> None:
    """Download a single video with youtube-dl and print the average speed.

    Args:
        uid: Youtube video ID, inserted into ``YT_URL_VIDEO``.
        output: Destination file. Its suffix (without the dot) is passed as
            the merge container format, and its path without the suffix is
            used as the youtube-dl output template.

    On success, prints ``✓`` followed by the throughput computed from the
    size of ``output`` and the elapsed time. On a youtube-dl download error,
    prints ``✗`` followed by the options that were used, and returns.
    """

    options = {
        "quiet": True,
        "format": "bestvideo+bestaudio",
        "merge_output_format": output.suffix[1:],
        # Keep the parent directory: ``output.stem`` alone would drop it and
        # the video would be written to the current directory instead of the
        # location that is inspected with ``output.stat()`` below.
        "outtmpl": str(output.with_suffix("")),
    }
    with youtube_dl.YoutubeDL(options) as ydl:
        # A monotonic clock is immune to system clock adjustments.
        start = time.monotonic()
        try:
            ydl.download([YT_URL_VIDEO.format(uid)])
        except youtube_dl.DownloadError:
            # Show option names together with their values; joining the dict
            # itself would only list the (constant) key names.
            details = " ".join(f"{key}={value}" for key, value in options.items())
            print("✗", 'options="' + details + '"')
            return
        elapsed = time.monotonic() - start

    mio = output.stat().st_size / 1024 / 1024 / elapsed
    print(f"✓ @ {mio:.2f} Mio/s")


def download_all(playlist: str, fmt: str) -> None:
    """Fetch every video of a playlist, skipping files that already exist.

    Args:
        playlist: Playlist ID handed to ``find_video_ids()``.
        fmt: Output file name template; it is formatted with the keyword
            ``number`` (position of the video as yielded by
            ``find_video_ids()``).
    """

    for number, video_id in find_video_ids(playlist):
        target = pathlib.Path(fmt.format(number=number))
        if not target.exists():
            # Show the file name right away; download() completes the line.
            print(target.name, end=" ", flush=True)
            download(video_id, target)
        else:
            print(target.name, "✓")


def find_video_ids(playlist: str) -> Generator[Tuple[int, str], None, None]:
    """Yield ``(number, video_id)`` pairs for the videos found in a playlist.

    The IDs are extracted from the page returned by ``get_html()``. Each ID
    is yielded once, in order of first appearance, and numbering starts at 1.
    """

    page = get_html(playlist)
    emitted = set()
    position = 0
    for video_id in re.findall(r'\\"videoId\\":\\"([^"]+)\\"', page):
        if video_id not in emitted:
            emitted.add(video_id)
            position += 1
            yield position, video_id


def get_html(uid: str) -> str:
    """Return the HTML source of the embed page of a given playlist.

    Args:
        uid: Playlist ID, inserted into ``YT_URL_LIST``.

    Returns:
        The response body decoded as text.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) "
            "Gecko/20180326 Firefox/59.0.1"
        )
    }
    # Without a timeout, requests may wait forever on an unresponsive server.
    with requests.get(YT_URL_LIST.format(uid), headers=headers, timeout=30) as req:
        return req.text


def _print_usage() -> None:
    """Print the required arguments along with an example command line."""
    print("Required arguments: the playlist ID and the output format")
    print(
        "Example:",
        os.path.basename(sys.executable),
        __file__,
        '"PLBvbiyw33QLlycUu2NL8sk52tBzc1WwV_"',
        '"video-{number:02}.mkv"',
    )


def main(*args: str) -> int:
    """Run the download for the given command-line arguments.

    Args:
        *args: The playlist ID followed by the output format; any extra
            argument is ignored.

    Returns:
        0 on success, 1 when the arguments are missing or when a
        ``ValueError`` is raised while downloading (usage is printed in
        both cases).
    """

    # Check the argument count explicitly: calling download_all() with too
    # few arguments raises TypeError, which the ValueError handler below
    # would never catch.
    if len(args) < 2:
        _print_usage()
        return 1

    playlist, fmt = args[:2]
    try:
        download_all(playlist, fmt)
    except ValueError:
        _print_usage()
        return 1
    return 0


if __name__ == "__main__":
    # sys.exit() is always available, whereas the bare exit() helper is only
    # injected by the site module (absent with "python -S", for instance).
    sys.exit(main(*sys.argv[1:]))

Un exemple afin d'illustrer une utilisation :

$ python yepisodes.py "PLBvbiyw33QLlycUu2NL8sk52tBzc1WwV_" "pyconfr2013-conf_{number:02}.mkv"
pyconfr2013-conf_01.mkv ✓ @ 3.25 Mio/s
pyconfr2013-conf_02.mkv ...

Toutes les vidéos de la PyConFR 2013 seront téléchargées dans le dossier courant. Simple comme bonjour !

Historique

2021-07-11 : Correction de la gestion des arguments (remplacement du ValueError par TypeError dans main()).
2021-05-12 : Plus besoin de PhantomJS ni du module objectpath.

Blog

Luma

Liens

GitHub

Historique