nixpkgs/overlays/podl.sh

   1 #!/bin/bash
   2 # shellcheck disable=SC1004
   3 # shellcheck disable=SC2016
   4 # shellcheck disable=SC2086
   5 # shellcheck disable=SC2155
   6 # Name: podl - podcast downloader with caching
   7 # Version: 2021-10-22
   8 # Last version: https://git.sourcephile.fr/julm/home-julm.git/blob_plain/HEAD:/.config/nixpkgs/overlays/podl.sh
   9 # Synopsis:
  10 #   $ mkdir LaMéthodeScientifique 3Blue1Brown
  11 #   $ echo http://radiofrance-podcast.net/podcast09/rss_14312.xml >LaMéthodeScientifique/.feed
  12 #   $ echo https://youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw >3Blue1Brown/.feed
  13 #   $ podl
  14 # Description:
  15 #   podl is a wrapper around yt-dlp(1) <https://yt-dlp.org/>
  16 #   to download podcasts from feeds whose URI(s) you write
  17 #   in ".feed" files in or below the current working directory.
  18 #   The feed formats currently supported are:
  19 #   - RSS
  20 #   - Youtube's Atom
  21 #   - Every input format supported by yt-dlp,
  22 #     when using a ".page" instead of a ".feed" file.
  23 #   It downloads much more quickly than simply
  24 #   running yt-dlp directly on the feed
  25 #   or on each entry of the feed, because
  26 #   to decide whether a podcast has already been downloaded or not,
  27 #   it relies only on the feed's content and on a ".downloaded" file
  28 #   it creates next to those ".feed" files;
  29 #   avoiding the cost of network activity when not necessary.
  30 # Environment:
  31 #   - $SKIP_DOWNLOAD: if set, skip the download command
  32 #     but still register the entry in ".downloaded".
  33 #     Useful when adding the feed if you only want
  34 #     a few entries from the feed:
  35 #     run SKIP_DOWNLOAD=set podl, then edit ".downloaded"
  36 #     to remove the entries you want, then run podl again.
  37 #     This trick does not work with ".page" downloads.
  38 #   - $YT: options passed to yt-dlp.
  39 #   - $XTRACE: if set, enables set -x on the generated commands.
  40 # Files:
  41 #   - ".yt-dlp": optional yt-dlp config,
  42 #     (looked up in parent directories).
  43 #   - ".url.xpath": custom XPath selector for the URL
  44 #     (looked up in parent directories).
  45 # License: GNU GPLv3+
  46 # Bugs: Julien Moutinho <julm+podl@sourcephile.fr>
  # Strict mode: abort on any unhandled command failure (errexit)
  # and on any expansion of an unset variable (nounset).
  set -o errexit -o nounset
  48
  # look_up KEY CMD [ARG...]
  #   Search for a regular file named KEY in the current directory, then in
  #   each parent directory in turn, stopping before the root directory.
  #   On the first match, run CMD [ARG...] with the absolute path of the
  #   match appended as last argument, and return CMD's exit status.
  #   When no match is found, nothing is run and the status is 0.
  #   The walk happens in a subshell: the caller's working directory
  #   is never changed.
  look_up() {
    local key=$1
    shift
    (
      while [[ $PWD != / && $PWD != // ]]; do
        if [[ -f $key ]]; then
          "$@" "$PWD/$key"
          # Leave the subshell with the status of the command just run.
          exit
        fi
        # If the parent cannot be entered, $PWD would never change:
        # give up instead of looping forever when errexit is not in effect.
        cd .. || exit
      done
    )
  }
  62
  # Main loop: handle every ".feed" / ".page" file found under the paths
  # given as arguments, in sorted order.
  find -H "$@" -type f '(' -name .feed -o -name .page ')' |
  sort |
  while IFS= read -r found; do
    # Space, newline and carriage return. Used in two ways below:
    # - to word-split $feeds and $YT;
    # - spliced into the XPath translate() calls, so that these three
    #   characters are replaced by plain spaces in the extracted values.
    IFS=$(printf ' \n\r')
    src="$(readlink -e "$found")"
    dst="$(dirname "$found")"
    dst="$(readlink -e "$dst")"
    # Exported: the per-entry snippets generated below print "$dst".
    export dst
    echo >&2 "$dst"
    # Subshell, so that the cd and the YT override stay local to this file.
    (
    cd "$dst"
    # Prepend "--config-location <path>" when a ".yt-dlp" file is found in
    # $dst or one of its parents. $YT is deliberately left unquoted where it
    # is used, so that it splits into separate options.
    export YT="$(look_up .yt-dlp printf -- '--config-location %s') ${YT-}"
    case $found in
     (*/.page)
      # ".page": the file is handed to yt-dlp as a batch file, and yt-dlp
      # itself records what it fetched in ".downloaded".
      yt-dlp $YT \
       ${SKIP_DOWNLOAD:+--skip-download} \
       --download-archive .downloaded \
       --batch-file "$src"
      ;;
     (*/.feed)
      # Rewrite YouTube channel, user and playlist URLs into the matching
      # feeds/videos.xml URLs; any other line is passed through unchanged.
      feeds=$(sed "$src" \
       -e 's@.*youtube\.com/channel/\([^/]\+\).*@https://www.youtube.com/feeds/videos.xml?channel_id=\1@' \
       -e 's@.*youtube\.com/user/\([^/]\+\).*@https://www.youtube.com/feeds/videos.xml?user=\1@' \
       -e 's@.*youtube\.com.*list=\([^&]\+\).*@https://www.youtube.com/feeds/videos.xml?playlist_id=\1@' \
      )
      for feed in $feeds; do
        # Optional custom XPath selecting the URL of an entry.
        export url_xpath="$(look_up .url.xpath cat)"
        # The feed is turned into a shell script: for each RSS item, then for
        # each Atom entry having a yt:videoId, xml select prints a few
        # single-quoted variable assignments followed by a fixed snippet.
        # Single quotes in the values are translated to a typographic
        # apostrophe so that they cannot close the quoting.
        # NOTE(review): the snippets use ${var//pat/repl}, so $SHELL has to
        # be a shell supporting that expansion (e.g. bash).
        curl -Ls "$feed" |
        xml select -T \
         -N atom="http://www.w3.org/2005/Atom" \
         -N yt="http://www.youtube.com/xml/schemas/2015" \
         -N mrss="http://search.yahoo.com/mrss/" \
         -t -m "/rss/channel/item" \
         -o "title='" -v "translate(translate(title,'\"','_'),\"'/:?&|$IFS\",\"’_____   \")" -o "'" -n \
         -o "guid='" -v "translate(translate(guid,'\"','_'),\"'$IFS\",\"’   \")" -o "'" -n \
         -o "url='" -v "translate(${url_xpath:-"enclosure[1]/@url"},\"'$IFS\",\"’   \")" -o "'" -n \
         -o "published='" -v "translate(pubDate,\"'$IFS\",\"’   \")" -o "'" -n \
         -o '
          # remove leading whitespace characters
          title="${title#"${title%%[![:space:]]*}"}"
          # remove trailing whitespace characters
          title="${title%"${title##*[![:space:]]}"}"
          # An item without guid must neither be matched against nor be
          # registered as an empty "guid " line: that line would make every
          # later guid-less item look already downloaded.
          set -- -e "url $url"
          test -z "$guid" || set -- "$@" -e "guid $guid"
          test -z "$url" ||
          grep -qxF "$@" .downloaded || {
            published=$(date +%Y-%m-%d -d "$published")
            echo >&2 "$dst/$published - $title"
            if test ! "${SKIP_DOWNLOAD:+set}"
            then
              yt-dlp $YT \
               --output "$published - ${title//%/%%}.%(ext)s" \
               "$url"
            fi
            { flock --exclusive 3
              test -z "$guid" || echo >&3 "guid $guid"
              echo >&3 "url $url"
            } 3>>.downloaded
          }
         ' -n -b \
         -t -m "/atom:feed/atom:entry[yt:videoId]" \
         -o "title='" -v "translate(translate(atom:title,'\"','_'),\"'/:?&|$IFS\",\"’_____   \")" -o "'" -n \
         -o "url='" -v "translate(${url_xpath:-"atom:link[@rel='alternate']/@href"},\"'$IFS\",\"’   \")" -o "'" -n \
         -o "published='" -v "translate(atom:published,\"'$IFS\",\"’   \")" -o "'" -n \
         -o "id='" -v "translate(yt:videoId,\"'$IFS\",\"’   \")" -o "'" -n \
         -o '
          # remove leading whitespace characters
          title="${title#"${title%%[![:space:]]*}"}"
          # remove trailing whitespace characters
          title="${title%"${title##*[![:space:]]}"}"
          grep -qxF "youtube $id" .downloaded || {
            published=$(date +%Y-%m-%d -d "$published")
            echo >&2 "$dst/$published - $title.$id"
            if test "${SKIP_DOWNLOAD:+set}"
            then
              # Register the entry by hand, in the same "youtube ID" form
              # that is looked up above.
              { flock --exclusive 3
                echo >&3 "youtube $id"
              } 3>>.downloaded
            else
              yt-dlp $YT \
               --download-archive .downloaded \
               --output "$published - ${title//%/%%}.%(id)s.%(format_id)s.%(ext)s" \
               "$url"
            fi
          }
         ' |
        "$SHELL" -seu${XTRACE:+x}
      done;;
    esac
    )
  done