nixpkgs/overlays/podl.sh

   1 #!/usr/bin/env bash
   2 # shellcheck disable=SC1004
   3 # shellcheck disable=SC2016
   4 # shellcheck disable=SC2086
   5 # shellcheck disable=SC2155
   6 # Name: podl - podcast downloader with caching
   7 # Version: 2021-10-22
   8 # Last version: https://git.code.sourcephile.fr/~julm/julm-nix/tree/main/item/nixpkgs/overlays/podl.sh
   9 # Synopsis:
  10 #   $ mkdir LaMéthodeScientifique 3Blue1Brown
  11 #   $ echo >LaMéthodeScientifique/.feed http://radiofrance-podcast.net/podcast09/rss_14312.xml
  12 #   $ echo >3Blue1Brown/.feed https://youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw
  13 #   $ echo >>3Blue1Brown/.feed https://www.youtube.com/feeds/videos.xml?channel_id=UCYO_jab_esuFRV4b17AJtAw
  14 #   $ podl
  15 # Description:
  16 #   podl is a wrapper around yt-dlp(1) <https://yt-dlp.org/>
  17 #   to download podcasts from feeds whose URI(s) you write
  18 #   in ".feed" files in or below the current working directory.
  19 #   The feed formats currently supported are:
  20 #   - RSS
  21 #   - Youtube's Atom
  22 #   - Every input format supported by yt-dlp,
  23 #     when using a ".page" instead of a ".feed" file.
  24 #   It downloads much more quickly than simply
  25 #   running those commands directly on the feed
  26 #   or on each entry of the feed, because
  27 #   to decide whether a podcast has already been downloaded or not,
  28 #   it relies only on the feed's content and on a ".downloaded" file
  29 #   it creates next to those ".feed" files;
  30 #   avoiding the cost of network activity when not necessary.
  31 # Environment:
  32 #   - $SKIP_DOWNLOAD: if set, skip the download command
  33 #     but still register the entry in ".downloaded".
  34 #     Useful when adding the feed if you only want
  35 #     a few entries from the feed:
  36 #     run SKIP_DOWNLOAD=set podl, then edit ".downloaded"
  37 #     to remove the entries you want, then run podl again.
  38 #     This trick does not work with ".page" downloads.
  39 #   - $YT: options passed to yt-dlp.
  40 #   - $XTRACE: if set, enables set -x on the generated commands.
  41 # Files:
  42 #   - ".yt-dlp": optional yt-dlp config,
  43 #     (looked up in parent directories).
  44 #   - ".url.xpath": custom XPath selector for the URL
  45 #     (looked up in parent directories).
  46 # SPDX-License-Identifier: GPL-3.0-or-later
  47 # Bugs: Julien Moutinho <julm+podl@sourcephile.fr>
  # Abort on any unhandled command failure and on use of an unset variable.
  # NOTE: "pipefail" is intentionally not enabled; some pipelines further down
  # (e.g. the HTML tidy step whose stderr is discarded) tolerate a failing stage.
  set -eu

  # look_up KEY CMD [ARG...]
  #
  # Search for a regular file named KEY in the current directory, then in each
  # parent directory, up to and including the filesystem root.
  # On the first match, run CMD [ARG...] with the absolute path of that file
  # appended as last argument.
  #
  # Arguments: $1 - file name to look for
  #            $2.. - command (and leading arguments) to run on the match
  # Outputs:   whatever CMD writes; nothing when KEY is not found
  # Returns:   exit status of CMD on a match, 0 when KEY is not found
  #
  # The walk runs in a subshell, so the working directory of the caller
  # is never changed.
  look_up() {
    local key=$1
    shift
    (
      while :; do
        if [[ -f "$key" ]]; then
          # ${PWD%/} avoids a doubled slash when the match is in the root.
          "$@" "${PWD%/}/$key"
          # Leave the subshell with the status of the command just run.
          exit
        fi
        # The root is inspected above before stopping here, so a KEY file
        # stored in / is found too.
        case $PWD in
          / | //) exit 0 ;;
        esac
        # Explicit guard: without "set -e" a failing cd would loop forever.
        cd .. || exit
      done
    )
  }
  63
  # Main loop: process every ".feed" and ".page" file that find(1) reports for
  # the paths given on the command line, in sorted order.
  # Each file is handled in a subshell whose working directory is the directory
  # containing it, so downloads and the ".downloaded" register land next to it.
  #
  # Files consulted through look_up (current directory, then its parents):
  #   .yt-dlp           yt-dlp configuration file
  #   .url.xpath        XPath giving the URL of an RSS item / Atom entry
  #   .html.match.xpath XPath matching the candidate nodes of an HTML page
  #   .html.url.xpath   XPath giving the URL of such a node
  find -H "$@" -type f '(' -name .feed -o -name .page ')' |
  sort |
  while IFS= read -r found; do
    # Unquoted expansions below ($YT, $feeds) are split on space, newline and
    # carriage return only; the same three characters are blanked out of every
    # value extracted from a feed.
    IFS=$(printf ' \n\r')
    src="$(readlink -e "$found")"
    dst="$(dirname "$found")"
    dst="$(readlink -e "$dst")"
    # Exported because the generated per-entry scripts print "$dst".
    export dst
    echo >&2 "$dst"
    (
      cd "$dst"
      # "--config-location <path>" is prepended when a ".yt-dlp" file is found;
      # options coming from the caller's $YT are kept after it.
      export YT="$(look_up .yt-dlp printf -- '--config-location %s') ${YT-}"
      case $found in
        (*/.page)
          # ".page": the file is given to yt-dlp as a batch file, with
          # ".downloaded" as yt-dlp's own download archive.
          yt-dlp $YT \
            ${SKIP_DOWNLOAD:+--skip-download} \
            --download-archive .downloaded \
            --batch-file "$src"
          ;;
        (*/.feed)
          # Rewrite YouTube channel, user and playlist URLs into the matching
          # "feeds/videos.xml" URLs; other lines pass through unchanged.
          feeds=$(sed "$src" \
            -e 's@.*youtube\.com/channel/\([^/]\+\).*@https://www.youtube.com/feeds/videos.xml?channel_id=\1@' \
            -e 's@.*youtube\.com/user/\([^/]\+\).*@https://www.youtube.com/feeds/videos.xml?user=\1@' \
            -e 's@.*youtube\.com.*list=\([^&]\+\).*@https://www.youtube.com/feeds/videos.xml?playlist_id=\1@' \
          )
          for feed in $feeds; do
            (
              file=$(mktemp)
              trap "rm -f '$file'" EXIT
              curl -Ls "$feed" -o "$file"
              # Each branch prints a shell script, one stanza per feed entry,
              # which is executed by the "$SHELL" at the end of the pipeline.
              # Inside a stanza every extracted value is assigned within single
              # quotes; translate() turns apostrophes into a typographic one so
              # a value cannot close the quotes it is embedded in.
              case $(file --mime-type "$file" | cut -f 2 -d :) in
                (' 'text/html)
                  # HTML page: one URL per node matched by ".html.match.xpath"
                  # (default: //a), read with ".html.url.xpath" (default: @href).
                  export html_match_xpath="$(look_up .html.match.xpath cat)"
                  export html_url_xpath="$(look_up .html.url.xpath cat)"
                  # The default must be the bare expression //a: single quotes
                  # written inside a double-quoted expansion are kept literally
                  # and would turn it into an XPath string literal.
                  xml format --html <"$file" 2>/dev/null |
                  xml select --text \
                    -t -m "${html_match_xpath:-//a}" \
                    -o "url='" -v "translate(${html_url_xpath:-"@href"},\"'$IFS\",\"’   \")" -o "'" -n \
                    -o '
                     test -z "$url" ||
                     grep -qxF "url $url" .downloaded || {
                       if test ! "${SKIP_DOWNLOAD:+set}"
                       then
                         yt-dlp $YT "$url"
                       fi
                       { flock --exclusive 3
                         echo >&3 "url $url"
                       } 3>>.downloaded
                     }
                    ' -n
                  ;;
                (' 'text/xml)
                  # XML feed, handled by two templates in a single xml select:
                  # - RSS items: skipped when their URL or GUID is already in
                  #   ".downloaded"; both are registered after the download.
                  # - Atom entries carrying a yt:videoId: skipped when
                  #   "youtube <id>" is already in ".downloaded". With
                  #   $SKIP_DOWNLOAD set the id is registered here; otherwise
                  #   yt-dlp is run with ".downloaded" as its download archive
                  #   (presumably it records the id itself, to be confirmed).
                  # ".url.xpath", when found, replaces the default URL selector
                  # of both templates.
                  export url_xpath="$(look_up .url.xpath cat)"
                  xml select <"$file" --text \
                    -N atom="http://www.w3.org/2005/Atom" \
                    -N yt="http://www.youtube.com/xml/schemas/2015" \
                    -N mrss="http://search.yahoo.com/mrss/" \
                    -t -m "/rss/channel/item" \
                    -o "title='" -v "translate(translate(title,'\"','_'),\"'/:?&|$IFS\",\"’_____   \")" -o "'" -n \
                    -o "guid='" -v "translate(translate(guid,'\"','_'),\"'$IFS\",\"’   \")" -o "'" -n \
                    -o "url='" -v "translate(${url_xpath:-"enclosure[1]/@url"},\"'$IFS\",\"’   \")" -o "'" -n \
                    -o "published='" -v "translate(pubDate,\"'$IFS\",\"’   \")" -o "'" -n \
                    -o '
                     file=${url##*/}
                     file=${file%%\#*}
                     file=${file%%\?*}
                     # remove leading whitespace characters
                     title="${title#"${title%%[![:space:]]*}"}"
                     # remove trailing whitespace characters
                     title="${title%"${title##*[![:space:]]}"}"
                     test -z "$url" ||
                     grep -qxF -e "url $url" -e "guid $guid" .downloaded || {
                       published=$(date +%Y-%m-%d -d "$published")
                       echo >&2 "$dst/$published - $title"
                       if test ! "${SKIP_DOWNLOAD:+set}"
                       then
                         yt-dlp $YT \
                          --output "$published - ${title//%/%%}.%(ext)s" \
                          "$url"
                       fi
                       { flock --exclusive 3
                         echo >&3 "guid $guid"
                         echo >&3 "url $url"
                       } 3>>.downloaded
                     }
                    ' -n -b \
                    -t -m "/atom:feed/atom:entry[yt:videoId]" \
                    -o "title='" -v "translate(translate(atom:title,'\"','_'),\"'/:?&|$IFS\",\"’_____   \")" -o "'" -n \
                    -o "url='" -v "translate(${url_xpath:-"atom:link[@rel='alternate']/@href"},\"'$IFS\",\"’   \")" -o "'" -n \
                    -o "published='" -v "translate(atom:published,\"'$IFS\",\"’   \")" -o "'" -n \
                    -o "id='" -v "translate(yt:videoId,\"'$IFS\",\"’   \")" -o "'" -n \
                    -o '
                     # remove leading whitespace characters
                     title="${title#"${title%%[![:space:]]*}"}"
                     # remove trailing whitespace characters
                     title="${title%"${title##*[![:space:]]}"}"
                     grep -qxF "youtube $id" .downloaded || {
                       published=$(date +%Y-%m-%d -d "$published")
                       echo >&2 "$dst/$published - $title.$id"
                       if test "${SKIP_DOWNLOAD:+set}"
                       then
                         { flock --exclusive 3
                           echo >&3 "youtube $id"
                         } 3>>.downloaded
                       else
                         yt-dlp $YT \
                          --download-archive .downloaded \
                          --output "$published - ${title//%/%%}.%(id)s.%(format_id)s.%(ext)s" \
                          "$url"
                       fi
                     }
                    '
                  ;;
              esac |
              "$SHELL" -seu${XTRACE:+x}
            )
          done
          ;;
      esac
    )
  done
 183
 184