#!/bin/bash # shellcheck disable=SC1004 # shellcheck disable=SC2016 # shellcheck disable=SC2086 # shellcheck disable=SC2155 # Name: podl - podcast downloader with caching # Version: 2021-10-22 # Last version: https://git.code.sourcephile.fr/~julm/julm-nix/tree/main/item/nixpkgs/overlays/podl.sh # Synopsis: # $ mkdir LaMéthodeScientifique 3Blue1Brown # $ echo >LaMéthodeScientifique/.feed http://radiofrance-podcast.net/podcast09/rss_14312.xml # $ echo >3Blue1Brown/.feed https://youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw # $ echo >>3Blue1Brown/.feed https://www.youtube.com/feeds/videos.xml?channel_id=UCYO_jab_esuFRV4b17AJtAw # $ podl # Description: # podl is a wrapper around yt-dlp(1) # to download podcasts from feeds whose URI(s) you write # in ".feed" files in or below the current working directory. # The feed formats currently supported are: # - RSS # - Youtube's Atom # - Every input format supported by yt-dlp, # when using a ".page" instead of a ".feed" file. # It downloads much more quickly than simply # running those commands directly on the feed # or on each entries of the feed, because # to decide whether a podcast has already been downloaded or not, # it relies only on the feed's content and on a ".downloaded" file # it creates next to those ".feed" files; # avoiding the cost of network activity when not necessary. # Environment: # - $SKIP_DOWNLOAD: if set, skip the download command # but still register the entry in ".downloaded". # Useful when adding the feed if you only want # a few entries from the feed: # run SKIP_DOWNLOAD=set podl, then edit ".downloaded" # to remove the entries you want, then run podl again. # This trick does not work with ".page" downloads. # - $YT: options passed to yt-dlp. # - $XTRACE: if set, enables set -x on the generated commands. # Files: # - ".yt-dlp": optional yt-dlp config, # (looked up in parent directories). # - ".url.xpath": custom XPath selector for the URL # (looked up in parent directories). 
# SPDX-License-Identifier: GPL-3.0-or-later
# Bugs: Julien Moutinho

set -eu

# look_up KEY CMD [ARG...]:
# walk upward from $PWD toward the filesystem root looking for a file named
# KEY; at the first directory containing it, run CMD ARG... with the absolute
# path "$PWD/$KEY" appended, then stop.  Runs in a subshell so the caller's
# working directory is left untouched.  Prints nothing if KEY is never found.
look_up() {
  local key=$1; shift
  (
    # "//" is the POSIX-permitted alternate spelling of the root directory.
    while test "$PWD" != / -a "$PWD" != //
    do
      test ! -f "$key" || {
        "$@" "$PWD/$key"
        return
      }
      cd ..
    done
  )
}

# Collect every ".feed" and ".page" file in or below the directories given as
# arguments (GNU find defaults to "." when none is given — TODO confirm that
# is the intended no-argument behavior), resolving symlinked start points (-H),
# and process them in sorted order.
find -H "$@" -type f '(' -name .feed -o -name .page ')' |
sort |
while IFS= read -r found; do
  # IFS = space, newline, carriage-return.  The CR is printed last on purpose:
  # $(...) strips trailing newlines, so printing "\n\r" keeps the newline in.
  # These same $IFS characters are the ones scrubbed out of titles/guids/urls
  # by the XPath translate() calls below, so the generated assignments are
  # safe to word-split.
  IFS=$(printf ' \n\r')
  src="$(readlink -e "$found")"
  dst="$(dirname "$found")"
  dst="$(readlink -e "$dst")"
  export dst
  echo >&2 "$dst"
  (
    cd "$dst"
    # Prepend an optional "--config-location <file>" from the nearest ".yt-dlp"
    # found in parent directories (see look_up) to the user-supplied $YT opts.
    export YT="$(look_up .yt-dlp printf -- '--config-location %s') ${YT-}"
    case $found in
      (*/.page)
        # ".page": hand the whole batch file to yt-dlp and let it keep its own
        # archive of completed downloads in ".downloaded".
        yt-dlp $YT \
          ${SKIP_DOWNLOAD:+--skip-download} \
          --download-archive .downloaded \
          --batch-file "$src"
        ;;
      (*/.feed)
        # Normalize YouTube channel/user/playlist page URIs into their Atom
        # feed form; any other URI passes through the sed script unchanged.
        feeds=$(sed "$src" \
          -e 's@.*youtube\.com/channel/\([^/]\+\).*@https://www.youtube.com/feeds/videos.xml?channel_id=\1@' \
          -e 's@.*youtube\.com/user/\([^/]\+\).*@https://www.youtube.com/feeds/videos.xml?user=\1@' \
          -e 's@.*youtube\.com.*list=\([^&]\+\).*@https://www.youtube.com/feeds/videos.xml?playlist_id=\1@' \
        )
        for feed in $feeds; do
          # Optional custom XPath selector for the entry URL, taken from the
          # nearest ".url.xpath" file in parent directories.
          export url_xpath="$(look_up .url.xpath cat)"
          # Compile the feed into a small shell script with xmlstarlet: for
          # each entry, emit single-quoted title=/guid=/url=/published=
          # assignments (single quotes and $IFS characters inside the values
          # are neutralized by translate(), expanded here into the XPath
          # string literals), followed by the literal download logic held in
          # the single-quoted -o '...' blocks.  Two templates are tried: RSS
          # items, then YouTube Atom entries.  The resulting script is piped
          # to "$SHELL" below, so only the feed's content (plus the local
          # ".downloaded" cache) decides what gets downloaded — no per-entry
          # network round-trips.
          curl -Ls "$feed" |
          xml select -T \
            -N atom="http://www.w3.org/2005/Atom" \
            -N yt="http://www.youtube.com/xml/schemas/2015" \
            -N mrss="http://search.yahoo.com/mrss/" \
            -t -m "/rss/channel/item" \
              -o "title='" -v "translate(translate(title,'\"','_'),\"'/:?&|$IFS\",\"’_____ \")" -o "'" -n \
              -o "guid='" -v "translate(translate(guid,'\"','_'),\"'$IFS\",\"’ \")" -o "'" -n \
              -o "url='" -v "translate(${url_xpath:-"enclosure[1]/@url"},\"'$IFS\",\"’ \")" -o "'" -n \
              -o "published='" -v "translate(pubDate,\"'$IFS\",\"’ \")" -o "'" -n \
              -o '
               file=${url##*/}
               file=${file%%\#*}
               file=${file%%\?*}
               # remove leading whitespace characters
               title="${title#"${title%%[![:space:]]*}"}"
               # remove trailing whitespace characters
               title="${title%"${title##*[![:space:]]}"}"
               test -z "$url" ||
               grep -qxF -e "url $url" -e "guid $guid" .downloaded || {
                published=$(date +%Y-%m-%d -d "$published")
                echo >&2 "$dst/$published - $title"
                if test ! "${SKIP_DOWNLOAD:+set}"
                then
                 yt-dlp $YT \
                  --output "$published - ${title//%/%%}.%(ext)s" \
                  "$url"
                fi
                {
                 flock --exclusive 3
                 echo >&3 "guid $guid"
                 echo >&3 "url $url"
                } 3>>.downloaded
               }
              ' -n -b \
            -t -m "/atom:feed/atom:entry[yt:videoId]" \
              -o "title='" -v "translate(translate(atom:title,'\"','_'),\"'/:?&|$IFS\",\"’_____ \")" -o "'" -n \
              -o "url='" -v "translate(${url_xpath:-"atom:link[@rel='alternate']/@href"},\"'$IFS\",\"’ \")" -o "'" -n \
              -o "published='" -v "translate(atom:published,\"'$IFS\",\"’ \")" -o "'" -n \
              -o "id='" -v "translate(yt:videoId,\"'$IFS\",\"’ \")" -o "'" -n \
              -o '
               # remove leading whitespace characters
               title="${title#"${title%%[![:space:]]*}"}"
               # remove trailing whitespace characters
               title="${title%"${title##*[![:space:]]}"}"
               grep -qxF "youtube $id" .downloaded || {
                published=$(date +%Y-%m-%d -d "$published")
                echo >&2 "$dst/$published - $title.$id"
                if test "${SKIP_DOWNLOAD:+set}"
                then
                 {
                  flock --exclusive 3
                  echo >&3 "youtube $id"
                 } 3>>.downloaded
                else
                 yt-dlp $YT \
                  --download-archive .downloaded \
                  --output "$published - ${title//%/%%}.%(id)s.%(format_id)s.%(ext)s" \
                  "$url"
                fi
               }
              ' |
          # Run the generated script: -s read from stdin, -e/-u fail fast,
          # optional -x tracing when $XTRACE is set.
          "$SHELL" -seu${XTRACE:+x}
        done;;
    esac
  )
done