#!/bin/bash # shellcheck disable=SC1004 # shellcheck disable=SC2016 # shellcheck disable=SC2086 # shellcheck disable=SC2155 # Name: podl - podcast downloader with caching # Version: 2021-10-22 # Last version: https://git.code.sourcephile.fr/~julm/julm-nix/tree/main/item/nixpkgs/overlays/podl.sh # Synopsis: # $ mkdir LaMéthodeScientifique 3Blue1Brown # $ echo >LaMéthodeScientifique/.feed http://radiofrance-podcast.net/podcast09/rss_14312.xml # $ echo >3Blue1Brown/.feed https://youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw # $ echo >>3Blue1Brown/.feed https://www.youtube.com/feeds/videos.xml?channel_id=UCYO_jab_esuFRV4b17AJtAw # $ podl # Description: # podl is a wrapper around yt-dlp(1) # to download podcasts from feeds whose URI(s) you write # in ".feed" files in or below the current working directory. # The feed formats currently supported are: # - RSS # - Youtube's Atom # - Every input format supported by yt-dlp, # when using a ".page" instead of a ".feed" file. # It downloads much more quickly than simply # running those commands directly on the feed # or on each entries of the feed, because # to decide whether a podcast has already been downloaded or not, # it relies only on the feed's content and on a ".downloaded" file # it creates next to those ".feed" files; # avoiding the cost of network activity when not necessary. # Environment: # - $SKIP_DOWNLOAD: if set, skip the download command # but still register the entry in ".downloaded". # Useful when adding the feed if you only want # a few entries from the feed: # run SKIP_DOWNLOAD=set podl, then edit ".downloaded" # to remove the entries you want, then run podl again. # This trick does not work with ".page" downloads. # - $YT: options passed to yt-dlp. # - $XTRACE: if set, enables set -x on the generated commands. # Files: # - ".yt-dlp": optional yt-dlp config, # (looked up in parent directories). # - ".url.xpath": custom XPath selector for the URL # (looked up in parent directories). 
# SPDX-License-Identifier: GPL-3.0-or-later
# Bugs: Julien Moutinho

set -eu

# look_up KEY CMD [ARG...]:
# walk upward from $PWD toward the filesystem root looking for a file named
# KEY; at the first directory containing it, run CMD ARG... with the absolute
# path "$PWD/$KEY" appended, then stop.  Runs in a subshell so the caller's
# working directory is left untouched.  Prints nothing if KEY is never found.
look_up() {
  local key=$1; shift
  (
    # "//" is the POSIX-permitted alternate spelling of the root directory.
    while test "$PWD" != / -a "$PWD" != //
    do
      test ! -f "$key" || {
        "$@" "$PWD/$key"
        return
      }
      cd ..
    done
  )
}

# Collect every ".feed" and ".page" file in or below the directories given as
# arguments (GNU find defaults to "." when none is given — TODO confirm that
# is the intended no-argument behavior), resolving symlinked start points (-H),
# and process them in sorted order.
find -H "$@" -type f '(' -name .feed -o -name .page ')' |
sort |
while IFS= read -r found; do
  # IFS = space, newline, carriage-return.  The CR is printed last on purpose:
  # $(...) strips trailing newlines, so printing "\n\r" keeps the newline in.
  # These same $IFS characters are the ones scrubbed out of titles/guids/urls
  # by the XPath translate() calls below, so the generated assignments are
  # safe to word-split.
  IFS=$(printf ' \n\r')
  src="$(readlink -e "$found")"
  dst="$(dirname "$found")"
  dst="$(readlink -e "$dst")"
  export dst
  echo >&2 "$dst"
  (
    cd "$dst"
    # Prepend an optional "--config-location <file>" from the nearest ".yt-dlp"
    # found in parent directories (see look_up) to the user-supplied $YT opts.
    export YT="$(look_up .yt-dlp printf -- '--config-location %s') ${YT-}"
    case $found in
      (*/.page)
        # ".page": hand the whole batch file to yt-dlp and let it keep its own
        # archive of completed downloads in ".downloaded".
        yt-dlp $YT \
          ${SKIP_DOWNLOAD:+--skip-download} \
          --download-archive .downloaded \
          --batch-file "$src"
        ;;
      (*/.feed)
        # Normalize YouTube channel/user/playlist page URIs into their Atom
        # feed form; any other URI passes through the sed script unchanged.
        feeds=$(sed "$src" \
          -e 's@.*youtube\.com/channel/\([^/]\+\).*@https://www.youtube.com/feeds/videos.xml?channel_id=\1@' \
          -e 's@.*youtube\.com/user/\([^/]\+\).*@https://www.youtube.com/feeds/videos.xml?user=\1@' \
          -e 's@.*youtube\.com.*list=\([^&]\+\).*@https://www.youtube.com/feeds/videos.xml?playlist_id=\1@' \
        )
        for feed in $feeds; do
          # Optional custom XPath selector for the entry URL, taken from the
          # nearest ".url.xpath" file in parent directories.
          export url_xpath="$(look_up .url.xpath cat)"
          # Compile the feed into a small shell script with xmlstarlet: for
          # each entry, emit single-quoted title=/guid=/url=/published=
          # assignments (single quotes and $IFS characters inside the values
          # are neutralized by translate(), expanded here into the XPath
          # string literals), followed by the literal download logic held in
          # the single-quoted -o '...' blocks.  Two templates are tried: RSS
          # items, then YouTube Atom entries.  The resulting script is piped
          # to "$SHELL" below, so only the feed's content (plus the local
          # ".downloaded" cache) decides what gets downloaded — no per-entry
          # network round-trips.
          curl -Ls "$feed" |
          xml select -T \
            -N atom="http://www.w3.org/2005/Atom" \
            -N yt="http://www.youtube.com/xml/schemas/2015" \
            -N mrss="http://search.yahoo.com/mrss/" \
            -t -m "/rss/channel/item" \
              -o "title='" -v "translate(translate(title,'\"','_'),\"'/:?&|$IFS\",\"’_____ \")" -o "'" -n \
              -o "guid='" -v "translate(translate(guid,'\"','_'),\"'$IFS\",\"’ \")" -o "'" -n \
              -o "url='" -v "translate(${url_xpath:-"enclosure[1]/@url"},\"'$IFS\",\"’ \")" -o "'" -n \
              -o "published='" -v "translate(pubDate,\"'$IFS\",\"’ \")" -o "'" -n \
              -o '
               file=${url##*/}
               file=${file%%\#*}
               file=${file%%\?*}
               # remove leading whitespace characters
               title="${title#"${title%%[![:space:]]*}"}"
               # remove trailing whitespace characters
               title="${title%"${title##*[![:space:]]}"}"
               test -z "$url" ||
               grep -qxF -e "url $url" -e "guid $guid" .downloaded || {
                published=$(date +%Y-%m-%d -d "$published")
                echo >&2 "$dst/$published - $title"
                if test ! "${SKIP_DOWNLOAD:+set}"
                then
                 yt-dlp $YT \
                  --output "$published - ${title//%/%%}.%(ext)s" \
                  "$url"
                fi
                {
                 flock --exclusive 3
                 echo >&3 "guid $guid"
                 echo >&3 "url $url"
                } 3>>.downloaded
               }
              ' -n -b \
            -t -m "/atom:feed/atom:entry[yt:videoId]" \
              -o "title='" -v "translate(translate(atom:title,'\"','_'),\"'/:?&|$IFS\",\"’_____ \")" -o "'" -n \
              -o "url='" -v "translate(${url_xpath:-"atom:link[@rel='alternate']/@href"},\"'$IFS\",\"’ \")" -o "'" -n \
              -o "published='" -v "translate(atom:published,\"'$IFS\",\"’ \")" -o "'" -n \
              -o "id='" -v "translate(yt:videoId,\"'$IFS\",\"’ \")" -o "'" -n \
              -o '
               # remove leading whitespace characters
               title="${title#"${title%%[![:space:]]*}"}"
               # remove trailing whitespace characters
               title="${title%"${title##*[![:space:]]}"}"
               grep -qxF "youtube $id" .downloaded || {
                published=$(date +%Y-%m-%d -d "$published")
                echo >&2 "$dst/$published - $title.$id"
                if test "${SKIP_DOWNLOAD:+set}"
                then
                 {
                  flock --exclusive 3
                  echo >&3 "youtube $id"
                 } 3>>.downloaded
                else
                 yt-dlp $YT \
                  --download-archive .downloaded \
                  --output "$published - ${title//%/%%}.%(id)s.%(format_id)s.%(ext)s" \
                  "$url"
                fi
               }
              ' |
          # Run the generated script: -s read from stdin, -e/-u fail fast,
          # optional -x tracing when $XTRACE is set.
          "$SHELL" -seu${XTRACE:+x}
        done;;
    esac
  )
done