#!/usr/bin/env bash
# shellcheck disable=SC1004
# shellcheck disable=SC2016
# shellcheck disable=SC2064
# shellcheck disable=SC2086
# shellcheck disable=SC2155
# Name: podl - podcast downloader with caching
# Version: 2021-10-22
# Last version: https://git.code.sourcephile.fr/~julm/julm-nix/tree/main/item/nixpkgs/overlays/podl.sh
# Synopsis:
# $ mkdir LaMéthodeScientifique 3Blue1Brown
# $ echo >LaMéthodeScientifique/.feed http://radiofrance-podcast.net/podcast09/rss_14312.xml
# $ echo >3Blue1Brown/.feed https://youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw
# $ echo >>3Blue1Brown/.feed https://www.youtube.com/feeds/videos.xml?channel_id=UCYO_jab_esuFRV4b17AJtAw
# $ podl
# Description:
# podl is a wrapper around yt-dlp(1)
# to download podcasts from feeds whose URI(s) you write
# in ".feed" files in or below the current working directory.
# The feed formats currently supported are:
# - RSS
# - Youtube's Atom
# - Every input format supported by yt-dlp,
# when using a ".page" instead of a ".feed" file.
# It downloads much more quickly than simply
# running those commands directly on the feed
# or on each entry of the feed, because
# to decide whether a podcast has already been downloaded or not,
# it relies only on the feed's content and on a ".downloaded" file
# it creates next to those ".feed" files;
# avoiding the cost of network activity when not necessary.
# Environment:
# - $PODL_SKIP_DOWNLOAD: if set, skip the download command
# but still register the entry in ".downloaded".
# Useful when adding the feed if you only want
# a few entries from the feed:
# run PODL_SKIP_DOWNLOAD=set podl, then edit ".downloaded"
# to remove the entries you want, then run podl again.
# This trick does not work with ".page" downloads.
# - $YT: options passed to yt-dlp.
# - $PODL_XTRACE: if set, enables set -x on the generated commands.
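# Files:
# - ".yt-dlp": optional yt-dlp config
# (looked up in the feed's directory, then in its parent directories).
# - ".url.xpath": custom XPath selector for the URL of an RSS or Atom entry
# (looked up in the feed's directory, then in its parent directories).
# - ".html.match.xpath": custom XPath selector for the elements to iterate
# over when the content fetched from a feed URI is an HTML page
# (looked up in the feed's directory, then in its parent directories).
# - ".html.url.xpath": custom XPath selector for the URL,
# evaluated on each element matched in an HTML page; defaults to @href
# (looked up in the feed's directory, then in its parent directories).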
# SPDX-License-Identifier: GPL-3.0-or-later
# Bugs: Julien Moutinho
# Strict mode: abort on the first unhandled command failure (-e)
# and on the expansion of an unset variable (-u).
set -eu
#######################################
# Search for a file, starting in the current working directory and
# walking up through its parent directories, and run a command on
# the first match.
# Arguments:
#   $1   - name of the file to search for
#   $2.. - command (with its leading arguments) to run; the absolute
#          path of the file found is appended as its last argument
# Outputs:
#   whatever the command writes; nothing when no file is found
# Returns:
#   the status of the command when a file is found, otherwise 0.
# Notes:
#   The walk runs in a subshell, so the caller's working directory is
#   left untouched. The walk stops upon reaching the root directory,
#   which is itself not searched.
#######################################
look_up() {
  local name=$1
  shift
  (
    until [[ $PWD == / || $PWD == // ]]; do
      if [[ -f $name ]]; then
        "$@" "$PWD/$name"
        # Leave the subshell with the status of the command just run.
        exit
      fi
      cd ..
    done
  )
}
# Main loop: handle every ".feed" and ".page" file found by find(1)
# under the paths given as arguments, in sorted order.
#
# For a ".page" file, yt-dlp reads the URLs itself.
# For a ".feed" file, each feed is fetched with curl, then xmlstarlet
# turns every entry of the feed into a small shell script (variable
# assignments followed by a download snippet) which is piped into a shell.
# Entries already listed in ".downloaded" are skipped by those snippets.
find -H "$@" -type f '(' -name .feed -o -name .page ')' |
sort |
while IFS= read -r found; do
  # This IFS is used twice below: for the word splitting of the unquoted
  # $YT and $feeds, and interpolated into the XPath translate() calls,
  # which thereby keep spaces and delete newlines and carriage returns.
  IFS=$(printf ' \n\r')
  src="$(readlink -e "$found")"
  dst="$(dirname "$found")"
  dst="$(readlink -e "$dst")"
  # Exported for the generated scripts, which print "$dst" on stderr.
  export dst
  echo >&2 "$dst"
  (
    cd "$dst"
    # Prepend the nearest ".yt-dlp" config (if any) to the user's $YT;
    # exported because the generated scripts expand $YT too.
    export YT="$(look_up .yt-dlp printf -- '--config-location %s') ${YT-}"
    # Dispatch on the basename, so that a file given directly as an
    # argument (found as ".feed", without any "/") is handled as well.
    case ${found##*/} in
      (.page)
        # yt-dlp reads the URLs and manages ".downloaded" by itself.
        yt-dlp $YT \
          ${PODL_SKIP_DOWNLOAD:+--skip-download} \
          --download-archive .downloaded \
          --batch-file "$src"
        ;;
      (.feed)
        # Rewrite Youtube channel, user and playlist URLs
        # into the URLs of their Atom feeds.
        feeds=$(sed "$src" \
          -e 's@.*youtube\.com/channel/\([^/]\+\).*@https://www.youtube.com/feeds/videos.xml?channel_id=\1@' \
          -e 's@.*youtube\.com/user/\([^/]\+\).*@https://www.youtube.com/feeds/videos.xml?user=\1@' \
          -e 's@.*youtube\.com.*list=\([^&]\+\).*@https://www.youtube.com/feeds/videos.xml?playlist_id=\1@' \
        )
        for feed in $feeds; do
          (
            # Fetch the feed into a temporary file removed when this subshell exits.
            file=$(mktemp)
            trap "rm -f '$file'" EXIT
            curl -Ls "$feed" -o "$file"
            # Generate one script per entry, according to the type of the content.
            # Extracted values are wrapped into single quotes after translate()
            # has replaced any single quote they contain, so that they cannot
            # escape the quoting of the generated assignments.
            case $(file --mime-type "$file" | cut -f 2 -d :) in
              (' 'text/html)
                export html_match_xpath="$(look_up .html.match.xpath cat)"
                export html_url_xpath="$(look_up .html.url.xpath cat)"
                # The default selector must not be wrapped in single quotes:
                # inside double quotes they would be kept literally and turn
                # the selector into an XPath string instead of a node-set.
                xml format --html <"$file" 2>/dev/null |
                xml select --text \
                  -t -m "${html_match_xpath:-//a}" \
                  -o "url='" -v "translate(${html_url_xpath:-"@href"},\"'$IFS\",\"’ \")" -o "'" -n \
                  -o '
                    test -z "$url" ||
                    grep -qxF "url $url" .downloaded || {
                      if test ! "${PODL_SKIP_DOWNLOAD:+set}"
                      then
                        yt-dlp $YT "$url"
                      fi
                      { flock --exclusive 3
                        echo >&3 "url $url"
                      } 3>>.downloaded
                    }
                  ' -n
                ;;
              (' 'text/xml)
                export url_xpath="$(look_up .url.xpath cat)"
                # First template: RSS items; second template: Youtube Atom entries.
                xml select <"$file" --text \
                  -N atom="http://www.w3.org/2005/Atom" \
                  -N yt="http://www.youtube.com/xml/schemas/2015" \
                  -N mrss="http://search.yahoo.com/mrss/" \
                  -t -m "/rss/channel/item" \
                  -o "title='" -v "translate(translate(title,'\"','_'),\"'/:?&|$IFS\",\"’_____ \")" -o "'" -n \
                  -o "guid='" -v "translate(translate(guid,'\"','_'),\"'$IFS\",\"’ \")" -o "'" -n \
                  -o "url='" -v "translate(${url_xpath:-"enclosure[1]/@url"},\"'$IFS\",\"’ \")" -o "'" -n \
                  -o "published='" -v "translate(pubDate,\"'$IFS\",\"’ \")" -o "'" -n \
                  -o '
                    file=${url##*/}
                    file=${file%%\#*}
                    file=${file%%\?*}
                    # remove leading whitespace characters
                    title="${title#"${title%%[![:space:]]*}"}"
                    # remove trailing whitespace characters
                    title="${title%"${title##*[![:space:]]}"}"
                    # An entry without a guid is tracked by its url only,
                    # otherwise an empty "guid " line in .downloaded
                    # would mask every other entry without a guid.
                    test -z "$url" ||
                    grep -qxF -e "url $url" .downloaded ||
                    { test -n "$guid" && grep -qxF -e "guid $guid" .downloaded; } || {
                      published=$(date +%Y-%m-%d -d "$published")
                      echo >&2 "$dst/$published - $title"
                      if test ! "${PODL_SKIP_DOWNLOAD:+set}"
                      then
                        yt-dlp $YT \
                          --output "$published - ${title//%/%%}.%(ext)s" \
                          "$url"
                      fi
                      { flock --exclusive 3
                        test -z "$guid" || echo >&3 "guid $guid"
                        echo >&3 "url $url"
                      } 3>>.downloaded
                    }
                  ' -n -b \
                  -t -m "/atom:feed/atom:entry[yt:videoId]" \
                  -o "title='" -v "translate(translate(atom:title,'\"','_'),\"'/:?&|$IFS\",\"’_____ \")" -o "'" -n \
                  -o "url='" -v "translate(${url_xpath:-"atom:link[@rel='alternate']/@href"},\"'$IFS\",\"’ \")" -o "'" -n \
                  -o "published='" -v "translate(atom:published,\"'$IFS\",\"’ \")" -o "'" -n \
                  -o "id='" -v "translate(yt:videoId,\"'$IFS\",\"’ \")" -o "'" -n \
                  -o '
                    # remove leading whitespace characters
                    title="${title#"${title%%[![:space:]]*}"}"
                    # remove trailing whitespace characters
                    title="${title%"${title##*[![:space:]]}"}"
                    grep -qxF "youtube $id" .downloaded || {
                      published=$(date +%Y-%m-%d -d "$published")
                      echo >&2 "$dst/$published - $title.$id"
                      if test "${PODL_SKIP_DOWNLOAD:+set}"
                      then
                        { flock --exclusive 3
                          echo >&3 "youtube $id"
                        } 3>>.downloaded
                      else
                        yt-dlp $YT \
                          --download-archive .downloaded \
                          --output "%(release_date>%Y-%m-%d,upload_date>%Y-%m-%d|$published)s - ${title//%/%%}.%(id)s.%(format_id)s.%(ext)s" \
                          "$url"
                      fi
                    }
                  ';;
            esac |
            # Run the generated scripts: -s reads them from stdin,
            # -e and -u as in this file, -x when $PODL_XTRACE is set.
            # NOTE(review): the generated scripts use ${title//%/%%},
            # so $SHELL presumably has to be bash-compatible; confirm.
            "$SHELL" -seu${PODL_XTRACE:+x}
          )
        done;;
    esac
  )
done