Git — Sourcephile - julm/julm-nix.git/blob - nixpkgs/overlays/podl.sh
2 # shellcheck disable=SC1004
3 # shellcheck disable=SC2016
4 # shellcheck disable=SC2086
5 # shellcheck disable=SC2155
6 # Name: podl - podcast downloader with caching
8 # Last version: https://git.code.sourcephile.fr/~julm/julm-nix/tree/main/item/nixpkgs/overlays/podl.sh
10 # $ mkdir LaMéthodeScientifique 3Blue1Brown
11 # $ echo >LaMéthodeScientifique/.feed http://radiofrance-podcast.net/podcast09/rss_14312.xml
12 # $ echo >3Blue1Brown/.feed https://youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw
13 # $ echo >>3Blue1Brown/.feed https://www.youtube.com/feeds/videos.xml?channel_id=UCYO_jab_esuFRV4b17AJtAw
16 # podl is a wrapper around yt-dlp(1) <https://yt-dlp.org/>
17 # to download podcasts from feeds whose URI(s) you write
18 # in ".feed" files in or below the current working directory.
19 # The feed formats currently supported are:
22 # - Every input format supported by yt-dlp,
23 # when using a ".page" instead of a ".feed" file.
24 # It downloads much more quickly than simply
25 # running those commands directly on the feed
26 # or on each entry of the feed, because
27 # to decide whether a podcast has already been downloaded or not,
28 # it relies only on the feed's content and on a ".downloaded" file
29 # it creates next to those ".feed" files;
30 # avoiding the cost of network activity when not necessary.
32 # - $SKIP_DOWNLOAD: if set, skip the download command
33 # but still register the entry in ".downloaded".
34 # Useful when adding the feed if you only want
35 # a few entries from the feed:
36 # run SKIP_DOWNLOAD=set podl, then edit ".downloaded"
37 # to remove the entries you want, then run podl again.
38 # This trick does not work with ".page" downloads.
39 # - $YT: options passed to yt-dlp.
40 # - $XTRACE: if set, enables set -x on the generated commands.
42 # - ".yt-dlp": optional yt-dlp config,
43 # (looked up in parent directories).
44 # - ".url.xpath": custom XPath selector for the URL
45 # (looked up in parent directories).
46 # SPDX-License-Identifier: GPL-3.0-or-later
47 # Bugs: Julien Moutinho <julm+podl@sourcephile.fr>
53 while test "$PWD" != / -a "$PWD" != //
64 find -H "$@" -type f
'(' -name .feed
-o -name .page
')' |
66 while IFS
= read -r found
; do
68 src
="$(readlink -e "$found")"
69 dst
="$(dirname "$found")"
70 dst
="$(readlink -e "$dst")"
75 export YT
="$(look_up .yt-dlp printf -- '--config-location %s') ${YT-}"
79 ${SKIP_DOWNLOAD:+--skip-download} \
80 --download-archive .downloaded \
85 -e 's@.*youtube\.com/channel/\([^/]\+\).*@https://www.youtube.com/feeds/videos.xml?channel_id=\1@' \
86 -e 's@.*youtube\.com/user/\([^/]\+\).*@https://www.youtube.com/feeds/videos.xml?user=\1@' \
87 -e 's@.*youtube\.com.*list=\([^&]\+\).*@https://www.youtube.com/feeds/videos.xml?playlist_id=\1@' \
89 for feed
in $feeds; do
92 trap "rm -f '$file'" EXIT
93 curl
-Ls "$feed" -o "$file"
94 case $(file --mime-type "$file" | cut -f 2 -d :) in
96 export html_match_xpath
="$(look_up .html.match.xpath cat)"
97 export html_url_xpath
="$(look_up .html.url.xpath cat)"
98 xml format
--html <"$file" 2>/dev
/null
|
100 -t -m "${html_match_xpath:-'//a'}" \
101 -o "url='" -v "translate(${html_url_xpath:-"@href"},\"'$IFS\",\"’ \")" -o "'" -n \
104 grep -qxF "url $url" .downloaded || {
105 if test ! "${skip_download:+set}"
109 { flock --exclusive 3
116 export url_xpath
="$(look_up .url.xpath cat)"
117 xml
select <"$file" --text \
118 -N atom
="http://www.w3.org/2005/Atom" \
119 -N yt
="http://www.youtube.com/xml/schemas/2015" \
120 -N mrss
="http://search.yahoo.com/mrss/" \
121 -t -m "/rss/channel/item" \
122 -o "title='" -v "translate(translate(title,'\"','_'),\"'/:?&|$IFS\",\"’_____ \")" -o "'" -n \
123 -o "guid='" -v "translate(translate(guid,'\"','_'),\"'$IFS\",\"’ \")" -o "'" -n \
124 -o "url='" -v "translate(${url_xpath:-"enclosure[1]/@url"},\"'$IFS\",\"’ \")" -o "'" -n \
125 -o "published='" -v "translate(pubDate,\"'$IFS\",\"’ \")" -o "'" -n \
130 # remove leading whitespace characters
131 title="${title#"${title%%[![:space:]]*}"}"
132 # remove trailing whitespace characters
133 title="${title%"${title##*[![:space:]]}"}"
135 grep -qxF -e "url $url" -e "guid $guid" .downloaded || {
136 published=$(date +%Y-%m-%d -d "$published")
137 echo >&2 "$dst/$published - $title"
138 if test ! "${SKIP_DOWNLOAD:+set}"
141 --output "$published - ${title//%/%%}.%(ext)s" \
144 { flock --exclusive 3
145 echo >&3 "guid $guid"
150 -t -m "/atom:feed/atom:entry[yt:videoId]" \
151 -o "title='" -v "translate(translate(atom:title,'\"','_'),\"'/:?&|$IFS\",\"’_____ \")" -o "'" -n \
152 -o "url='" -v "translate(${url_xpath:-"atom:link[@rel='alternate']/@href"},\"'$IFS\",\"’ \")" -o "'" -n \
153 -o "published='" -v "translate(atom:published,\"'$IFS\",\"’ \")" -o "'" -n \
154 -o "id='" -v "translate(yt:videoId,\"'$IFS\",\"’ \")" -o "'" -n \
156 # remove leading whitespace characters
157 title="${title#"${title%%[![:space:]]*}"}"
158 # remove trailing whitespace characters
159 title="${title%"${title##*[![:space:]]}"}"
160 grep -qxF "youtube $id" .downloaded || {
161 published=$(date +%Y-%m-%d -d "$published")
162 echo >&2 "$dst/$published - $title.$id"
163 if test "${SKIP_DOWNLOAD:+set}"
165 { flock --exclusive 3
166 echo >&3 "youtube $id"
170 --download-archive .downloaded \
171 --output "$published - ${title//%/%%}.%(id)s.%(format_id)s.%(ext)s" \
177 "$SHELL" -seu${XTRACE:+x}