]> Git — Sourcephile - julm/julm-nix.git/blob - nixpkgs/overlays/podl.sh
nix: always set registry.nixpkgs
[julm/julm-nix.git] / nixpkgs / overlays / podl.sh
1 #!/usr/bin/env bash
2 # shellcheck disable=SC1004
3 # shellcheck disable=SC2016
4 # shellcheck disable=SC2064
5 # shellcheck disable=SC2086
6 # shellcheck disable=SC2155
7 # Name: podl - podcast downloader with caching
8 # Version: 2021-10-22
9 # Latest version: https://git.sourcephile.fr/julm/julm-nix/tree/main/item/nixpkgs/overlays/podl.sh
10 # Synopsis:
11 # $ mkdir LaMéthodeScientifique 3Blue1Brown
12 # $ echo >LaMéthodeScientifique/.feed http://radiofrance-podcast.net/podcast09/rss_14312.xml
13 # $ echo >3Blue1Brown/.feed https://youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw
14 # $ echo >>3Blue1Brown/.feed https://www.youtube.com/feeds/videos.xml?channel_id=UCYO_jab_esuFRV4b17AJtAw
15 # $ podl
16 # Description:
17 # podl is a wrapper around yt-dlp(1) <https://yt-dlp.org/>
18 # to download podcasts from feeds whose URI(s) you write
19 # in ".feed" files in or below the current working directory.
20 # The feed formats currently supported are:
21 # - RSS
22 # - Youtube's Atom
23 # - Every input format supported by yt-dlp,
24 # when using a ".page" instead of a ".feed" file.
# It is much quicker than simply running yt-dlp
# directly on the feed or on each entry of the feed,
# because to decide whether a podcast has already been downloaded,
# it relies only on the feed's content and on a ".downloaded" file
# that it creates next to those ".feed" files,
# thereby avoiding the cost of network activity when it is not necessary.
32 # Environment:
33 # - $PODL_SKIP_DOWNLOAD: if set, skip the download command
34 # but still register the entry in ".downloaded".
35 # Useful when adding the feed if you only want
36 # a few entries from the feed:
37 # run PODL_SKIP_DOWNLOAD=set podl, then edit ".downloaded"
38 # to remove the entries you want, then run podl again.
39 # This trick does not work with ".page" downloads.
40 # - $YT: options passed to yt-dlp.
41 # - $PODL_XTRACE: if set, enables set -x on the generated commands.
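# Files:
# - ".yt-dlp": optional yt-dlp config
#   (looked up in parent directories).
# - ".url.xpath": custom XPath selector for the URL of RSS/Atom entries
#   (looked up in parent directories).
# - ".html.match.xpath": custom XPath selector for the nodes to match
#   when the feed is an HTML page (looked up in parent directories).
# - ".html.url.xpath": custom XPath selector for the URL of each matched
#   node of an HTML page (looked up in parent directories).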
47 # SPDX-License-Identifier: GPL-3.0-or-later
48 # Bugs: Julien Moutinho <julm+podl@sourcephile.fr>
# Abort on any unhandled command failure and on expansion of unset variables.
set -o errexit -o nounset
50
#######################################
# Search for a regular file named $1 in the current directory, then in each
# of its parent directories, and run the remaining arguments as a command
# with the absolute path of the first match appended as last argument.
# Arguments:
#   $1   - name of the file to look for (e.g. ".yt-dlp")
#   $2.. - command, and its leading arguments, to run on the match
# Outputs:
#   Whatever the command writes; nothing when no file is found.
# Returns:
#   The command's status on a match, 0 when nothing is found,
#   non-zero when a parent directory cannot be entered.
# Note: the root directory itself is never searched.
#######################################
look_up() {
  local key=$1
  shift
  (
    # The walk happens in a subshell so that the caller's working
    # directory is left untouched.
    while [[ $PWD != / && $PWD != // ]]; do
      if [[ -f $key ]]; then
        "$@" "$PWD/$key"
        return
      fi
      # errexit is cleared inside $(...), which is how this function is
      # called: a failing cd must end the walk explicitly, otherwise $PWD
      # never changes and the loop spins forever.
      cd .. || return
    done
  )
}
64
# Main loop: for every ".feed" or ".page" file found under the paths given as
# arguments, download the entries that are not yet recorded in the
# ".downloaded" file sitting in the same directory.
#
# - ".page" files are handed to yt-dlp as batch files.
# - For ".feed" files, each URI is fetched with curl, then xmlstarlet ("xml")
#   turns the document into a small shell script (a few variable assignments
#   followed by a download stanza, once per entry) which is piped into $SHELL.
#
# Environment read here: YT, PODL_SKIP_DOWNLOAD, PODL_XTRACE, SHELL.
find -H "$@" -type f '(' -name .feed -o -name .page ')' |
  sort |
  while IFS= read -r found; do
    # Separators used to split $YT and the list of feed URIs below; also the
    # whitespace characters neutralized in the fields extracted from feeds.
    IFS=$(printf ' \n\r')
    # XPath translate() maps characters by position and DELETES every
    # character of its second argument that has no counterpart in the third.
    # The replacement strings are therefore padded with one blank per $IFS
    # character, so that whitespace becomes a space instead of vanishing
    # (a pubDate without blanks cannot be parsed by `date -d`).
    blanks=$(printf '%*s' "${#IFS}" '')
    field_from="'$IFS"
    field_to=" $blanks"
    title_from="'/:?&|$IFS"
    title_to="’_____$blanks"
    src="$(readlink -e "$found")"
    dst="$(dirname "$found")"
    dst="$(readlink -e "$dst")"
    # Exported because the generated scripts print "$dst" in their progress
    # messages.
    export dst
    echo >&2 "$dst"
    (
      cd "$dst"
      # Prepend the nearest ".yt-dlp" config, if any, to the user's options.
      export YT="$(look_up .yt-dlp printf -- '--config-location %s') ${YT-}"
      case $found in
      (*/.page)
        yt-dlp $YT \
          ${PODL_SKIP_DOWNLOAD:+--skip-download} \
          --download-archive .downloaded \
          --batch-file "$src"
        ;;
      (*/.feed)
        # Rewrite YouTube channel/user/playlist page URIs into the URIs of
        # their Atom feeds; any other line is kept as is.
        feeds=$(sed "$src" \
          -e 's@.*youtube\.com/channel/\([^/]\+\).*@https://www.youtube.com/feeds/videos.xml?channel_id=\1@' \
          -e 's@.*youtube\.com/user/\([^/]\+\).*@https://www.youtube.com/feeds/videos.xml?user=\1@' \
          -e 's@.*youtube\.com.*list=\([^&]\+\).*@https://www.youtube.com/feeds/videos.xml?playlist_id=\1@' \
        )
        for feed in $feeds; do
          (
            file=$(mktemp)
            # Safety net for the error paths; the normal path removes the
            # file explicitly at the end of this subshell.
            trap "rm -f '$file'" EXIT
            curl -Ls "$feed" -o "$file"
            # NOTE(review): the generated scripts use ${var//pat/rep}, so
            # $SHELL presumably has to be bash or compatible; confirm.
            case $(file --mime-type "$file" | cut -f 2 -d :) in
            (' 'text/html)
              export html_match_xpath="$(look_up .html.match.xpath cat)"
              export html_url_xpath="$(look_up .html.url.xpath cat)"
              # The default match expression must be the bare path //a:
              # single quotes inside a double-quoted ${var:-word} are
              # literal and would turn it into an XPath string.
              # The generated stanza honours PODL_SKIP_DOWNLOAD like the
              # RSS one does.
              xml format --html <"$file" 2>/dev/null |
                xml select --text \
                  -t -m "${html_match_xpath:-//a}" \
                  -o "url='" -v "translate(${html_url_xpath:-"@href"},\"$field_from\",\"$field_to\")" -o "'" -n \
                  -o '
                    test -z "$url" ||
                    grep -qxF "url $url" .downloaded || {
                      if test ! "${PODL_SKIP_DOWNLOAD:+set}"
                      then
                        yt-dlp $YT "$url"
                      fi
                      { flock --exclusive 3
                        echo >&3 "url $url"
                      } 3>>.downloaded
                    }
                  ' -n
              ;;
            (' 'text/xml)
              export url_xpath="$(look_up .url.xpath cat)"
              # Two templates: the first matches RSS items, the second
              # matches the entries of YouTube Atom feeds.
              xml select <"$file" --text \
                -N atom="http://www.w3.org/2005/Atom" \
                -N yt="http://www.youtube.com/xml/schemas/2015" \
                -N mrss="http://search.yahoo.com/mrss/" \
                -t -m "/rss/channel/item" \
                -o "title='" -v "translate(translate(title,'\"','_'),\"$title_from\",\"$title_to\")" -o "'" -n \
                -o "guid='" -v "translate(translate(guid,'\"','_'),\"$field_from\",\"$field_to\")" -o "'" -n \
                -o "url='" -v "translate(${url_xpath:-"enclosure[1]/@url"},\"$field_from\",\"$field_to\")" -o "'" -n \
                -o "published='" -v "translate(pubDate,\"$field_from\",\"$field_to\")" -o "'" -n \
                -o '
                  file=${url##*/}
                  file=${file%%\#*}
                  file=${file%%\?*}
                  # remove leading whitespace characters
                  title="${title#"${title%%[![:space:]]*}"}"
                  # remove trailing whitespace characters
                  title="${title%"${title##*[![:space:]]}"}"
                  test -z "$url" ||
                  grep -qxF -e "url $url" -e "guid $guid" .downloaded || {
                    published=$(date +%Y-%m-%d -d "$published")
                    echo >&2 "$dst/$published - $title"
                    if test ! "${PODL_SKIP_DOWNLOAD:+set}"
                    then
                      yt-dlp $YT \
                        --output "$published - ${title//%/%%}.%(ext)s" \
                        "$url"
                    fi
                    { flock --exclusive 3
                      echo >&3 "guid $guid"
                      echo >&3 "url $url"
                    } 3>>.downloaded
                  }
                ' -n -b \
                -t -m "/atom:feed/atom:entry[yt:videoId]" \
                -o "title='" -v "translate(translate(atom:title,'\"','_'),\"$title_from\",\"$title_to\")" -o "'" -n \
                -o "url='" -v "translate(${url_xpath:-"atom:link[@rel='alternate']/@href"},\"$field_from\",\"$field_to\")" -o "'" -n \
                -o "published='" -v "translate(atom:published,\"$field_from\",\"$field_to\")" -o "'" -n \
                -o "id='" -v "translate(yt:videoId,\"$field_from\",\"$field_to\")" -o "'" -n \
                -o '
                  # remove leading whitespace characters
                  title="${title#"${title%%[![:space:]]*}"}"
                  # remove trailing whitespace characters
                  title="${title%"${title##*[![:space:]]}"}"
                  grep -qxF "youtube $id" .downloaded || {
                    published=$(date +%Y-%m-%d -d "$published")
                    echo >&2 "$dst/$published - $title.$id"
                    if test "${PODL_SKIP_DOWNLOAD:+set}"
                    then
                      { flock --exclusive 3
                        echo >&3 "youtube $id"
                      } 3>>.downloaded
                    else
                      yt-dlp $YT \
                        --download-archive .downloaded \
                        --output "%(release_date>%Y-%m-%d,upload_date>%Y-%m-%d|$published)s - ${title//%/%%}.%(id)s.%(format_id)s.%(ext)s" \
                        "$url"
                    fi
                  }
                ';;
            esac |
              "$SHELL" -seu${PODL_XTRACE:+x}
            # Remove the temporary file now (the path must be expanded, hence
            # the double quotes), then drop the trap that is no longer needed.
            rm -f -- "$file"
            trap - EXIT
          )
        done;;
      esac
    )
  done
186
187