]> Git — Sourcephile - julm/julm-nix.git/blob - nixpkgs/overlays/podl.sh
replace unmaintained youtube-dl by yt-dlp
[julm/julm-nix.git] / nixpkgs / overlays / podl.sh
1 #!/bin/bash
2 # shellcheck disable=SC1004
3 # shellcheck disable=SC2016
4 # shellcheck disable=SC2086
5 # shellcheck disable=SC2155
6 # Name: podl - podcast downloader with caching
7 # Version: 2021-10-22
8 # Last version: https://git.sourcephile.fr/julm/home-julm.git/blob_plain/HEAD:/.config/nixpkgs/overlays/podl.sh
9 # Synopsis:
10 # $ mkdir LaMéthodeScientifique 3Blue1Brown
11 # $ echo http://radiofrance-podcast.net/podcast09/rss_14312.xml >LaMéthodeScientifique/.feed
12 # $ echo https://youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw >3Blue1Brown/.feed
13 # $ podl
14 # Description:
15 # podl is a wrapper around yt-dlp(1) <https://yt-dlp.org/>
16 # to download podcasts from feeds whose URI(s) you write
17 # in ".feed" files in or below the current working directory.
18 # The feed formats currently supported are:
19 # - RSS
20 # - Youtube's Atom
21 # - Every input format supported by yt-dlp,
22 # when using a ".page" instead of a ".feed" file.
23 # It downloads much more quickly than simply
24 # running those commands directly on the feed
25 # or on each entry of the feed, because
26 # to decide whether a podcast has already been downloaded or not,
27 # it relies only on the feed's content and on a ".downloaded" file
28 # it creates next to those ".feed" files;
29 # avoiding the cost of network activity when not necessary.
30 # Environment:
31 # - $SKIP_DOWNLOAD: if set, skip the download command
32 # but still register the entry in ".downloaded".
33 # Useful when adding the feed if you only want
34 # a few entries from the feed:
35 # run SKIP_DOWNLOAD=set podl, then edit ".downloaded"
36 # to remove the entries you want, then run podl again.
37 # This trick does not work with ".page" downloads.
38 # - $YT: options passed to yt-dlp.
39 # - $XTRACE: if set, enables set -x on the generated commands.
40 # Files:
41 # - ".yt-dlp": optional yt-dlp config,
42 # (looked up in parent directories).
43 # - ".url.xpath": custom XPath selector for the URL
44 # (looked up in parent directories).
45 # License: GNU GPLv3+
46 # Bugs: Julien Moutinho <julm+podl@sourcephile.fr>
47 set -eu
48
# look_up KEY CMD [ARGS...]
# Search for a regular file named KEY in the current working directory,
# then in each parent directory in turn, and run
#   CMD [ARGS...] <absolute path of the first match>
# The walk stops when it reaches the root directory, which is itself
# not searched. Nothing is run when there is no match.
# The search happens in a subshell, so the caller's working directory
# is left untouched.
# Returns: the status of CMD when a match is found;
#          non-zero when a parent directory cannot be entered;
#          zero otherwise.
look_up() {
  local key=$1; shift
  (
    while [[ $PWD != / && $PWD != // ]]; do
      if [[ -f $key ]]; then
        "$@" "$PWD/$key"
        exit
      fi
      # Give up instead of looping forever on the same directory when the
      # parent cannot be entered: errexit is not in effect when this
      # function is called from a command substitution.
      cd .. || exit
    done
  )
}
62
# Main loop: visit, in sorted order, every ".feed" and ".page" file that
# find(1) reports for the given arguments, and download into the directory
# holding that file whatever its ".downloaded" archive does not list yet.
find -H "$@" -type f '(' -name .feed -o -name .page ')' |
sort |
while IFS= read -r found; do
  # From here on IFS is: space, newline, carriage return.
  # It drives the word splitting of $YT and $feeds below, and it is also
  # spliced into the XPath translate() calls.
  IFS=$(printf ' \n\r')
  src="$(readlink -e "$found")"
  dst="$(dirname "$found")"
  dst="$(readlink -e "$dst")"
  # Exported because the generated commands print it.
  export dst
  echo >&2 "$dst"
  (
    # Subshell: the cd and the exports below do not leak into the next
    # iteration.
    cd "$dst"
    # Prepend "--config-location <file>" when a ".yt-dlp" file is found in
    # this directory or one of its parents; the caller's $YT comes after.
    export YT="$(look_up .yt-dlp printf -- '--config-location %s') ${YT-}"
    case $found in
      # ".page": hand the file to yt-dlp as a batch file and let yt-dlp
      # maintain the ".downloaded" archive itself.
      (*/.page)
        yt-dlp $YT \
          ${SKIP_DOWNLOAD:+--skip-download} \
          --download-archive .downloaded \
          --batch-file "$src"
        ;;
      (*/.feed)
        # Rewrite YouTube channel, user and playlist URLs into the URL of
        # the corresponding Atom feed; other lines are left as they are.
        feeds=$(sed "$src" \
          -e 's@.*youtube\.com/channel/\([^/]\+\).*@https://www.youtube.com/feeds/videos.xml?channel_id=\1@' \
          -e 's@.*youtube\.com/user/\([^/]\+\).*@https://www.youtube.com/feeds/videos.xml?user=\1@' \
          -e 's@.*youtube\.com.*list=\([^&]\+\).*@https://www.youtube.com/feeds/videos.xml?playlist_id=\1@' \
        )
        for feed in $feeds; do
          # Optional custom XPath selecting the URL of an entry.
          export url_xpath="$(look_up .url.xpath cat)"
          # Fetch the feed and turn each entry into a snippet of shell
          # code: first single-quoted assignments (title=, url=, ...),
          # then the commands using them. One template handles RSS items,
          # the other YouTube Atom entries. Apostrophes are translated
          # away from every value so that it cannot close the quoting of
          # its assignment.
          #
          # RSS entries: <guid> is optional. An empty guid is neither
          # looked up in nor recorded into ".downloaded", otherwise the
          # bare "guid " line written for the first guid-less item would
          # make every other guid-less item look already downloaded.
          #
          # The snippets rely on Bash expansions such as ${title//%/%%},
          # hence they are run by Bash rather than by the login shell
          # named in $SHELL.
          curl -Ls "$feed" |
          xml select -T \
            -N atom="http://www.w3.org/2005/Atom" \
            -N yt="http://www.youtube.com/xml/schemas/2015" \
            -N mrss="http://search.yahoo.com/mrss/" \
            -t -m "/rss/channel/item" \
            -o "title='" -v "translate(translate(title,'\"','_'),\"'/:?&|$IFS\",\"’_____ \")" -o "'" -n \
            -o "guid='" -v "translate(translate(guid,'\"','_'),\"'$IFS\",\" \")" -o "'" -n \
            -o "url='" -v "translate(${url_xpath:-"enclosure[1]/@url"},\"'$IFS\",\" \")" -o "'" -n \
            -o "published='" -v "translate(pubDate,\"'$IFS\",\" \")" -o "'" -n \
            -o '
              file=${url##*/}
              file=${file%%\#*}
              file=${file%%\?*}
              # remove leading whitespace characters
              title="${title#"${title%%[![:space:]]*}"}"
              # remove trailing whitespace characters
              title="${title%"${title##*[![:space:]]}"}"
              test -z "$url" ||
              grep -qxF -e "url $url" ${guid:+-e "guid $guid"} .downloaded || {
                published=$(date +%Y-%m-%d -d "$published")
                echo >&2 "$dst/$published - $title"
                if test ! "${SKIP_DOWNLOAD:+set}"
                then
                  yt-dlp $YT \
                    --output "$published - ${title//%/%%}.%(ext)s" \
                    "$url"
                fi
                { flock --exclusive 3
                  test -z "$guid" || echo >&3 "guid $guid"
                  echo >&3 "url $url"
                } 3>>.downloaded
              }
            ' -n -b \
            -t -m "/atom:feed/atom:entry[yt:videoId]" \
            -o "title='" -v "translate(translate(atom:title,'\"','_'),\"'/:?&|$IFS\",\"’_____ \")" -o "'" -n \
            -o "url='" -v "translate(${url_xpath:-"atom:link[@rel='alternate']/@href"},\"'$IFS\",\" \")" -o "'" -n \
            -o "published='" -v "translate(atom:published,\"'$IFS\",\" \")" -o "'" -n \
            -o "id='" -v "translate(yt:videoId,\"'$IFS\",\" \")" -o "'" -n \
            -o '
              # remove leading whitespace characters
              title="${title#"${title%%[![:space:]]*}"}"
              # remove trailing whitespace characters
              title="${title%"${title##*[![:space:]]}"}"
              grep -qxF "youtube $id" .downloaded || {
                published=$(date +%Y-%m-%d -d "$published")
                echo >&2 "$dst/$published - $title.$id"
                if test "${SKIP_DOWNLOAD:+set}"
                then
                  { flock --exclusive 3
                    echo >&3 "youtube $id"
                  } 3>>.downloaded
                else
                  yt-dlp $YT \
                    --download-archive .downloaded \
                    --output "$published - ${title//%/%%}.%(id)s.%(format_id)s.%(ext)s" \
                    "$url"
                fi
              }
            ' |
          "${BASH:-bash}" -seu${XTRACE:+x}
        done;;
    esac
  )
done