]>
Git — Sourcephile - julm/julm-nix.git/blob - nixpkgs/overlays/podl.sh
   2 # shellcheck disable=SC1004 
   3 # shellcheck disable=SC2016 
   4 # shellcheck disable=SC2064 
   5 # shellcheck disable=SC2086 
   6 # shellcheck disable=SC2155 
   7 # Name: podl - podcast downloader with caching 
   9 # Latest version: https://git.sourcephile.fr/julm/julm-nix/tree/main/item/nixpkgs/overlays/podl.sh 
  11 #   $ mkdir LaMéthodeScientifique 3Blue1Brown 
  12 #   $ echo >LaMéthodeScientifique/.feed http://radiofrance-podcast.net/podcast09/rss_14312.xml 
  13 #   $ echo >3Blue1Brown/.feed https://youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw 
  14 #   $ echo >>3Blue1Brown/.feed https://www.youtube.com/feeds/videos.xml?channel_id=UCYO_jab_esuFRV4b17AJtAw 
  17 #   podl is a wrapper around yt-dlp(1) <https://yt-dlp.org/> 
  18 #   to download podcasts from feeds whose URI(s) you write 
  19 #   in ".feed" files in or below the current working directory. 
  20 #   The feed formats currently supported are: 
  23 #   - Every input format supported by yt-dlp, 
  24 #     when using a ".page" instead of a ".feed" file. 
  25 #   It downloads much more quickly than simply 
  26 #   running those commands directly on the feed 
  27 #   or on each entry of the feed, because 
  28 #   to decide whether a podcast has already been downloaded or not, 
  29 #   it relies only on the feed's content and on a ".downloaded" file 
  30 #   it creates next to those ".feed" files; 
  31 #   avoiding the cost of network activity when not necessary. 
  33 #   - $PODL_SKIP_DOWNLOAD: if set, skip the download command 
  34 #     but still register the entry in ".downloaded". 
  35 #     Useful when adding the feed if you only want 
  36 #     a few entries from the feed: 
  37 #     run PODL_SKIP_DOWNLOAD=set podl, then edit ".downloaded" 
  38 #     to remove the entries you want to download, then run podl again. 
  39 #     This trick does not work with ".page" downloads. 
  40 #   - $YT: options passed to yt-dlp. 
  41 #   - $PODL_XTRACE: if set, enables set -x on the generated commands. 
  43 #   - ".yt-dlp": optional yt-dlp config, 
  44 #     (looked up in parent directories). 
  45 #   - ".url.xpath": custom XPath selector for the URL 
  46 #     (looked up in parent directories). 
  47 # SPDX-License-Identifier: GPL-3.0-or-later 
  48 # Bugs: Julien Moutinho <julm+podl@sourcephile.fr> 
  54   while test "$PWD" != / -a "$PWD" != // 
  65 find -H "$@" -type f 
'(' -name .feed 
-o -name .page 
')' | 
  67 while IFS
= read -r found
; do 
  69   src
="$(readlink -e "$found")" 
  70   dst
="$(dirname "$found")" 
  71   dst
="$(readlink -e "$dst")" 
  76   export YT
="$(look_up .yt-dlp printf -- '--config-location %s') ${YT-}" 
  80      ${PODL_SKIP_DOWNLOAD:+--skip-download} \
 
  81      --download-archive .downloaded \
 
  86      -e 's@.*youtube\.com/channel/\([^/]\+\).*@https://www.youtube.com/feeds/videos.xml?channel_id=\1@' \
 
  87      -e 's@.*youtube\.com/user/\([^/]\+\).*@https://www.youtube.com/feeds/videos.xml?user=\1@' \
 
  88      -e 's@.*youtube\.com.*list=\([^&]\+\).*@https://www.youtube.com/feeds/videos.xml?playlist_id=\1@' \
 
  90     for feed 
in $feeds; do 
  93       trap "rm -f '$file'" EXIT
 
  94       curl 
-Ls "$feed" -o "$file" 
  95       case $(file --mime-type "$file" | cut -f 2 -d :) in 
  97           export html_match_xpath
="$(look_up .html.match.xpath cat)" 
  98           export html_url_xpath
="$(look_up .html.url.xpath cat)" 
  99           xml format 
--html <"$file" 2>/dev
/null 
| 
 101            -t -m "${html_match_xpath:-'//a'}" \
 
 102            -o "url='" -v "translate(${html_url_xpath:-"@href"},\"'$IFS\",\"’   \")" -o "'" -n \
 
 105             grep -qxF "url $url" .downloaded || { 
 106               if test ! "${skip_download:+set}" 
 110               { flock --exclusive 3 
 117           export url_xpath
="$(look_up .url.xpath cat)" 
 118           xml 
select <"$file" --text \
 
 119            -N atom
="http://www.w3.org/2005/Atom" \
 
 120            -N yt
="http://www.youtube.com/xml/schemas/2015" \
 
 121            -N mrss
="http://search.yahoo.com/mrss/" \
 
 122            -t -m "/rss/channel/item" \
 
 123            -o "title='" -v "translate(translate(title,'\"','_'),\"'/:?&|$IFS\",\"’_____   \")" -o "'" -n \
 
 124            -o "guid='" -v "translate(translate(guid,'\"','_'),\"'$IFS\",\"’   \")" -o "'" -n \
 
 125            -o "url='" -v "translate(${url_xpath:-"enclosure[1]/@url"},\"'$IFS\",\"’   \")" -o "'" -n \
 
 126            -o "published='" -v "translate(pubDate,\"'$IFS\",\"’   \")" -o "'" -n \
 
 131             # remove leading whitespace characters 
 132             title="${title#"${title%%[![:space:]]*}"}" 
 133             # remove trailing whitespace characters 
 134             title="${title%"${title##*[![:space:]]}"}" 
 136             grep -qxF -e "url $url" -e "guid $guid" .downloaded || { 
 137               published=$(date +%Y-%m-%d -d "$published") 
 138               echo >&2 "$dst/$published - $title" 
 139               if test ! "${PODL_SKIP_DOWNLOAD:+set}" 
 142                  --output "$published - ${title//%/%%}.%(ext)s" \ 
 145               { flock --exclusive 3 
 146                 echo >&3 "guid $guid" 
 151            -t -m "/atom:feed/atom:entry[yt:videoId]" \
 
 152            -o "title='" -v "translate(translate(atom:title,'\"','_'),\"'/:?&|$IFS\",\"’_____   \")" -o "'" -n \
 
 153            -o "url='" -v "translate(${url_xpath:-"atom:link[@rel='alternate']/@href"},\"'$IFS\",\"’   \")" -o "'" -n \
 
 154            -o "published='" -v "translate(atom:published,\"'$IFS\",\"’   \")" -o "'" -n \
 
 155            -o "id='" -v "translate(yt:videoId,\"'$IFS\",\"’   \")" -o "'" -n \
 
 157             # remove leading whitespace characters 
 158             title="${title#"${title%%[![:space:]]*}"}" 
 159             # remove trailing whitespace characters 
 160             title="${title%"${title##*[![:space:]]}"}" 
 161             grep -qxF "youtube $id" .downloaded || { 
 162               published=$(date +%Y-%m-%d -d "$published") 
 163               echo >&2 "$dst/$published - $title.$id" 
 164               if test "${PODL_SKIP_DOWNLOAD:+set}" 
 166                 { flock --exclusive 3 
 167                   echo >&3 "youtube $id" 
 171                  --download-archive .downloaded \ 
 172                  --output "%(release_date>%Y-%m-%d,upload_date>%Y-%m-%d|$published)s - ${title//%/%%}.%(id)s.%(format_id)s.%(ext)s" \ 
 178       "$SHELL" -seu${PODL_XTRACE:+x}