From 696580d6696600e9669ca0bbd55199fea5edd2db Mon Sep 17 00:00:00 2001
From: Julien Moutinho <julm+julm-nix@sourcephile.fr>
Date: Fri, 8 Apr 2022 05:06:43 +0200
Subject: [PATCH] podl: add HTML support

Each feed is now downloaded to a temporary file and dispatched on the
MIME type reported by file(1):

- text/html: the page is tidied with `xml format --html`, nodes are
  selected with the XPath returned by `look_up .html.match.xpath`
  (default: //a) and the URL of each node is taken from the XPath
  returned by `look_up .html.url.xpath` (default: @href). Every URL not
  yet recorded in .downloaded is passed to yt-dlp (unless SKIP_DOWNLOAD
  is set) and then appended to .downloaded.
- text/xml: the existing RSS and YouTube Atom handling, now reading the
  temporary file instead of curl's standard output.

file(1) is added to the PATH of the wrapper for the detection.
---
 nixpkgs/overlays/podl.nix |   2 +-
 nixpkgs/overlays/podl.sh  | 154 +++++++++++++++++++++++--------------
 2 files changed, 94 insertions(+), 62 deletions(-)

diff --git a/nixpkgs/overlays/podl.nix b/nixpkgs/overlays/podl.nix
index 44f9e9b..f7116c3 100644
--- a/nixpkgs/overlays/podl.nix
+++ b/nixpkgs/overlays/podl.nix
@@ -2,7 +2,7 @@ self: super: {
   podl = super.writeShellScriptBin "podl" ''
     PATH=${with self; lib.makeBinPath [
       coreutils utillinux findutils gnugrep gnused
-      curl aria2 yt-dlp xmlstarlet
+      curl aria2 yt-dlp xmlstarlet file
     ]}
     ${builtins.readFile ./podl.sh}
   '';
diff --git a/nixpkgs/overlays/podl.sh b/nixpkgs/overlays/podl.sh
index edde9a3..30b49a3 100644
--- a/nixpkgs/overlays/podl.sh
+++ b/nixpkgs/overlays/podl.sh
@@ -87,69 +87,101 @@ while IFS= read -r found; do
        -e 's@.*youtube\.com.*list=\([^&]\+\).*@https://www.youtube.com/feeds/videos.xml?playlist_id=\1@' \
     )
     for feed in $feeds; do
-      export url_xpath="$(look_up .url.xpath cat)"
-      curl -Ls "$feed" |
-      xml select -T \
-       -N atom="http://www.w3.org/2005/Atom" \
-       -N yt="http://www.youtube.com/xml/schemas/2015" \
-       -N mrss="http://search.yahoo.com/mrss/" \
-       -t -m "/rss/channel/item" \
-       -o "title='" -v "translate(translate(title,'\"','_'),\"'/:?&|$IFS\",\"’_____ \")" -o "'" -n \
-       -o "guid='" -v "translate(translate(guid,'\"','_'),\"'$IFS\",\"’ \")" -o "'" -n \
-       -o "url='" -v "translate(${url_xpath:-"enclosure[1]/@url"},\"'$IFS\",\"’ \")" -o "'" -n \
-       -o "published='" -v "translate(pubDate,\"'$IFS\",\"’ \")" -o "'" -n \
-       -o '
-        file=${url##*/}
-        file=${file%%\#*}
-        file=${file%%\?*}
-        # remove leading whitespace characters
-        title="${title#"${title%%[![:space:]]*}"}"
-        # remove trailing whitespace characters
-        title="${title%"${title##*[![:space:]]}"}"
-        test -z "$url" ||
-        grep -qxF -e "url $url" -e "guid $guid" .downloaded || {
-          published=$(date +%Y-%m-%d -d "$published")
-          echo >&2 "$dst/$published - $title"
-          if test ! "${SKIP_DOWNLOAD:+set}"
-          then
-            yt-dlp $YT \
-             --output "$published - ${title//%/%%}.%(ext)s" \
-             "$url"
-          fi
-          { flock --exclusive 3
-            echo >&3 "guid $guid"
-            echo >&3 "url $url"
-          } 3>>.downloaded
-        }
-       ' -n -b \
-       -t -m "/atom:feed/atom:entry[yt:videoId]" \
-       -o "title='" -v "translate(translate(atom:title,'\"','_'),\"'/:?&|$IFS\",\"’_____ \")" -o "'" -n \
-       -o "url='" -v "translate(${url_xpath:-"atom:link[@rel='alternate']/@href"},\"'$IFS\",\"’ \")" -o "'" -n \
-       -o "published='" -v "translate(atom:published,\"'$IFS\",\"’ \")" -o "'" -n \
-       -o "id='" -v "translate(yt:videoId,\"'$IFS\",\"’ \")" -o "'" -n \
-       -o '
-        # remove leading whitespace characters
-        title="${title#"${title%%[![:space:]]*}"}"
-        # remove trailing whitespace characters
-        title="${title%"${title##*[![:space:]]}"}"
-        grep -qxF "youtube $id" .downloaded || {
-          published=$(date +%Y-%m-%d -d "$published")
-          echo >&2 "$dst/$published - $title.$id"
-          if test "${SKIP_DOWNLOAD:+set}"
-          then
-            { flock --exclusive 3
-              echo >&3 "youtube $id"
-            } 3>>.downloaded
-          else
-            yt-dlp $YT \
-             --download-archive .downloaded \
-             --output "$published - ${title//%/%%}.%(id)s.%(format_id)s.%(ext)s" \
-             "$url"
-          fi
-        }
-       ' |
+      (
+      # Fetch the feed into a temporary file so that its type can be inspected.
+      file=$(mktemp)
+      trap 'rm -f -- "$file"' EXIT
+      curl -Ls "$feed" -o "$file"
+      # HTML pages are scraped for links; XML is handled as an RSS or YouTube Atom feed.
+      case $(file --mime-type "$file" | cut -f 2 -d :) in
+       (' 'text/html)
+        export html_match_xpath="$(look_up .html.match.xpath cat)"
+        export html_url_xpath="$(look_up .html.url.xpath cat)"
+        # Turn the HTML into XML, then print one shell snippet per matched node.
+        xml format --html <"$file" 2>/dev/null |
+        xml select --text \
+         -t -m "${html_match_xpath:-"//a"}" \
+         -o "url='" -v "translate(${html_url_xpath:-"@href"},\"'$IFS\",\"’ \")" -o "'" -n \
+         -o '
+          test -z "$url" ||
+          grep -qxF "url $url" .downloaded || {
+            if test ! "${SKIP_DOWNLOAD:+set}"
+            then
+              yt-dlp $YT "$url"
+            fi
+            { flock --exclusive 3
+              echo >&3 "url $url"
+            } 3>>.downloaded
+          }
+         ' -n
+        ;;
+       (' 'text/xml)
+        export url_xpath="$(look_up .url.xpath cat)"
+        xml select <"$file" --text \
+         -N atom="http://www.w3.org/2005/Atom" \
+         -N yt="http://www.youtube.com/xml/schemas/2015" \
+         -N mrss="http://search.yahoo.com/mrss/" \
+         -t -m "/rss/channel/item" \
+         -o "title='" -v "translate(translate(title,'\"','_'),\"'/:?&|$IFS\",\"’_____ \")" -o "'" -n \
+         -o "guid='" -v "translate(translate(guid,'\"','_'),\"'$IFS\",\"’ \")" -o "'" -n \
+         -o "url='" -v "translate(${url_xpath:-"enclosure[1]/@url"},\"'$IFS\",\"’ \")" -o "'" -n \
+         -o "published='" -v "translate(pubDate,\"'$IFS\",\"’ \")" -o "'" -n \
+         -o '
+          file=${url##*/}
+          file=${file%%\#*}
+          file=${file%%\?*}
+          # remove leading whitespace characters
+          title="${title#"${title%%[![:space:]]*}"}"
+          # remove trailing whitespace characters
+          title="${title%"${title##*[![:space:]]}"}"
+          test -z "$url" ||
+          grep -qxF -e "url $url" -e "guid $guid" .downloaded || {
+            published=$(date +%Y-%m-%d -d "$published")
+            echo >&2 "$dst/$published - $title"
+            if test ! "${SKIP_DOWNLOAD:+set}"
+            then
+              yt-dlp $YT \
+               --output "$published - ${title//%/%%}.%(ext)s" \
+               "$url"
+            fi
+            { flock --exclusive 3
+              echo >&3 "guid $guid"
+              echo >&3 "url $url"
+            } 3>>.downloaded
+          }
+         ' -n -b \
+         -t -m "/atom:feed/atom:entry[yt:videoId]" \
+         -o "title='" -v "translate(translate(atom:title,'\"','_'),\"'/:?&|$IFS\",\"’_____ \")" -o "'" -n \
+         -o "url='" -v "translate(${url_xpath:-"atom:link[@rel='alternate']/@href"},\"'$IFS\",\"’ \")" -o "'" -n \
+         -o "published='" -v "translate(atom:published,\"'$IFS\",\"’ \")" -o "'" -n \
+         -o "id='" -v "translate(yt:videoId,\"'$IFS\",\"’ \")" -o "'" -n \
+         -o '
+          # remove leading whitespace characters
+          title="${title#"${title%%[![:space:]]*}"}"
+          # remove trailing whitespace characters
+          title="${title%"${title##*[![:space:]]}"}"
+          grep -qxF "youtube $id" .downloaded || {
+            published=$(date +%Y-%m-%d -d "$published")
+            echo >&2 "$dst/$published - $title.$id"
+            if test "${SKIP_DOWNLOAD:+set}"
+            then
+              { flock --exclusive 3
+                echo >&3 "youtube $id"
+              } 3>>.downloaded
+            else
+              yt-dlp $YT \
+               --download-archive .downloaded \
+               --output "$published - ${title//%/%%}.%(id)s.%(format_id)s.%(ext)s" \
+               "$url"
+            fi
+          }
+         ';;
+      esac |
       "$SHELL" -seu${XTRACE:+x}
+      )
     done;;
   esac
   )
 done
+
+
-- 
2.47.2