From b1b5b8aebf9fc61344adaa5142e761d2a1c65dd8 Mon Sep 17 00:00:00 2001 From: tim Date: Mon, 4 Sep 2017 21:07:51 +0200 Subject: [PATCH] Add a new show to download_daily_shows.sh --- download_daily_shows.sh | 60 ++++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/download_daily_shows.sh b/download_daily_shows.sh index 927afef..e9fe851 100755 --- a/download_daily_shows.sh +++ b/download_daily_shows.sh @@ -17,9 +17,9 @@ YOUTUBE_DL="python3 /usr/local/bin/youtube-dl" KEEP_DAYS=100 ARGS=-qwc -if [ ! -d ${DOWNLOAD_DIR} ]; then +if [ ! -d "${DOWNLOAD_DIR}" ]; then echo "Error: directory not found: ${DOWNLOAD_DIR}" - exit +# exit fi OPTIND=1 # Reset is necessary if getopts was used previously @@ -60,41 +60,54 @@ function download_show { show="$2" mkdir -p "${TMP_DIR}/${show}" - mkdir -p "${DOWNLOAD_DIR}/${show}" # Get a chosen video format for each site if [[ "${url}" =~ cc.com ]]; then - FORMAT="1028/907/498/500" + #FORMAT="1028/907/498/500" FORMAT="best[width<=?640]" elif [[ "${url}" =~ cbs.com ]]; then FORMAT="rtmp-496-0" + elif [[ "${url}" =~ nbc.com ]]; then + FORMAT="best[width<=?640]" + fi + + if [ -f "${DOWNLOAD_DIR}/${show}.archive" ]; then + cp "${DOWNLOAD_DIR}/${show}.archive" "${TMP_DIR}/${show}.archive" fi # Try and download twice (resume if partially downloaded) for i in 1 2; do ${YOUTUBE_DL} ${ARGS} -f ${FORMAT} \ -o "${TMP_DIR}/${show}/%(upload_date)s/%(title)s-%(id)s.%(ext)s" \ - --download-archive "${DOWNLOAD_DIR}/${show}.archive" \ + --download-archive "${TMP_DIR}/${show}.archive" \ ${url} # Wait 10 minutes before trying again #sleep 600 - cp -nr "${TMP_DIR}/${show}"/* "${DOWNLOAD_DIR}/${show}"/ + if [ -d "${DOWNLOAD_DIR}/${show}" ]; then + mkdir -p "${DOWNLOAD_DIR}/${show}" + cp -nr "${TMP_DIR}/${show}"/* "${DOWNLOAD_DIR}/${show}"/ + cp "${TMP_DIR}/${show}.archive" "${DOWNLOAD_DIR}/${show}.archive" + fi done # Move files from ${DOWNLOAD_DIR}/The Daily Show/NA/ to ${DOWNLOAD_DIR}/The Daily Show/$date - 
while IFS= read -r -d '' file; do - dir=$( stat -c %y "$file" | sed -e 's/\([0-9]*\)-\([0-9]*\)-\([0-9]*\).*/\1\2\3/' ); - mkdir -p "${DOWNLOAD_DIR}/${show}/${dir}/" - # Keep a copy in /NA/ to avoid re-downloading - cp -n "$file" "${DOWNLOAD_DIR}/${show}/${dir}/" - done < <(find "${DOWNLOAD_DIR}/${show}/NA" -type f -print0) - # Remove old copies in /NA/, since we don't need to - # worry about re-downloading them any more - find "${DOWNLOAD_DIR}/${show}/NA/" -mindepth 1 -mtime +7 -delete + if [ -d "${DOWNLOAD_DIR}/${show}"/NA ]; then + while IFS= read -r -d '' file; do + dir=$( stat -c %y "$file" | sed -e 's/\([0-9]*\)-\([0-9]*\)-\([0-9]*\).*/\1\2\3/' ); + mkdir -p "${DOWNLOAD_DIR}/${show}/${dir}/" + # Keep a copy in /NA/ to avoid re-downloading + cp -n "$file" "${DOWNLOAD_DIR}/${show}/${dir}/" + done < <(find "${DOWNLOAD_DIR}/${show}/NA" -type f -print0) + # Remove old copies in /NA/, since we don't need to + # worry about re-downloading them any more + find "${DOWNLOAD_DIR}/${show}/NA/" -mindepth 1 -mtime +7 -delete + fi # Remove old shows - find "${DOWNLOAD_DIR}/${show}/" -mindepth 1 -mtime +${KEEP_DAYS} -delete + if [ -d "${DOWNLOAD_DIR}/${show}" ]; then + find "${DOWNLOAD_DIR}/${show}/" -mindepth 1 -mtime +${KEEP_DAYS} -delete + fi } @@ -103,20 +116,25 @@ if [[ -n ${url} ]]; then show="The Daily Show" elif [[ "${url}" =~ cbs.com ]]; then show="The Late Show" + elif [[ "${url}" =~ nbc.com ]]; then + show="Meet the Press" fi download_show "${url}" "${show}" else - #for show in "The Daily Show" "The Late Show"; do + #for show in "The Daily Show" "The Late Show" "Meet the Press"; do for show in "The Daily Show"; do url= if [[ "${show}" =~ Daily ]]; then url=http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes/ elif [[ "${show}" =~ Late ]]; then # Get URL from http://www.cbs.com/shows/the-late-show-with-stephen-colbert/video/ - # Requires Unotelly DNS: - # 197.242.94.51 - # 197.189.212.164 - url=$( wget -qO- 
http://www.cbs.com/shows/the-late-show-with-stephen-colbert/video/ | grep -E "/video/[^/]+/" | grep '"url":' | perl -pe 's/.*?"url":"(.*?)".*/$1/ims' | head -1 ) + # Requires Unlocator/Unotelly DNS + #url=$( wget -qO- http://www.cbs.com/shows/the-late-show-with-stephen-colbert/video/ | grep -E "/video/[^/]+/" | grep '"url":' | perl -pe 's/.*?"url":"(.*?)".*/$1/ims' | head -1 ) + url=$( wget -qO- http://www.cbs.com/shows/the-late-show-with-stephen-colbert/video/ | grep -Po '"url":.*?[^\\]",' | grep http | perl -pe 's/.*?"url":"(.*?)".*/$1\n/ims' | head -1 ) + elif [[ "${show}" =~ Meet ]]; then + # Get URL from https://www.nbc.com/meet-the-press/episodes + # Note that youtube-dl has a bug on https connections: https://github.com/rg3/youtube-dl/issues/13651 + url=$( wget -qO- https://www.nbc.com/meet-the-press/episodes | grep -Po '"permalink":.*?[^\\]",' | perl -pe 's/.*?"permalink":"(.*?)".*/$1\n/ims; s/https/http/g' | head -1 ) fi download_show "${url}" "${show}" done