From c2cd04758a144be27b99b4dd8fb37075baf470a4 Mon Sep 17 00:00:00 2001
From: Timothy Allen
Date: Thu, 15 Oct 2015 16:22:03 +0200
Subject: [PATCH] Download Daily Show and Late Show

---
 download_daily_shows.sh | 160 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 160 insertions(+)
 create mode 100755 download_daily_shows.sh

diff --git a/download_daily_shows.sh b/download_daily_shows.sh
new file mode 100755
index 0000000..c25345f
--- /dev/null
+++ b/download_daily_shows.sh
@@ -0,0 +1,160 @@
+#!/bin/bash
+#
+# Download recent full episodes of The Daily Show and The Late Show with
+# youtube-dl, sort them into per-date directories and prune old episodes.
+#
+# Usage: download_daily_shows.sh [-d] [-u URL] [-h]
+#   -d      debug: verbose youtube-dl output plus shell tracing
+#   -u URL  download only this URL (the show is guessed from the host name)
+#   -h      show usage and exit
+
+# TODO videos seem to be downloading multiple times
+# TODO late show
+
+DOWNLOAD_DIR=/media/storage/shared/Videos
+# An array keeps the interpreter and the script path as separate words
+# without relying on word-splitting of an unquoted string.
+YOUTUBE_DL=(python3 /usr/local/bin/youtube-dl)
+#KEEP_DAYS=40
+KEEP_DAYS=100
+ARGS=-qwc
+# Start empty so a stray "url" in the environment cannot select single-URL mode
+url=
+
+# Print the command-line synopsis.
+usage() {
+  echo "Usage: ${0} [-d] [-u URL] [-h]"
+}
+
+OPTIND=1 # Reset is necessary if getopts was used previously
+while getopts ":u:dh" opt; do
+  case "${opt}" in
+    d)
+      ARGS=-vwc
+      set -x
+      ;;
+    u)
+      url="${OPTARG}"
+      ;;
+    h)
+      usage
+      exit 0
+      ;;
+    :)
+      echo "Option -${OPTARG} requires an argument" >&2
+      usage >&2
+      exit 1
+      ;;
+    \?)
+      echo "Unknown option -${OPTARG}" >&2
+      usage >&2
+      exit 1
+      ;;
+  esac
+done
+shift "$((OPTIND-1))" # Shift off the options and optional --.
+
+# Formats:
+# python3 /usr/local/bin/youtube-dl -F http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes/
+# [info] Available formats for b2a5ae64-3795-4914-b3a7-644eed83372f:
+# format code extension resolution note
+# 200 mp4 384x216
+# 400 mp4 384x216
+# 750 mp4 512x288
+# 1200 mp4 640x360
+# 1700 mp4 768x432
+# 2200 mp4 960x540
+# 3500 mp4 1280x720 (best)
+#FORMAT="vhttp-750"
+#FORMAT="rtmp-750"
+#FORMAT="rtmp-3500/rtmp2200"
+
+# Download new episodes from a URL, file them by date and prune old ones.
+#   $1 - URL handed to youtube-dl
+#   $2 - show name, used as the sub-directory of DOWNLOAD_DIR
+# Returns 1 without touching the disk when the URL or show name is empty or
+# when no video format is configured for the URL's site.
+download_show() {
+  local url="$1"
+  local show="$2"
+  local format na_dir file dir
+
+  # An empty show name would make the prune step at the bottom walk all of
+  # DOWNLOAD_DIR, so refuse to continue without one.
+  if [[ -z "${url}" || -z "${show}" ]]; then
+    echo "download_show: need both a URL and a show name" >&2
+    return 1
+  fi
+
+  # Get a chosen video format for each site
+  if [[ "${url}" =~ cc\.com ]]; then
+    format=1200
+  elif [[ "${url}" =~ cbs\.com ]]; then
+    format="rtmp-496-0"
+  else
+    echo "download_show: no video format known for ${url}" >&2
+    return 1
+  fi
+
+  mkdir -p "${DOWNLOAD_DIR}/${show}/"
+
+  # Try and download twice (resume if partially downloaded); the second
+  # attempt is skipped when the first one succeeds
+  for _ in 1 2; do
+    if "${YOUTUBE_DL[@]}" "${ARGS}" -f "${format}" \
+      -o "${DOWNLOAD_DIR}/${show}/%(upload_date)s/%(title)s-%(id)s.%(ext)s" \
+      --download-archive "${DOWNLOAD_DIR}/${show}.archive" \
+      "${url}"; then
+      break
+    fi
+
+    # Wait 10 minutes before trying again
+    #sleep 600
+  done
+
+  # Move files from ${DOWNLOAD_DIR}/The Daily Show/NA/ to ${DOWNLOAD_DIR}/The Daily Show/$date
+  # (youtube-dl writes "NA" for a missing upload_date; the directory may not exist)
+  na_dir="${DOWNLOAD_DIR}/${show}/NA"
+  if [[ -d "${na_dir}" ]]; then
+    while IFS= read -r -d '' file; do
+      # Target directory is named after the file's modification date, YYYYMMDD
+      dir=$(date -r "${file}" +%Y%m%d) || continue
+      mkdir -p "${DOWNLOAD_DIR}/${show}/${dir}/"
+      # Keep a copy in /NA/ to avoid re-downloading
+      cp -n "${file}" "${DOWNLOAD_DIR}/${show}/${dir}/"
+    done < <(find "${na_dir}" -type f -print0)
+    # Remove old copies in /NA/, since we don't need to
+    # worry about re-downloading them any more
+    find "${na_dir}/" -mindepth 1 -mtime +7 -delete
+  fi
+
+  # Remove old shows
+  find "${DOWNLOAD_DIR}/${show}/" -mindepth 1 -mtime "+${KEEP_DAYS}" -delete
+}
+
+if [[ -n "${url}" ]]; then
+  # Single-URL mode: work out the show from the site in the URL
+  if [[ "${url}" =~ cc\.com ]]; then
+    show="The Daily Show"
+  elif [[ "${url}" =~ cbs\.com ]]; then
+    show="The Late Show"
+  else
+    echo "Cannot tell which show ${url} belongs to" >&2
+    exit 1
+  fi
+  download_show "${url}" "${show}"
+else
+  for show in "The Daily Show" "The Late Show"; do
+    url=
+    if [[ "${show}" =~ Daily ]]; then
+      url=http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes/
+    elif [[ "${show}" =~ Late ]]; then
+      # Get URL from http://www.cbs.com/shows/the-late-show-with-stephen-colbert/video/
+      # Requires Unotelly DNS:
+      # 197.242.94.51
+      # 197.189.212.164
+      url=$( wget -qO- http://www.cbs.com/shows/the-late-show-with-stephen-colbert/video/ | grep -E "/video/[^/]+/" | grep '"url":' | perl -pe 's/.*?"url":"(.*?)".*/$1/ims' | head -1 )
+    fi
+    download_show "${url}" "${show}"
+  done
+fi