Extract images from RSS feeds


#!/bin/bash
#
# Download an RSS feed, collect every http(s) URL that appears in a
# url="..." attribute, and fetch each of them into IMAGE_DIR.
#
# Files written relative to the current directory:
#   rss-download   raw copy of the feed
#   images.txt     de-duplicated list of extracted URLs, one per line
#   rss-images/    downloaded files, named <first 16 hex chars of sha1>.jpg
#
# Exit status: 0 when the feed and every listed URL were fetched (or were
# already present), non-zero otherwise.

set -euo pipefail

# works
#URL="https://taz.de/!p4608;rss/"
#URL="https://www.stern.de/feed/standard/all/"
#URL="https://www.n-tv.de/rss"
#URL="https://www.faz.net/rss/aktuell"
#URL="https://www.spiegel.de/schlagzeilen/index.rss"
#URL="https://www.derstandard.at/rss"
URL="https://feeds.content.dowjones.io/public/rss/RSSWorldNews"
#URL="https://newsfeed.zeit.de/"
#URL="https://www.zdf.de/rss/zdf/nachrichten"

IMAGE_DIR="./rss-images"
# Not read anywhere in the code visible here; kept in case something else
# relies on it.
# shellcheck disable=SC2034
MYDATE=$(date +"%H-%M-%S")

mkdir -p "$IMAGE_DIR"

# wget -O creates the output file even when the request fails, so stop here
# instead of parsing an empty or partial feed.
if ! wget -O rss-download "$URL"; then
  printf 'error: could not download feed: %s\n' "$URL" >&2
  exit 1
fi

# Extract the attribute values and de-duplicate them.
#  - grep exits 1 when nothing matches; that only means "no URLs", so it is
#    tolerated, while a real grep error (status 2) still aborts the script.
#  - The match is case-insensitive (and also hits attributes that merely end
#    in "url", e.g. xmlUrl=), so the prefix is stripped up to the first quote
#    rather than by the literal lowercase text.
#  - The feed is XML, where '&' inside an attribute is written as '&amp;';
#    decode it so the real URL is requested.
{ grep -hoiE 'url="https?://[^"]+"' rss-download || [[ $? -eq 1 ]]; } \
  | sed -E 's/^[^"]*"//; s/"$//; s/&amp;/\&/g' \
  | sort -u > images.txt

failed=0
while IFS= read -r url; do
  [[ -n "$url" ]] || continue

  # Create a stable, short filename from the FULL URL (including query string)
  hash=$(printf '%s' "$url" | sha1sum | cut -c1-16)
  target="$IMAGE_DIR/${hash}.jpg"

  # Already fetched on an earlier run.
  if [[ -s "$target" ]]; then
    continue
  fi

  # Download to a side file and rename only on success, so an interrupted or
  # failed transfer never leaves something that looks like a finished image
  # and would block a retry on the next run.
  partial="${target}.part"
  if wget -O "$partial" "$url"; then
    mv -- "$partial" "$target"
  else
    printf 'warning: download failed: %s\n' "$url" >&2
    rm -f -- "$partial"
    failed=$((failed + 1))
  fi
done < images.txt

if (( failed > 0 )); then
  printf 'error: %d download(s) failed\n' "$failed" >&2
  exit 1
fi