Path: blob/main/bin/check-url-persistence.sh
1677 views
#!/bin/bash
#
# check-url-persistence.sh
#
# Verifies that every HTML page listed in earlier sitemap snapshots of the
# site (and in the currently published sitemap) still exists as a file under
# ./_site/, so that previously published URLs keep resolving.
#
# Usage: run from the directory that contains the built _site/ tree.
#
# Files written:
#   /tmp/2021.txt /tmp/2022.txt /tmp/2024.txt /tmp/2099.txt
#       Cached URL lists extracted from each sitemap. A cache file that
#       already exists is reused and not downloaded again.
#   /tmp/gtn-files.txt
#       The _site/ paths that are expected to exist.
#
# Exit status:
#   0  every expected file exists
#   1  at least one expected file is missing
#   2  a sitemap could not be downloaded or contained no URLs

# -e is deliberately not set: grep exits 1 when it selects no lines, which is
# a legitimate outcome inside the filter pipeline. Failures that matter are
# checked explicitly instead.
set -uo pipefail

readonly SITE_URL='https://training.galaxyproject.org'
readonly CACHE_DIR='/tmp'
readonly EXPECTED_LIST="${CACHE_DIR}/gtn-files.txt"

# One entry per sitemap: "<cache file stem>:<site path holding sitemap.xml>".
readonly -a SNAPSHOTS=(
  '2021:archive/2021-02-01'
  '2022:archive/2022-01-01'
  '2024:archive/2024-01-01'
  '2099:training-material'
)

# No guarantees of API or data-file persistence
# 1fe4d7d92e5ea5a5794cbe741eadb96a74511261
# URLs containing any of these literal substrings are not checked.
readonly -a EXCLUDED_SUBSTRINGS=(
  '/api/'
  '/by-tool/'
  '/hall-of-fame/'
  '/badges/'
  '/transcriptomics/tutorials/ref-based/faqs/rnaseq_data.html'
  '/topics/data-management/'
  '/topics/statistics/tutorials/intro-to-ml-with-python/slides.html'
  '/topics/statistics/tutorials/intro-to-ml-with-python/slides-plain.html'
  'training-material/tags/'
  'data-library'
  '/recordings/index.html'
  '/topics/statistics/tutorials/gai-llm-with-python/slides.html'
  '/topics/statistics/tutorials/gai-llm-with-python/slides-plain.html'
  '/topics/statistics/tutorials/gai-llm-with-python/tutorial.html'
)

#######################################
# Download one sitemap and cache the URLs it lists, rewritten so that the
# snapshot path is replaced by "training-material".
# Globals:   SITE_URL, CACHE_DIR (read)
# Arguments: $1 - cache file stem (e.g. 2021)
#            $2 - site path that holds sitemap.xml (e.g. archive/2021-02-01)
# Outputs:   writes ${CACHE_DIR}/$1.txt; diagnostics on stderr
# Returns:   0 if the cache exists or was created, 1 otherwise
#######################################
fetch_sitemap() {
  local stem=$1 site_path=$2
  local cache="${CACHE_DIR}/${stem}.txt"
  local url="${SITE_URL}/${site_path}/sitemap.xml"
  local partial

  if [[ -f "$cache" ]]; then
    return 0
  fi

  # Build the list in a scratch file so that a failed or interrupted download
  # never leaves behind an empty cache that later runs would silently reuse.
  if ! partial=$(mktemp "${cache}.XXXXXX"); then
    printf 'Could not create a temporary file next to %s\n' "$cache" >&2
    return 1
  fi

  # grep -o prints every <loc> element on its own line; with pipefail the
  # pipeline fails if curl fails (--fail: HTTP errors too) or if no <loc>
  # element is found at all.
  if curl --silent --show-error --fail "$url" \
    | grep -o '<loc>[^<]*</loc>' \
    | sed -e 's/<loc>//;s/<\/loc>//' \
      -e "s|${site_path}|training-material|g" \
    > "$partial"; then
    mv -- "$partial" "$cache"
  else
    rm -f -- "$partial"
    printf 'Failed to fetch any URLs from %s\n' "$url" >&2
    return 1
  fi
}

#######################################
# Merge the cached URL lists, drop excluded URLs, and write the _site/ paths
# that are expected to exist.
# Globals:   SNAPSHOTS, EXCLUDED_SUBSTRINGS, CACHE_DIR, SITE_URL,
#            EXPECTED_LIST (read)
# Outputs:   writes ${EXPECTED_LIST}
#######################################
build_expected_list() {
  local entry substring
  local -a caches=() exclude_args=()

  for entry in "${SNAPSHOTS[@]}"; do
    caches+=("${CACHE_DIR}/${entry%%:*}.txt")
  done
  for substring in "${EXCLUDED_SUBSTRINGS[@]}"; do
    exclude_args+=(-e "$substring")
  done

  # Directory URLs ("…/") are mapped to their index.html; anything that is
  # not an .html page is ignored.
  cat -- "${caches[@]}" \
    | grep -v -F "${exclude_args[@]}" \
    | grep -v -E 'krona_?[a-z]*\.html' \
    | sed 's|/$|/index.html|' \
    | grep '\.html$' \
    | sort -u \
    | sed "s|${SITE_URL}|_site|" \
    > "$EXPECTED_LIST"
}

main() {
  local entry path
  local count=0

  for entry in "${SNAPSHOTS[@]}"; do
    fetch_sitemap "${entry%%:*}" "${entry#*:}" || exit 2
  done

  build_expected_list

  # IFS= and -r keep each path exactly as written in the list.
  while IFS= read -r path; do
    if [[ ! -f "$path" ]]; then
      echo "Missing: $path"
      count=$((count + 1))
    fi
  done < "$EXPECTED_LIST"

  if (( count > 0 )); then
    echo "Files in previous versions that are not currently redirected: $count"
    echo "Please ensure you add redirect_from: entries to each page"
    echo "If this file is intentionally deleted, with no replacement, please update this script with some exclusions."
    exit 1
  fi
}

main "$@"