Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
galaxyproject
GitHub Repository: galaxyproject/training-material
Path: blob/main/bin/check-url-persistence.sh
1677 views
1
#!/bin/bash

#######################################
# Download one sitemap and cache the page URLs it lists, one per line.
# Arguments:
#   $1 - sitemap URL
#   $2 - cache file; if it already exists the download is skipped
#   $3 - optional path segment (e.g. archive/2021-02-01) that is rewritten
#        to "training-material" in every extracted URL
# Outputs:
#   Writes the URL list to $2; warnings go to stderr.
# Returns:
#   0 if the cache file already existed or was written, 1 if the download
#   failed (no cache file is created, so the next run retries).
#######################################
cache_sitemap_urls() {
  local sitemap_url=$1 cache_file=$2 archive_path=${3:-}
  local xml
  # Strip the <loc> tags; optionally rewrite the archive path segment.
  local rewrite='s/<loc>//;s/<\/loc>//'

  if [[ -f "$cache_file" ]]; then
    return 0
  fi

  # --fail makes HTTP errors (404, 5xx) a non-zero exit instead of handing an
  # error page to the parser. Fetching into a variable first means a failed
  # download never leaves an empty cache file that later runs would trust.
  if ! xml=$(curl --silent --show-error --fail "$sitemap_url"); then
    echo "Warning: could not download $sitemap_url; $cache_file not written" >&2
    return 1
  fi

  if [[ -n "$archive_path" ]]; then
    rewrite+=";s|${archive_path}|training-material|g"
  fi

  # Put every <url> entry on its own line, keep only the <loc> element,
  # then reduce it to the bare URL.
  printf '%s\n' "$xml" \
    | sed 's|<url>|\n|g' \
    | grep -o '<loc>[^<]*</loc>' \
    | sed "$rewrite" > "$cache_file"
}

cache_sitemap_urls https://training.galaxyproject.org/archive/2021-02-01/sitemap.xml \
  /tmp/2021.txt archive/2021-02-01

cache_sitemap_urls https://training.galaxyproject.org/archive/2022-01-01/sitemap.xml \
  /tmp/2022.txt archive/2022-01-01

cache_sitemap_urls https://training.galaxyproject.org/archive/2024-01-01/sitemap.xml \
  /tmp/2024.txt archive/2024-01-01

# The current sitemap needs no path rewrite. It is cached as 2099.txt, a name
# that also matches the /tmp/20*.txt glob read further down in this script.
cache_sitemap_urls https://training.galaxyproject.org/training-material/sitemap.xml \
  /tmp/2099.txt

# No guarantees of API or data-file persistence
# 1fe4d7d92e5ea5a5794cbe741eadb96a74511261
# NOTE(review): the hash above looks like a commit reference — confirm.

#######################################
# Turn a stream of site URLs into the list of _site/ files that must exist.
# Inputs:
#   stdin - URLs, one per line
# Outputs:
#   stdout - sorted, de-duplicated paths with the
#            https://training.galaxyproject.org origin replaced by "_site"
# Steps:
#   1. drop URLs matching the exclusion list below
#   2. map directory URLs (trailing "/") to their index.html
#   3. keep only names that end in ".html"
#######################################
gtn_expected_files() {
  # -F: the exclusions are literal substrings, so "." must not act as a
  # regex wildcard.
  grep -F -v \
    -e '/api/' \
    -e '/by-tool/' \
    -e '/hall-of-fame/' \
    -e '/badges/' \
    -e '/transcriptomics/tutorials/ref-based/faqs/rnaseq_data.html' \
    -e '/topics/data-management/' \
    -e '/topics/statistics/tutorials/intro-to-ml-with-python/slides.html' \
    -e '/topics/statistics/tutorials/intro-to-ml-with-python/slides-plain.html' \
    -e 'training-material/tags/' \
    -e 'data-library' \
    -e '/recordings/index.html' \
    -e '/topics/statistics/tutorials/gai-llm-with-python/slides.html' \
    -e '/topics/statistics/tutorials/gai-llm-with-python/slides-plain.html' \
    -e '/topics/statistics/tutorials/gai-llm-with-python/tutorial.html' \
    | grep --extended-regexp -v 'krona_?[a-z]*\.html' \
    | sed 's|/$|/index.html|' \
    | grep '\.html$' \
    | sort -u \
    | sed 's|https://training\.galaxyproject\.org|_site|'
}

cat /tmp/20*.txt | gtn_expected_files > /tmp/gtn-files.txt

#######################################
# Report every listed path that is not an existing regular file.
# Globals:
#   count (written) - number of missing paths found
# Arguments:
#   $1 - file containing one path per line
# Outputs:
#   Prints "Missing: <path>" to stdout for each missing path.
#######################################
count_missing_files() {
  local list_file=$1
  local path
  count=0
  # -r keeps backslashes in paths intact. IFS is left at its default, so
  # surrounding whitespace is still trimmed as before. The extra -n test
  # picks up a final line that has no trailing newline.
  while read -r path || [[ -n "$path" ]]; do
    if [[ ! -f "$path" ]]; then
      echo "Missing: $path"
      count=$((count + 1))
    fi
  done < "$list_file"
}

count_missing_files /tmp/gtn-files.txt

# Any missing file fails the check, with a hint on how to resolve it.
if (( count > 0 )); then
  echo "Files in previous versions that are not currently redirected: $count"
  echo "Please ensure you add redirect_from: entries to each page"
  echo "If this file is intentionally deleted, with no replacement, please update this script with some exclusions."
  exit 1
fi
