Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
galaxyproject
GitHub Repository: galaxyproject/training-material
Path: blob/main/bin/galaxy-help-news.sh
1677 views
#!/bin/bash
##
## Matrix bot that scrapes Galaxy Help (help.galaxyproject.org) for topics
## carrying certain tags and posts the ones with few replies to a Matrix room.
##
## Every setting below may be overridden from the environment; the value on
## the right-hand side is the fallback used when the variable is unset or empty.

MATRIX_SERVER=${MATRIX_SERVER:-"https://matrix.org"}  ## base address of the Matrix server
ROOM_ID=${ROOM_ID:-'!TJRLNvfcbWbSRoUNpl:matrix.org'}  ## GTN Single Cell Maintainers
WANTED_TAGS=${WANTED_TAGS:-"scrna scrna-seq"}         ## space separated list of tags
MAX_REPLIES=${MAX_REPLIES:-1}                         ## keep topics with <= this many replies
HTML_TYPE=${HTML_TYPE:-"bullets"}                     ## "bullets" or "table"

## Result filters (appended verbatim to the tag URL)
OPTS=${OPTS:-"?ascending=true&order=activity"}
## Without an access token nothing can be posted: print the usage text
## (including the currently effective defaults) to stderr and stop with 255.
if [[ -z "${MATRIX_ACCESS_TOKEN:-}" ]]; then
    ## Unquoted heredoc delimiter: variables and $0 are expanded, and
    ## '\\' yields the single trailing backslash shown in the example.
    cat >&2 <<EOF

This is a Matrix bot that scrapes Galaxy Help for certain tags and posts to
a Room for topics that have at most X replies. Run this maybe once a month.

Example Usage:

    MATRIX_ACCESS_TOKEN='123_123_123' \\
    MATRIX_SERVER='https://matrix.org' \\
    ROOM_ID='!123_132_123:matrix.org' \\
    WANTED_TAGS='tag1 tag2' \\
    MAX_REPLIES=1 \\
    HTML_TYPE='bullets' \\
    bash $0

Where:
    MATRIX_ACCESS_TOKEN  Can be found in your Matrix profile under
                         'All settings' -> 'Help & About' -> 'Access Token'

    MATRIX_SERVER        The name or base address of the Matrix server to
                         post to. Default is '$MATRIX_SERVER'

    ROOM_ID              The Room ID can be found in the URL of the room
                         usually following format '!123123123:matrix.org'.
                         Default is '$ROOM_ID'
                         NOTE: Single quotes are very important here.

    WANTED_TAGS          A space separated list of valid tags to find posts
                         at https://help.galaxyproject.org/
                         Default is "$WANTED_TAGS"

    MAX_REPLIES          Filter for posts that have less than or equal to
                         this many replies. Default is "$MAX_REPLIES"

    HTML_TYPE            Render either a 'table' or 'bullets'. HTML tables
                         look great in the browser but don't render well on
                         mobile. Default is "$HTML_TYPE"

    OPTS                 Extra arguments to append to help.galaxyproject.org
                         URL. Default is "$OPTS"

EOF
    exit 255
fi
#######################################
# For a given TAG, fetch its page from the help forum, extract the topic
# rows and produce a 4-column TSV of Link, Title, Replies, Views.
# Globals:   OPTS (read) - query string appended to the tag URL
# Arguments: $1 - tag name
# Outputs:   TSV rows on stdout
#
# TODO: Add date too?
#######################################
function tag_to_tsv {
    local tag="$1"
    ## --fail: do not feed an HTTP error page to the parser;
    ## --show-error: still report why the fetch failed despite --silent;
    ## --location: follow redirects issued for the tag URL.
    curl --silent --show-error --fail --location \
        "https://help.galaxyproject.org/tag/${tag}${OPTS}" \
        | xmllint --noblanks --html --xpath '//tr[@class="topic-list-item"]/td/span' - 2>/dev/null \
        | sed -r 's|<span class[^>]+>||; s|</span>||; s|\s*<a.*href=\"([^\"]*)\" [^>]+>([^<]+)<.*|_ROW_\1\t\2|' \
        | tr '\n' '\t' \
        | sed -r 's|\s\s\s*|\t|g; s|_ROW_|\n|g'
    ## Pipeline stages after the fetch:
    ##  1. xmllint selects the <span> cells of every "topic-list-item" row
    ##     (its parser warnings are discarded on purpose).
    ##  2. sed strips the span tags and rewrites the anchor line into
    ##     "_ROW_<href><TAB><link text>".
    ##  3. tr joins all lines with tabs.
    ##  4. sed collapses runs of 2+ whitespace characters into one tab and
    ##     turns each _ROW_ marker into a newline, i.e. one topic per line.
}
#######################################
# For all wanted tags, build one 4-column TSV of Link, Title, Replies,
# Views (with a header line) and return the path of that table.
# Globals:   WANTED_TAGS (read) - space separated list of tags
# Outputs:   path of the generated TSV file on stdout
#######################################
function alltags_to_tsv {
    local tag raw_tsv out_tsv
    raw_tsv=$(mktemp --suffix=".tsv") || return
    out_tsv=$(mktemp --suffix=".tsv") || { rm -f -- "${raw_tsv}"; return 1; }

    ## Deliberately unquoted: WANTED_TAGS is split on whitespace into tags.
    for tag in ${WANTED_TAGS}; do
        tag_to_tsv "${tag}" >> "${raw_tsv}"
    done

    ## No blanks, no duplicate delimiters (a run of tabs of ANY length
    ## becomes one tab, so columns never shift), no duplicate rows, and
    ## sorted by ascending reply count (column 3).
    {
        printf 'Link\tTitle\tReplies\tViews\n'
        grep -v '^\s*$' "${raw_tsv}" \
            | sed -E 's|\t+|\t|g' \
            | sort -u \
            | sort -t $'\t' -k 3,3n
    } > "${out_tsv}"

    rm -f -- "${raw_tsv}"
    printf '%s\n' "${out_tsv}"
}
#######################################
# Filter a TSV file for maximum replies and return the path of the new
# filtered table. Only rows whose third column is a plain non-negative
# integer are considered, so the header line and rows with an empty or
# abbreviated reply count (e.g. "1.2k") are never emitted.
# Globals:   MAX_REPLIES (read) - inclusive upper bound on column 3
# Arguments: $1 - path of the input TSV (left untouched)
# Outputs:   path of the filtered TSV file on stdout
#######################################
function filter_tsv {
    local tsv="$1"
    local filtered_tsv
    filtered_tsv=$(mktemp --suffix=".tsv") || return
    ## "+ 0" forces a numeric comparison; without the integer check awk
    ## falls back to string comparison for non-numeric cells.
    awk -F $'\t' -v replies="${MAX_REPLIES}" \
        '$3 ~ /^ *[0-9]+ *$/ && ($3 + 0) <= (replies + 0)' \
        "${tsv}" > "${filtered_tsv}"
    printf '%s\n' "${filtered_tsv}"
}
#######################################
# Convert a TSV table (Link, Title, Replies, Views) into HTML text that
# can be embedded in a JSON string: newlines become spaces, and both
# backslashes and double quotes are backslash-escaped.
# Globals:   HTML_TYPE (read)   - "table" selects a <table>, anything
#                                 else an ordered list of bullets
#            WANTED_TAGS, MAX_REPLIES (read) - shown in the subtitle
# Arguments: $1 - path of the TSV file (every line is rendered)
# Outputs:   single-line, JSON-escaped HTML on stdout
#######################################
function tsv_to_html {
    local tsv="$1"
    local subtitle="Recent posts matching: <b>${WANTED_TAGS}</b>, with replies &le; ${MAX_REPLIES}"
    local awk_prog

    if [[ "${HTML_TYPE}" == "table" ]]; then
        awk_prog='
            BEGIN { print "<h1>Updates from Galaxy Help</h1>"subtitle"\n<table>\n<thead><tr><th>Topic</th><th>Replies</th><th>Views</th></tr></thead>\n<tbody>" }
            { print "<tr><td><a href=\""$1"\">"$2"</a></td><td>"$3"</td><td>"$4"</td></tr>" }
            END { print "</tbody>\n</table>" }'
    else ## bullets
        awk_prog='
            BEGIN { print "<h1>Updates from Galaxy Help</h1><br/><p>"subtitle"</p><ol>\n" }
            { print "<li><a href=\""$1"\">"$2"</a><ul><li>Replies: "$3" and Views: "$4"</li></ul></li>" }
            END { print "\n</ol>" }'
    fi

    ## Backslashes must be escaped BEFORE the quotes, otherwise the
    ## backslashes introduced for the quotes would be doubled as well.
    awk -F $'\t' -v subtitle="${subtitle}" "${awk_prog}" "${tsv}" \
        | tr '\n' ' ' \
        | sed 's|\\|\\\\|g; s|"|\\"|g'
}
#######################################
# Convert a TSV table (Link, Title, Replies, Views) into Markdown text
# that can be embedded in a JSON string. Line breaks are emitted as the
# two-character sequence \n, and backslashes and double quotes coming
# from the data are backslash-escaped.
# Globals:   WANTED_TAGS, MAX_REPLIES (read) - shown in the subtitle
# Arguments: $1 - path of the TSV file (every line is rendered)
# Outputs:   single-line, JSON-escaped Markdown on stdout
#######################################
function tsv_to_markdown {
    local tsv="$1"
    local subtitle="Recent posts matching: **${WANTED_TAGS}**, with replies ≤ ${MAX_REPLIES}"

    ## Data backslashes are doubled first, so that only the "\\n"
    ## sequences written by awk below remain as JSON newline escapes.
    sed 's|\\|\\\\|g' "${tsv}" \
        | awk -F $'\t' -v subtitle="${subtitle}" '
            BEGIN { print "## Updates from Galaxy Help\\n***"subtitle"***\\n" }
            { print "* ["$2"]("$1")\\n * "$3" replies and "$4" views\\n" }' \
        | tr '\n' ' ' \
        | sed 's|"|\\"|g'
}
#######################################
# Stuff the Markdown and HTML text content into a JSON message file.
# Both texts are inserted verbatim, so they must already be escaped for
# use inside a JSON string.
# See: https://spec.matrix.org/legacy/r0.0.0/client_server.html
#      "put-matrix-client-r0-rooms-roomid-send-eventtype-txnid"
# Arguments: $1 - Markdown text (becomes "body")
#            $2 - HTML text (becomes "formatted_body")
# Outputs:   path of the generated JSON file on stdout
#######################################
function md_and_html_to_json {
    local md_text="$1"
    local html_text="$2"
    local tmp_json
    tmp_json=$(mktemp --suffix=".json") || return
    ## printf '%s' inserts the texts untouched, whatever backslashes
    ## they contain.
    printf '{"msgtype":"m.notice", "format":"org.matrix.custom.html", "body": "%s", "formatted_body": "%s"}\n' \
        "${md_text}" "${html_text}" > "${tmp_json}"
    printf '%s\n' "${tmp_json}"
}
#######################################
# PUT a JSON message file to the configured Matrix room.
# Globals:   MATRIX_SERVER (read), ROOM_ID (read),
#            MATRIX_ACCESS_TOKEN (read)
# Arguments: $1 - path of the JSON file to send
# Outputs:   whatever curl prints (the server response)
# Returns:   exit status of curl
#######################################
function post_json_to_matrix {
    local json_file="$1"
    local server txnid post_url
    server="${MATRIX_SERVER%/}" ## remove trailing slash, if any
    ## Date-specific transaction ID, made unique within the minute by
    ## appending a random number.
    txnid="$(date '+%Y%m%d%H%M')${RANDOM}"
    ## Build curl
    post_url="${server}/_matrix/client/r0/rooms/${ROOM_ID}"
    post_url="${post_url}/send/m.room.message/${txnid}"
    ## NOTE(review): the token travels in the query string; consider an
    ## "Authorization: Bearer" header instead - confirm server support.
    post_url="${post_url}?access_token=${MATRIX_ACCESS_TOKEN}"
    ## DEBUG:
    ## - curl "$post_url" -X PUT --data '{"msgtype":"m.text","body":"hello"}'
    curl "${post_url}" -X PUT \
        --header 'Content-Type: application/json' \
        --data "@${json_file}"
}
#######################################
# Assert that required binaries are in PATH; otherwise list the missing
# ones on stderr and exit with status 255.
#######################################
function sanity_check {
    local required_progs=( cat curl xmllint awk sed grep tr jq mktemp sort uniq date wc )
    local prog
    local missing=()
    for prog in "${required_progs[@]}"; do
        ## 'command -v' is a shell builtin, unlike the external 'which'.
        if ! command -v "${prog}" > /dev/null 2>&1; then
            missing+=( "${prog}" )
        fi
    done
    if (( ${#missing[@]} > 0 )); then
        echo "Cannot run without: ${missing[*]}" >&2
        exit 255
    fi
}
## MAIN ##
#######################################
# Collect the topics for all wanted tags, keep those within the reply
# limit, render them as Markdown and HTML, wrap both in a JSON message
# and post it to the Matrix room. Temporary files are removed once they
# are no longer needed; an invalid JSON file is kept for inspection.
#######################################
function main {
    local all_tsv main_tsv main_mdwn_text main_html_text main_json_file
    local status

    sanity_check

    all_tsv=$(alltags_to_tsv)
    main_tsv=$(filter_tsv "${all_tsv}")
    rm -f -- "${all_tsv}"

    if (( $(wc -l < "${main_tsv}") == 0 )); then
        echo "Nothing new to post, aborting." >&2
        rm -f -- "${main_tsv}"
        exit 0
    fi

    main_mdwn_text=$(tsv_to_markdown "${main_tsv}")
    main_html_text=$(tsv_to_html "${main_tsv}")
    rm -f -- "${main_tsv}"

    main_json_file=$(md_and_html_to_json "${main_mdwn_text}" "${main_html_text}")
    ## Only the exit status matters here: jq fails on malformed JSON.
    if ! jq . "${main_json_file}" > /dev/null 2>&1; then
        echo "This is not a valid JSON, aborting." >&2
        echo "See: ${main_json_file}" >&2
        exit 255
    fi

    post_json_to_matrix "${main_json_file}"
    status=$?
    rm -f -- "${main_json_file}"
    return "${status}"
}

main "$@"