Path: blob/main/bin/galaxy-help-news.sh
1677 views
#!/bin/bash
##
## galaxy-help-news.sh
##
## Matrix bot that scrapes Galaxy Help (https://help.galaxyproject.org) for
## topics carrying any of the WANTED_TAGS, keeps the ones with at most
## MAX_REPLIES replies, and posts the resulting list to a Matrix room as an
## "m.notice" message with both a Markdown body and an HTML formatted body.
##
## Configuration is done through environment variables; run the script
## without MATRIX_ACCESS_TOKEN to print the full usage text.
##
## Requirements: GNU sed/grep/mktemp/sort, curl (>= 7.55 for '-H @-'),
## xmllint and jq.

MATRIX_SERVER=${MATRIX_SERVER:-"https://matrix.org"}
ROOM_ID=${ROOM_ID:-'!TJRLNvfcbWbSRoUNpl:matrix.org'} ## GTN Single Cell Maintainers
WANTED_TAGS=${WANTED_TAGS:-"scrna scrna-seq"}
MAX_REPLIES=${MAX_REPLIES:-1}
HTML_TYPE=${HTML_TYPE:-"bullets"} ## "table"

## Result filters
OPTS=${OPTS:-"?ascending=true&order=activity"}

## Scratch directory holding every intermediate file. It is created in
## main and removed on exit unless KEEP_WORK_DIR is set to 1.
WORK_DIR=""
KEEP_WORK_DIR=0

function usage {
    ## Print the help text to stderr. The here-document is unquoted on
    ## purpose so that the current defaults are shown to the user.
    cat >&2 <<EOF

This is a Matrix bot that scrapes Galaxy Help for certain tags and posts to
a Room for topics that have less than X replies. Run this maybe once a month.

Example Usage:

    MATRIX_ACCESS_TOKEN='123_123_123' \\
    MATRIX_SERVER='https://matrix.org' \\
    ROOM_ID='!123_132_123:matrix.org' \\
    WANTED_TAGS='tag1 tag2' \\
    MAX_REPLIES=1 \\
    HTML_TYPE='bullets' \\
    bash $0

Where:
    MATRIX_ACCESS_TOKEN  Can be found in your Matrix profile under
                         'All settings' -> 'Help & About' -> 'Access Token'

    MATRIX_SERVER        The name or base address of the Matrix server to
                         post to. Default is '$MATRIX_SERVER'

    ROOM_ID              The Room ID can be found in the URL of the room
                         usually following format '!123123123:matrix.org'.
                         Default is '$ROOM_ID'
                         NOTE: Single quotes are very important here.

    WANTED_TAGS          A space separated list of valid tags to find posts
                         at https://help.galaxyproject.org/
                         Default is "$WANTED_TAGS"

    MAX_REPLIES          Filter for posts that have less than or equal to
                         this many replies. Default is "$MAX_REPLIES"

    HTML_TYPE            Render either a 'table' or 'bullets'. HTML tables
                         look great in the browser but don't render well on
                         mobile. Default is "$HTML_TYPE"

    OPTS                 Extra arguments to append to help.galaxyproject.org
                         URL. Default is "$OPTS"

EOF
}

if [[ -z "${MATRIX_ACCESS_TOKEN:-}" ]]; then
    usage
    exit 255
fi

function cleanup {
    ## EXIT trap: remove the scratch directory unless it was explicitly
    ## kept for inspection (e.g. after producing an invalid JSON).
    if [[ -n "$WORK_DIR" && "$KEEP_WORK_DIR" -eq 0 ]]; then
        rm -rf -- "${WORK_DIR:?}"
    fi
}

function new_tmp {
    ## Create an empty temporary file with suffix $1 inside WORK_DIR
    ## (falling back to TMPDIR or /tmp when WORK_DIR is not set) and
    ## print its path.
    mktemp -p "${WORK_DIR:-${TMPDIR:-/tmp}}" --suffix="$1"
}

function tag_to_tsv {
    ## For a given TAG, fetch from the help forum, extract and parse
    ## the table and produce a 4-column TSV output of Link, Title,
    ## Replies, Views.
    ##
    ## TODO: Add date too?
    local tag="$1"
    ## -f/-S: report HTTP and network failures on stderr instead of
    ## silently parsing an error page. xmllint is noisy about real-world
    ## HTML, hence its stderr is discarded on purpose.
    curl -fsS "https://help.galaxyproject.org/tag/${tag}${OPTS}" \
        | xmllint --noblanks --html --xpath '//tr[@class="topic-list-item"]/td/span' - 2>/dev/null \
        | sed -E 's|<span class[^>]+>||; s|</span>||; s|\s*<a.*href=\"([^\"]*)\" [^>]+>([^<]+)<.*|_ROW_\1\t\2|' \
        | tr '\n' '\t' \
        | sed -E 's|\s\s\s*|\t|g; s|_ROW_|\n|g'
}

function alltags_to_tsv {
    ## For all wanted tags, populate a 4-column TSV output of Link,
    ## Title, Replies, Views (with a header row), and print the path of
    ## the table.
    local tag raw_tsv out_tsv
    local -a tags
    raw_tsv=$(new_tmp ".raw.tsv") || return
    out_tsv=$(new_tmp ".tsv") || return
    ## Split on whitespace without exposing the tags to globbing
    read -r -a tags <<< "${WANTED_TAGS}"
    for tag in "${tags[@]}"; do
        tag_to_tsv "$tag" >> "$raw_tsv"
    done
    ## No duplicates, no blanks, no duplicate delimiters,
    ## and sort by ascending reply count
    {
        printf 'Link\tTitle\tReplies\tViews\n'
        grep -v '^\s*$' "$raw_tsv" \
            | sed 's|\t\t|\t|g' \
            | sort -u \
            | sort -t $'\t' -k3,3n
    } > "$out_tsv"
    rm -f -- "$raw_tsv"
    echo "$out_tsv"
}

function filter_tsv {
    ## Filter the TSV file $1 (which starts with a header row) for rows
    ## with at most MAX_REPLIES replies, and print the path of the new
    ## header-less filtered table.
    local tsv="$1"
    local out_tsv
    out_tsv=$(new_tmp ".filtered.tsv") || return
    ## NR > 1 skips the header row explicitly
    awk -F$'\t' -v replies="$MAX_REPLIES" 'NR > 1 && $3 <= replies' "$tsv" > "$out_tsv"
    echo "$out_tsv"
}

function tsv_to_html {
    ## Convert the header-less TSV table $1 into a single line of HTML,
    ## rendered as a table when HTML_TYPE is "table" and as a bullet
    ## list otherwise. The text is NOT JSON-escaped.
    local tsv="$1"
    local subtitle="Recent posts matching: <b>${WANTED_TAGS}</b>, with replies ≤ ${MAX_REPLIES}"
    if [[ "$HTML_TYPE" == "table" ]]; then
        awk -F$'\t' -v subtitle="$subtitle" '
            BEGIN {
                print "<h1>Updates from Galaxy Help</h1>" subtitle
                print "<table>"
                print "<thead><tr><th>Topic</th><th>Replies</th><th>Views</th></tr></thead>"
                print "<tbody>"
            }
            { print "<tr><td><a href=\"" $1 "\">" $2 "</a></td><td>" $3 "</td><td>" $4 "</td></tr>" }
            END {
                print "</tbody>"
                print "</table>"
            }' "$tsv"
    else ## bullets
        awk -F$'\t' -v subtitle="$subtitle" '
            BEGIN { print "<h1>Updates from Galaxy Help</h1><br/><p>" subtitle "</p><ol>" }
            { print "<li><a href=\"" $1 "\">" $2 "</a><ul><li>Replies: " $3 " and Views: " $4 "</li></ul></li>" }
            END { print "</ol>" }' "$tsv"
    fi | tr '\n' ' '
}

function tsv_to_markdown {
    ## Convert the header-less TSV table $1 into Markdown text (with real
    ## newlines). The text is NOT JSON-escaped.
    local tsv="$1"
    local subtitle="Recent posts matching: **${WANTED_TAGS}**, with replies ≤ ${MAX_REPLIES}"
    awk -F$'\t' -v subtitle="$subtitle" '
        BEGIN {
            print "## Updates from Galaxy Help"
            print "***" subtitle "***"
            print ""
        }
        {
            print "* [" $2 "](" $1 ")"
            print "  * " $3 " replies and " $4 " views"
        }' "$tsv"
}

function md_and_html_to_json {
    ## Stuff the Markdown ($1) and HTML ($2) text content into a JSON
    ## message event and print the path of the JSON file.
    ##
    ## jq performs all the string escaping, so titles containing quotes,
    ## backslashes or control characters still give a valid document.
    local md_text="$1"
    local html_text="$2"
    local tmp_json
    tmp_json=$(new_tmp ".json") || return
    ## See: https://spec.matrix.org/legacy/r0.0.0/client_server.html
    ## "put-matrix-client-r0-rooms-roomid-send-eventtype-txnid"
    jq -n --arg body "$md_text" --arg formatted_body "$html_text" \
        '{msgtype: "m.notice",
          format: "org.matrix.custom.html",
          body: $body,
          formatted_body: $formatted_body}' > "$tmp_json" || return
    echo "$tmp_json"
}

function post_json_to_matrix {
    ## PUT the JSON file $1 as an m.room.message event into ROOM_ID.
    ## Prints the server response on stdout and returns non-zero when the
    ## transfer fails or the server does not answer with a 2xx status.
    local json_file="$1"
    local server txnid post_url response http_code
    server=${MATRIX_SERVER%/} ## remove trailing slash, if any
    ## Transaction ID: only needs to be unique per access token
    txnid="$(date '+%Y%m%d%H%M%S')$$${RANDOM}"
    post_url="${server}/_matrix/client/r0/rooms/${ROOM_ID}/send/m.room.message/${txnid}"
    ## The token is handed to curl as a header read from stdin ('-H @-')
    ## so that it never shows up in the URL, in server logs or in the
    ## process list. The status code is appended on its own last line.
    response=$(
        printf 'Authorization: Bearer %s\n' "$MATRIX_ACCESS_TOKEN" \
            | curl -sS -X PUT \
                -H @- \
                -H 'Content-Type: application/json' \
                --data-binary "@${json_file}" \
                -w '\n%{http_code}' \
                "$post_url"
    ) || return
    http_code=${response##*$'\n'}
    printf '%s\n' "${response%$'\n'*}"
    if [[ "$http_code" != 2?? ]]; then
        echo "Matrix server answered with HTTP status ${http_code}." >&2
        return 1
    fi
}

function sanity_check {
    ## Assert that required binaries are in PATH, exit 255 otherwise
    local required_progs=( awk cat curl date grep jq mktemp rm sed sort tr xmllint )
    local prog
    local -a missing=()
    for prog in "${required_progs[@]}"; do
        if ! command -v "$prog" > /dev/null 2>&1; then
            missing+=("$prog")
        fi
    done
    if (( ${#missing[@]} > 0 )); then
        echo "Cannot run without: ${missing[*]}" >&2
        exit 255
    fi
}

function main {
    ## Scrape, filter, render and post; see the file header for details.
    local all_tsv main_tsv main_mdwn_text main_html_text main_json_file

    sanity_check

    if [[ ! "$MAX_REPLIES" =~ ^[0-9]+$ ]]; then
        echo "MAX_REPLIES must be a non-negative integer, got: '${MAX_REPLIES}'" >&2
        exit 255
    fi

    if ! WORK_DIR=$(mktemp -d); then
        echo "Cannot create a temporary directory, aborting." >&2
        exit 255
    fi
    trap cleanup EXIT

    all_tsv=$(alltags_to_tsv)
    main_tsv=$(filter_tsv "$all_tsv")
    if [[ ! -s "$main_tsv" ]]; then
        echo "Nothing new to post, aborting." >&2
        exit 0
    fi

    main_mdwn_text=$(tsv_to_markdown "$main_tsv")
    main_html_text=$(tsv_to_html "$main_tsv")

    main_json_file=$(md_and_html_to_json "$main_mdwn_text" "$main_html_text")
    if ! jq . "$main_json_file" > /dev/null 2>&1; then
        KEEP_WORK_DIR=1 ## leave the evidence behind for debugging
        echo "This is not a valid JSON, aborting." >&2
        echo "See: ${main_json_file}" >&2
        exit 255
    fi

    post_json_to_matrix "$main_json_file" || exit 1
}

## MAIN ##
main "$@"