# Warn the operator up front: the report printed by this script includes
# account names and e-mail addresses.
print(
    "CONFIDENTIAL: don't share the generated data publicly. "
    "It is solely used to improve the service!"
)
import sys, os
d = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, d)
from smc_rethinkdb import r, accounts, file_access_log, secs2hms
from pprint import pprint
from datetime import datetime, timedelta
from pytz import utc
from queue import Queue
from threading import Thread
import socket
import numpy as np
from collections import Counter, defaultdict
import itertools as it
# Length of the reporting window in days: first command-line argument,
# falling back to 7 when none is given.
DAYS_AGO = 7
if len(sys.argv) >= 2:
    DAYS_AGO = int(sys.argv[1])

# Timezone-aware UTC bounds of the reporting window.
now = datetime.now(tz=utc)
ago = now - timedelta(days=DAYS_AGO)

# Access-log rows between `ago` and `now` (looked up through the 'time'
# index), each joined with its account; only the fields used further down
# are kept ("left" = log row, "right" = account).
q = (
    file_access_log
    .between(ago, now, index='time')
    .eq_join(r.row["account_id"], accounts)
    .pluck({
        "left": ["time", "account_id", "project_id"],
        "right": ["first_name", "last_name", "email_address"],
    })
)
# account_id -> "First Last <email>" label, created the first time an
# account shows up in the query result.
users = {}
# Start of a 10-minute bin (epoch seconds) -> set of ids seen in that bin.
users_bins = defaultdict(set)
projs_bins = defaultdict(set)
# id -> number of bins it appears in; populated later, while the bins are
# printed.
users_tot = Counter()
projs_tot = Counter()

# The row index is not needed, so iterate over the cursor directly.
for res in q.run():
    what = res["left"]   # access-log fields: time, account_id, project_id
    who = res["right"]   # account fields: first_name, last_name, email_address
    aid = what["account_id"]
    pid = what["project_id"]

    if aid not in users:
        # Only the e-mail address gets a placeholder when absent; the row
        # itself is left unmodified.
        users[aid] = "{} {} <{}>".format(
            who["first_name"],
            who["last_name"],
            who.get("email_address", "None"),
        )

    # Round the access time down to the start of its 10-minute bin.
    t = what["time"]
    bin_start = t.replace(
        minute=t.minute - t.minute % 10, second=0, microsecond=0)
    ts = int(bin_start.timestamp())

    users_bins[ts].add(aid)
    projs_bins[ts].add(pid)
# For users and then projects: print how many distinct ids each time bin
# holds (in chronological order) and add one to an id's total for every bin
# it appears in.
_reports = (
    ("users", users_bins, users_tot),
    ("projects", projs_bins, projs_tot),
)
for label, bins_by_ts, totals in _reports:
    print()
    print("{} Bins".format(label.title()))
    for bin_ts in sorted(bins_by_ts):
        members = bins_by_ts[bin_ts]
        # NOTE(review): fromtimestamp() without a tz yields local time,
        # while the query window is computed in UTC — confirm this is the
        # intended display.
        stamp = datetime.fromtimestamp(bin_ts).isoformat()
        print("{} → {}".format(stamp, len(members)))
        # `members` is a set, so each id is counted once per bin.
        totals.update(members)
# Every bin an account appears in is counted as 10 minutes of activity.
_BIN_SECONDS = 60 * 10

# List the 30 accounts that were active in the most bins.
print("Top Users")
for aid, nb in users_tot.most_common(30):
    print("{:>9}s {} ({})".format(
        secs2hms(_BIN_SECONDS * nb), users[aid], aid))

# The summary below claims to cover *all* users, so total over every account
# rather than only the 30 listed above.
sum_user_total = _BIN_SECONDS * sum(users_tot.values())

print()
# Total active minutes relative to the minutes in the reporting window.
# A zero-length window (DAYS_AGO == 0) would divide by zero; report 0 then.
window_minutes = DAYS_AGO * 24 * 60
ratio = (sum_user_total / 60.) / window_minutes if window_minutes > 0 else 0.0
print("Sum of all {} user's activity: {} (ratio: 1:{:.2f})".format(
    len(users_tot), secs2hms(sum_user_total), ratio))