# Path: blob/main/scripts/reports/jsonify_db_structure.py
# 1128 views
from bson.code import Code
import datetime
#import threading
#import bson
#import time
from collections import defaultdict
from lmfdb.inventory_app.id_object import get_description
from psycodict import db

__version__ = '1.0.0'


def _is_good_database(name):
    """Return True if the database called `name` should be scanned."""
    bad = ['inv']
    return name not in bad


def _is_good_table(name):
    """Return True if the table called `name` should be scanned.

    Currently every table is accepted.
    """
    return True


def merge_dicts(d1, d2):
    """Merge `d2` into `d1` in place.

    Keys missing from `d1` are copied over from `d2`.  For keys present in
    both, nested dictionaries are merged recursively; any other value
    already in `d1` is kept as it is.

    Returns None; `d1` is modified in place.
    """
    for key, value2 in d2.items():
        if key not in d1:
            d1[key] = value2
        elif isinstance(value2, dict) and isinstance(d1[key], dict):
            # Only recurse when both sides are dicts, so that a scalar in
            # d1 is never treated as a mapping.
            merge_dicts(d1[key], value2)


def _get_db_records(table):
    """Run the MapReduce operation on `table` and return its results.

    The mapper emits the sorted list of key names of each document and the
    reducer sums the emitted counts.  The '_id' name is removed from every
    key list before the results are returned.

    Any exception raised by the map-reduce call is reported on stdout and
    then re-raised.
    """
    mapper = Code("""
                  function() {
                      var names = Object.keys(this).sort();
                      emit(names,1);
                  }
                  """)

    reducer = Code("""
                   function (key,values) {
                       return Array.sum(values);
                   }
                   """)

    try:
        results = table.inline_map_reduce(mapper, reducer)
    except Exception:
        print('Unable to perform map_reduce. Table or database may not exist')
        # Bare raise keeps the original traceback intact.
        raise
    # Strip the _id field from the results
    for doc in results:
        if '_id' in doc['_id']:
            doc['_id'].remove('_id')
    return results


def get_sample_record(collection, field_name):
    """Get a sample record with a non-empty value for `field_name`.

    collection - MongoDB collection object
    field_name - name of field to find sample record from

    Returns the result of `collection.find_one` for a query requiring the
    field to exist and to be neither an empty list nor an empty string.
    """
    query = {str(field_name): {'$exists': True, '$nin': [[], '']}}
    return collection.find_one(query)


def _jsonify_table_info(table, dbname=None):
    """Private function to turn information about one table into base JSON.

    Currently always raises NotImplementedError: the implementation below
    still needs to be rewritten for Postgres.  Everything after the `raise`
    is unreachable and is kept only as a reference for that rewrite.
    """
    # Needs to be rewritten for Postgres
    raise NotImplementedError

    if dbname is None:
        dbname = table.search_table
    results = _get_db_records(table)

    json_db_data = {}
    json_db_data['dbinfo'] = {}
    json_db_data['dbinfo']['name'] = dbname
    json_db_data['records'] = {}
    json_db_data['fields'] = {}

    # Union of all key names seen in any record schema, sorted.
    lst = set()
    for doc in results:
        lst = lst | set(doc['_id'])
    lst = list(lst)
    lst.sort()

    for doc in lst:
        try:
            rls = get_sample_record(table, str(doc))
            try:
                typedesc = get_description(rls[str(doc)])
            except Exception:
                typedesc = 'Type cannot be identified (' \
                           + str(type(rls[str(doc)])) + ')'
            try:
                # NOTE(review): str has no .decode on Python 3, so this
                # always falls through to the except branch there; to be
                # fixed when this function is rewritten.
                strval = str(rls[str(doc)]).decode('unicode_escape').\
                         encode('ascii', 'ignore')
            except Exception:
                strval = 'Record cannot be stringified'
        except Exception:
            typedesc = 'Record cannot be found containing key'
            strval = 'N/A'

        # Flatten the example to one line and truncate it to 100 characters.
        lstr = len(strval)
        strval = strval.replace('\n', ' ').replace('\r', '')
        strval = '`' + strval[:100].strip() + '`'
        if lstr > 100:
            strval = strval + ' ...'
        json_db_data['fields'][str(doc)] = {}
        json_db_data['fields'][str(doc)]['type'] = typedesc
        json_db_data['fields'][str(doc)]['example'] = strval

    for recordid, doc in enumerate(results):
        json_db_data['records'][recordid] = {}
        json_db_data['records'][recordid]['count'] = int(doc['value'])
        json_db_data['records'][recordid]['schema'] = doc['_id']

    indices = table.index_information()
    json_db_data['indices'] = {}
    for recordid, index in enumerate(indices):
        json_db_data['indices'][recordid] = {}
        json_db_data['indices'][recordid]['name'] = index
        json_db_data['indices'][recordid]['keys'] = indices[index]['key']

    return json_db_data


def parse_table_info_to_json(tablename, retval=None, date=None):
    """Front end routine to create JSON information about a table.

    tablename - name of the table, used to look it up in `db` and as the
                top-level key of the result
    retval - optional dict; when given, the result is also stored under
             retval['data'] (lets a caller running this in a thread
             collect the output)
    date - optional scrape date; when falsy, the current local time
           formatted as '%Y-%m-%d %H:%M:%S.%f' is used

    Returns {tablename: <table JSON with 'scrape_date' added>}.
    """
    from lmfdb.db_backend import db
    json_raw = _jsonify_table_info(db[tablename], tablename)
    json_wrap = {tablename: json_raw}
    if not date:
        date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
    json_wrap[tablename]['scrape_date'] = date
    if retval is not None:
        retval['data'] = json_wrap
    return json_wrap


def create_user_template(structure_json, dbname, tablename,
                         field_subs=('type', 'example', 'description'),
                         info_subs=('description', 'status', 'contact', 'code'),
                         note_subs=('description',)):
    """Legacy routine to create blank user specified data JSON.

    structure_json - structure data; structure_json[dbname][tablename]
                     must contain a 'fields' mapping
    dbname, tablename - keys selecting the table inside `structure_json`
    field_subs - names of the blank entries created for every field
    info_subs - names of the blank entries created under '(INFO)'
    note_subs - names of the blank entries created under '(NOTES)'

    Returns a dict with an '(INFO)' entry, one entry per field and a
    '(NOTES)' entry, each mapping the requested names to empty strings.
    """
    result_json = {}
    substr = structure_json[dbname][tablename]
    result_json['(INFO)'] = {el: "" for el in info_subs}
    for el in substr['fields']:
        result_json[el] = {iel: "" for iel in field_subs}
    result_json['(NOTES)'] = {el: "" for el in note_subs}
    return result_json


def parse_lmfdb_to_json(tables=None, databases=None,
                        is_good_database=_is_good_database,
                        is_good_table=_is_good_table):
    """Legacy routine to scan any specified chunk of LMFDB to JSON.

    Currently always raises NotImplementedError; the previous
    implementation is kept below, commented out, for reference.
    """
    raise NotImplementedError
    # connection has been deleted
    #
    #    if not tables:
    #        tables = get_lmfdb_tables(databases = databases,
    #                    is_good_database = is_good_database, is_good_table = is_good_table)
    #    else:
    #        if not hasattr(tables, '__iter__'): tables = [tables]
    #        if not isinstance(tables, dict):
    #            if not databases:
    #                databases = get_lmfdb_databases(is_good_database = is_good_database)
    #            if len(databases) == 1:
    #                tbldict = {databases[0] : tables}
    #            else:
    #                tbldict = defaultdict(list)
    #                for table in tables:
    #                    db_name = table.split('_')[0]
    #                    tbldict[db_name].append(table)
    #            tables = tbldict
    #        else:
    #            for db_name, L in tables.items():
    #                if not isinstance(L, list):
    #                    if L:
    #                        tables[db_name] = [L]
    #                    else:
    #                        tables.update(get_lmfdb_tables(databases=db_name))
    #
    #    db_struct = {}
    #    for db_name in tables:
    #        print('Running ' + db_name)
    #        if is_good_database(db_name):
    #            for table in tables[db_name]:
    #                print('Parsing ' + table)
    #                if is_good_table(table):
    #                    mydict={}
    #                    mythread = threading.Thread(target = parse_table_info_to_json, args = [table, mydict])
    #                    mythread.start()
    #                    while mythread.isAlive():
    #                        u=bson.son.SON({"$ownOps":1,"currentOp":1})
    #                        progress = connection['admin'].command(u)
    #                        for el in progress['inprog']:
    #                            if 'progress' in el.keys():
    #                                if el['ns'] == table:
    #                                    print("Scanning " + table + " " +
    #                                        unicode(int(el['progress']['done'])) +
    #                                        "\\" + unicode(int(el['progress']['total'])))
    #                        time.sleep(5)
    #
    #                    merge_dicts(db_struct, mydict['data'])
    #    return db_struct


def get_lmfdb_databases(is_good_database=_is_good_database):
    """Routine to get list of available databases.

    Returns the names found by searching `db.inv_dbs` for which
    `is_good_database(name)` is true.
    """
    return [db_name for db_name in db.inv_dbs.search({}, 'name')
            if is_good_database(db_name)]


def get_lmfdb_tables(databases=None, is_good_database=_is_good_database,
                     is_good_table=_is_good_table):
    """Routine to get a dictionary with keys of all databases and member
    lists of tables in that database.

    databases - a single database name or an iterable of names; when
                falsy, get_lmfdb_databases(is_good_database) is used
    is_good_database - filter used only when `databases` is not given
    is_good_table - filter applied to every table name

    The database of a table is taken to be the part of its name before
    the first underscore.  Returns a defaultdict(list) mapping database
    name to the list of its accepted table names.
    """
    if not databases:
        databases = get_lmfdb_databases(is_good_database=is_good_database)
    elif isinstance(databases, str) or not hasattr(databases, '__iter__'):
        # A str has __iter__ on Python 3, so it must be wrapped explicitly;
        # otherwise `db_name in databases` below is a substring test.
        databases = [databases]
    tables = defaultdict(list)
    for table in db.tablenames:
        db_name = table.split('_')[0]
        if db_name in databases and is_good_table(table):
            tables[db_name].append(table)
    return tables