GitHub Repository: AndrewVSutherland/lmfdb
Path: blob/main/scripts/reports/jsonify_db_structure.py
from bson.code import Code
import datetime
#import threading
#import bson
#import time
from collections import defaultdict
from lmfdb.inventory_app.id_object import get_description
from psycodict import db

__version__ = '1.0.0'

def _is_good_database(name):
    """ Function to test if a database is one to scan """
    bad = ['inv']
    if name in bad:
        return False
    return True

def _is_good_table(name):
    """ Function to test if a table should be scanned """
    return True

def merge_dicts(d1, d2):
    """ Merge two dictionaries into one """
    for key, value2 in d2.items():
        if key in d1:
            if type(value2) is dict:
                merge_dicts(d1[key], value2)
        else:
            d1[key] = value2

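# Example (illustrative): merge_dicts mutates d1 in place, recursing into nested
# dicts and adding only those keys d1 does not already contain, e.g.
#   d1 = {'a': {'x': 1}}
#   merge_dicts(d1, {'a': {'y': 2}, 'b': 3})
#   # d1 == {'a': {'x': 1, 'y': 2}, 'b': 3}
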
def _get_db_records(table):
    """ Routine to execute the MapReduce operation on a specified table
        object """

    mapper = Code("""
                  function() {
                      var names = Object.keys(this).sort();
                      emit(names,1);
                  }
                  """)

    reducer = Code("""
                   function (key,values) {
                       return Array.sum(values);
                   }
                   """)

    try:
        results = table.inline_map_reduce(mapper,reducer)
    except Exception as err:
        print('Unable to perform map_reduce. Table or database may not exist')
        raise err
    # Strip the _id field from the results
    for doc in results:
        if '_id' in doc['_id']:
            doc['_id'].remove('_id')
    return results

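# Each map-reduce result groups documents by their sorted list of keys, so results
# has the form [{'_id': ['field_a', 'field_b', ...], 'value': 12.0}, ...] (field names
# illustrative): one entry per distinct schema, with a count of documents sharing it.
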
def get_sample_record(collection, field_name):
    """ Function to get a sample, non-empty record from a collection
        collection - MongoDB collection object
        field_name - name of field to find sample record from

        returns sample record
    """
    return collection.find_one({str(field_name): {'$exists': True, '$nin': [[], '']}})

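# Usage sketch (hypothetical collection and field name):
#   rec = get_sample_record(some_collection, 'conductor')
#   # first document in which 'conductor' exists and is neither [] nor ''
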
def _jsonify_table_info(table, dbname = None):
    """Private function to turn information about one table into base
       JSON """
    # Needs to be rewritten for Postgres
    raise NotImplementedError

    if dbname is None:
        dbname = table.search_table
    results = _get_db_records(table)

    json_db_data = {}
    json_db_data['dbinfo'] = {}
    json_db_data['dbinfo']['name'] = dbname
    json_db_data['records'] = {}
    json_db_data['fields'] = {}

    lst = set()
    for doc in results:
        lst = lst | set(doc['_id'])
    lst = list(lst)
    lst.sort()

    for doc in lst:
        try:
            rls = get_sample_record(table, str(doc))
            try:
                typedesc = get_description(rls[str(doc)])
            except Exception:
                typedesc = 'Type cannot be identified (' \
                    + str(type(rls[str(doc)])) + ')'
            try:
                strval = str(rls[str(doc)]).decode('unicode_escape').\
                    encode('ascii','ignore')
            except Exception:
                strval = 'Record cannot be stringified'
        except Exception:
            typedesc = 'Record cannot be found containing key'
            strval = 'N/A'

        lstr = len(strval)
        strval = strval.replace('\n',' ').replace('\r','')
        strval = '`' + strval[:100].strip() + '`'
        if lstr > 100:
            strval = strval + ' ...'
        json_db_data['fields'][str(doc)] = {}
        json_db_data['fields'][str(doc)]['type'] = typedesc
        json_db_data['fields'][str(doc)]['example'] = strval

    for recordid, doc in enumerate(results):
        json_db_data['records'][recordid] = {}
        json_db_data['records'][recordid]['count'] = int(doc['value'])
        json_db_data['records'][recordid]['schema'] = doc['_id']

    indices = table.index_information()
    json_db_data['indices'] = {}
    for recordid, index in enumerate(indices):
        json_db_data['indices'][recordid] = {}
        json_db_data['indices'][recordid]['name'] = index
        json_db_data['indices'][recordid]['keys'] = indices[index]['key']

    return json_db_data

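# For reference, the per-table JSON built above has the shape
#   {'dbinfo': {'name': ...},
#    'fields': {<field>: {'type': ..., 'example': ...}, ...},
#    'records': {0: {'count': ..., 'schema': [...]}, ...},
#    'indices': {0: {'name': ..., 'keys': ...}, ...}}
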
def parse_table_info_to_json(tablename, retval = None, date = None):
    """ Front end routine to create JSON information about a table """

    from lmfdb.db_backend import db
    json_raw = _jsonify_table_info(db[tablename], tablename)
    json_wrap = {tablename: json_raw}
    if not date:
        date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
    json_wrap[tablename]['scrape_date'] = date
    if retval is not None:
        retval['data'] = json_wrap
    return json_wrap

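# The retval argument lets a caller collect the result through a mutable dict, which the
# legacy threaded scan in parse_lmfdb_to_json below relied on, e.g. (hypothetical table
# name; the call currently fails because _jsonify_table_info raises NotImplementedError):
#   out = {}
#   parse_table_info_to_json('ec_curves', retval=out)
#   # out['data'] == {'ec_curves': {..., 'scrape_date': ...}}
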
def create_user_template(structure_json, dbname, tablename, field_subs = ['type', 'example', 'description'],
                         info_subs = ['description', 'status', 'contact', 'code'], note_subs = ['description']):
    """Legacy routine to create blank user specified data JSON"""

    result_json = {}
    substr = structure_json[dbname][tablename]
    result_json['(INFO)'] = {}
    for el in info_subs:
        result_json['(INFO)'][el] = ""
    for el in substr['fields']:
        result_json[el] = {}
        for iel in field_subs:
            result_json[el][iel] = ""
    result_json['(NOTES)'] = {}
    for el in note_subs:
        result_json['(NOTES)'][el] = ""
    return result_json

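# The blank template it returns looks like (illustrative)
#   {'(INFO)': {'description': '', 'status': '', 'contact': '', 'code': ''},
#    '<field>': {'type': '', 'example': '', 'description': ''},
#    '(NOTES)': {'description': ''}}
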
def parse_lmfdb_to_json(tables = None, databases = None,
                        is_good_database = _is_good_database,
                        is_good_table = _is_good_table):
    """Legacy routine to scan any specified chunk of LMFDB to JSON"""
    raise NotImplementedError
    # connection has been deleted
    #
    # if not tables:
    #     tables = get_lmfdb_tables(databases = databases,
    #         is_good_database = is_good_database, is_good_table = is_good_table)
    # else:
    #     if not hasattr(tables, '__iter__'): tables = [tables]
    #     if not isinstance(tables, dict):
    #         if not databases:
    #             databases = get_lmfdb_databases(is_good_database = is_good_database)
    #         if len(databases) == 1:
    #             tbldict = {databases[0] : tables}
    #         else:
    #             tbldict = defaultdict(list)
    #             for table in tables:
    #                 db_name = table.split('_')[0]
    #                 tbldict[db_name].append(table)
    #         tables = tbldict
    #     else:
    #         for db_name, L in tables.items():
    #             if not isinstance(L, list):
    #                 if L:
    #                     tables[db_name] = [L]
    #                 else:
    #                     tables.update(get_lmfdb_tables(databases=db_name))
    #
    # db_struct = {}
    # for db_name in tables:
    #     print('Running ' + db_name)
    #     if is_good_database(db_name):
    #         for table in tables[db_name]:
    #             print('Parsing ' + table)
    #             if is_good_table(table):
    #                 mydict = {}
    #                 mythread = threading.Thread(target = parse_table_info_to_json, args = [table, mydict])
    #                 mythread.start()
    #                 while mythread.isAlive():
    #                     u = bson.son.SON({"$ownOps": 1, "currentOp": 1})
    #                     progress = connection['admin'].command(u)
    #                     for el in progress['inprog']:
    #                         if 'progress' in el.keys():
    #                             if el['ns'] == table:
    #                                 print("Scanning " + table + " " +
    #                                       unicode(int(el['progress']['done'])) +
    #                                       "\\" + unicode(int(el['progress']['total'])))
    #                     time.sleep(5)
    #
    #                 merge_dicts(db_struct, mydict['data'])
    # return db_struct

def get_lmfdb_databases(is_good_database=_is_good_database):
    """ Routine to get list of available databases """
    return [db_name for db_name in db.inv_dbs.search({},'name') if is_good_database(db_name)]

def get_lmfdb_tables(databases=None, is_good_database=_is_good_database,
                     is_good_table=_is_good_table):
    """Routine to get a dictionary with keys of all databases and member lists
    of tables in that database"""

    if not databases:
        databases = get_lmfdb_databases(is_good_database=is_good_database)
    if isinstance(databases, str) or not hasattr(databases, '__iter__'):
        # wrap a single database name in a list (strings are iterable, so test them explicitly)
        databases = [databases]
    tables = defaultdict(list)
    for table in db.tablenames:
        db_name = table.split('_')[0]
        if db_name in databases and is_good_table(table):
            tables[db_name].append(table)
    return tables

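# Usage sketch: list scannable databases, then group tables by their database prefix, e.g.
#   dbs = get_lmfdb_databases()        # every database name in inv_dbs except 'inv'
#   tables = get_lmfdb_tables(['ec'])  # e.g. {'ec': ['ec_curves', ...]} (illustrative names)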