GitHub Repository: AndrewVSutherland/lmfdb
Path: blob/main/scripts/reports/jsonify_db_structure.py
from bson.code import Code
import datetime
#import threading
#import bson
#import time
from collections import defaultdict
from lmfdb.inventory_app.id_object import get_description
from psycodict import db

__version__ = '1.0.0'

def _is_good_database(name):
    """ Function to test if a database is one to scan """
    bad = ['inv']
    if name in bad:
        return False
    return True

def _is_good_table(name):
    """ Function to test if a table should be scanned """
    return True

def merge_dicts(d1, d2):
    """ Merge two dictionaries into one """
    for key, value2 in d2.items():
        if key in d1:
            if type(value2) is dict:
                merge_dicts(d1[key], value2)
        else:
            d1[key] = value2

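# Example (illustrative): merge_dicts mutates d1 in place, recursing into nested
# dicts and adding only those keys d1 does not already contain, e.g.
#   d1 = {'a': {'x': 1}}
#   merge_dicts(d1, {'a': {'y': 2}, 'b': 3})
#   # d1 == {'a': {'x': 1, 'y': 2}, 'b': 3}
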
def _get_db_records(table):
    """ Routine to execute the MapReduce operation on a specified table
        object """

    mapper = Code("""
                  function() {
                      var names = Object.keys(this).sort();
                      emit(names,1);
                  }
                  """)

    reducer = Code("""
                   function (key,values) {
                       return Array.sum(values);
                   }
                   """)

    try:
        results = table.inline_map_reduce(mapper,reducer)
    except Exception as err:
        print('Unable to perform map_reduce. Table or database may not exist')
        raise err
    # Strip the _id field from the results
    for doc in results:
        if '_id' in doc['_id']:
            doc['_id'].remove('_id')
    return results

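# Each map-reduce result groups documents by their sorted list of keys, so results
# has the form [{'_id': ['field_a', 'field_b', ...], 'value': 12.0}, ...] (field names
# illustrative): one entry per distinct schema, with a count of documents sharing it.
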
def get_sample_record(collection, field_name):
    """ Function to get a sample, non-empty record from a collection
        collection - MongoDB collection object
        field_name - name of field to find sample record from

        returns sample record
    """
    return collection.find_one({str(field_name): {'$exists': True, '$nin': [[], '']}})

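# Usage sketch (hypothetical collection and field name):
#   rec = get_sample_record(some_collection, 'conductor')
#   # first document in which 'conductor' exists and is neither [] nor ''
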
def _jsonify_table_info(table, dbname = None):
    """Private function to turn information about one table into base
       JSON """
    # Needs to be rewritten for Postgres
    raise NotImplementedError

    if dbname is None:
        dbname = table.search_table
    results = _get_db_records(table)

    json_db_data = {}
    json_db_data['dbinfo'] = {}
    json_db_data['dbinfo']['name'] = dbname
    json_db_data['records'] = {}
    json_db_data['fields'] = {}

    lst = set()
    for doc in results:
        lst = lst | set(doc['_id'])
    lst = list(lst)
    lst.sort()

    for doc in lst:
        try:
            rls = get_sample_record(table, str(doc))
            try:
                typedesc = get_description(rls[str(doc)])
            except Exception:
                typedesc = 'Type cannot be identified (' \
                    + str(type(rls[str(doc)])) + ')'
            try:
                strval = str(rls[str(doc)]).decode('unicode_escape').\
                    encode('ascii','ignore')
            except Exception:
                strval = 'Record cannot be stringified'
        except Exception:
            typedesc = 'Record cannot be found containing key'
            strval = 'N/A'

        lstr = len(strval)
        strval = strval.replace('\n',' ').replace('\r','')
        strval = '`' + strval[:100].strip() + '`'
        if lstr > 100:
            strval = strval + ' ...'
        json_db_data['fields'][str(doc)] = {}
        json_db_data['fields'][str(doc)]['type'] = typedesc
        json_db_data['fields'][str(doc)]['example'] = strval

    for recordid, doc in enumerate(results):
        json_db_data['records'][recordid] = {}
        json_db_data['records'][recordid]['count'] = int(doc['value'])
        json_db_data['records'][recordid]['schema'] = doc['_id']

    indices = table.index_information()
    json_db_data['indices'] = {}
    for recordid, index in enumerate(indices):
        json_db_data['indices'][recordid] = {}
        json_db_data['indices'][recordid]['name'] = index
        json_db_data['indices'][recordid]['keys'] = indices[index]['key']

    return json_db_data

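# For reference, the per-table JSON built above has the shape
#   {'dbinfo': {'name': ...},
#    'fields': {<field>: {'type': ..., 'example': ...}, ...},
#    'records': {0: {'count': ..., 'schema': [...]}, ...},
#    'indices': {0: {'name': ..., 'keys': ...}, ...}}
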
def parse_table_info_to_json(tablename, retval = None, date = None):
    """ Front end routine to create JSON information about a table """

    from lmfdb.db_backend import db
    json_raw = _jsonify_table_info(db[tablename], tablename)
    json_wrap = {tablename: json_raw}
    if not date:
        date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
    json_wrap[tablename]['scrape_date'] = date
    if retval is not None:
        retval['data'] = json_wrap
    return json_wrap

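# The retval argument lets a caller collect the result through a mutable dict, which the
# legacy threaded scan in parse_lmfdb_to_json below relied on, e.g. (hypothetical table
# name; the call currently fails because _jsonify_table_info raises NotImplementedError):
#   out = {}
#   parse_table_info_to_json('ec_curves', retval=out)
#   # out['data'] == {'ec_curves': {..., 'scrape_date': ...}}
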
def create_user_template(structure_json, dbname, tablename, field_subs = ['type', 'example', 'description'],
                         info_subs = ['description', 'status', 'contact', 'code'], note_subs = ['description']):
    """Legacy routine to create blank user specified data JSON"""

    result_json = {}
    substr = structure_json[dbname][tablename]
    result_json['(INFO)'] = {}
    for el in info_subs:
        result_json['(INFO)'][el] = ""
    for el in substr['fields']:
        result_json[el] = {}
        for iel in field_subs:
            result_json[el][iel] = ""
    result_json['(NOTES)'] = {}
    for el in note_subs:
        result_json['(NOTES)'][el] = ""
    return result_json

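# The blank template it returns looks like (illustrative)
#   {'(INFO)': {'description': '', 'status': '', 'contact': '', 'code': ''},
#    '<field>': {'type': '', 'example': '', 'description': ''},
#    '(NOTES)': {'description': ''}}
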
def parse_lmfdb_to_json(tables = None, databases = None,
                        is_good_database = _is_good_database,
                        is_good_table = _is_good_table):
    """Legacy routine to scan any specified chunk of LMFDB to JSON"""
    raise NotImplementedError
    # connection has been deleted
    #
    # if not tables:
    #     tables = get_lmfdb_tables(databases = databases,
    #         is_good_database = is_good_database, is_good_table = is_good_table)
    # else:
    #     if not hasattr(tables, '__iter__'): tables = [tables]
    #     if not isinstance(tables, dict):
    #         if not databases:
    #             databases = get_lmfdb_databases(is_good_database = is_good_database)
    #         if len(databases) == 1:
    #             tbldict = {databases[0] : tables}
    #         else:
    #             tbldict = defaultdict(list)
    #             for table in tables:
    #                 db_name = table.split('_')[0]
    #                 tbldict[db_name].append(table)
    #         tables = tbldict
    #     else:
    #         for db_name, L in tables.items():
    #             if not isinstance(L, list):
    #                 if L:
    #                     tables[db_name] = [L]
    #                 else:
    #                     tables.update(get_lmfdb_tables(databases=db_name))
    #
    # db_struct = {}
    # for db_name in tables:
    #     print('Running ' + db_name)
    #     if is_good_database(db_name):
    #         for table in tables[db_name]:
    #             print('Parsing ' + table)
    #             if is_good_table(table):
    #                 mydict = {}
    #                 mythread = threading.Thread(target = parse_table_info_to_json, args = [table, mydict])
    #                 mythread.start()
    #                 while mythread.isAlive():
    #                     u = bson.son.SON({"$ownOps": 1, "currentOp": 1})
    #                     progress = connection['admin'].command(u)
    #                     for el in progress['inprog']:
    #                         if 'progress' in el.keys():
    #                             if el['ns'] == table:
    #                                 print("Scanning " + table + " " +
    #                                       unicode(int(el['progress']['done'])) +
    #                                       "\\" + unicode(int(el['progress']['total'])))
    #                     time.sleep(5)
    #
    #                 merge_dicts(db_struct, mydict['data'])
    # return db_struct

def get_lmfdb_databases(is_good_database=_is_good_database):
    """ Routine to get list of available databases """
    return [db_name for db_name in db.inv_dbs.search({},'name') if is_good_database(db_name)]

def get_lmfdb_tables(databases=None, is_good_database=_is_good_database,
                     is_good_table=_is_good_table):
    """Routine to get a dictionary with keys of all databases and member lists
    of tables in that database"""

    if not databases:
        databases = get_lmfdb_databases(is_good_database=is_good_database)
    if isinstance(databases, str) or not hasattr(databases, '__iter__'):
        # wrap a single database name in a list (strings are iterable, so test them explicitly)
        databases = [databases]
    tables = defaultdict(list)
    for table in db.tablenames:
        db_name = table.split('_')[0]
        if db_name in databases and is_good_table(table):
            tables[db_name].append(table)
    return tables

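# Usage sketch: list scannable databases, then group tables by their database prefix, e.g.
#   dbs = get_lmfdb_databases()        # every database name in inv_dbs except 'inv'
#   tables = get_lmfdb_tables(['ec'])  # e.g. {'ec': ['ec_curves', ...]} (illustrative names)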