Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
AndrewVSutherland
GitHub Repository: AndrewVSutherland/lmfdb
Path: blob/main/scripts/hmf/import_hmf_data.py
1448 views
1
# -*- coding: utf-8 -*-
2
import os
3
import sage
4
from sage.repl import preparse
5
from sage.interfaces.magma import magma
6
from sage.all import PolynomialRing, Rationals
7
8
from lmfdb.base import getDBConnection
9
print("getting connection")
10
C= getDBConnection()
11
C['admin'].authenticate('lmfdb', 'lmfdb') # read-only
12
13
import yaml
14
pw_dict = yaml.load(open(os.path.join(os.getcwd(), os.extsep, os.extsep, os.extsep, "passwords.yaml")))
15
username = pw_dict['data']['username']
16
password = pw_dict['data']['password']
17
hmfs = C.hmfs
18
hmfs.authenticate(username, password)
19
hmf_forms = hmfs.forms
20
hmf_stats = hmfs.forms.stats
21
print("Setting hmf_forms to {} and hmf_stats to {}".format('hmfs.forms','hmfs.forms.stats'))
22
23
hmf_fields = hmfs.fields
24
C['admin'].authenticate('lmfdb', 'lmfdb') # read-only
25
fields = C.numberfields.fields
26
27
# hmf_forms.create_index('field_label')
28
# hmf_forms.create_index('level_norm')
29
# hmf_forms.create_index('level_ideal')
30
# hmf_forms.create_index('dimension')
31
32
magma.eval('nice_idealstr := function(F : Bound := 10000); idealsstr := []; ideals := IdealsUpTo(Bound, F); for I in ideals do bl, g := IsPrincipal(I); if bl then s := Sprintf("[%o, %o, %o]", Norm(I), Minimum(I), F!g); else zs := Generators(I); z := zs[#zs]; m := Minimum(I); z := F![(Integers()!c) mod m : c in Eltseq(F!z)]; assert ideal<Integers(F) | [m, z]> eq I; s := Sprintf("[%o, %o, %o]", Norm(I), m, z); end if; Append(~idealsstr, s); end for; return idealsstr; end function;')
33
34
35
def parse_label(field_label, weight, level_label, label_suffix):
36
if weight == [2 for i in range(len(weight))]:
37
weight_label = ''
38
elif weight == [weight[0] for i in range(len(weight))]: # Parellel weight
39
weight_label = str(weight[0]) + '-'
40
else:
41
weight_label = str(weight) + '-'
42
return field_label + '-' + weight_label + level_label + '-' + label_suffix
43
44
P = sage.rings.polynomial.polynomial_ring_constructor.PolynomialRing(sage.rings.rational_field.RationalField(), 3, ['w', 'e', 'x'])
45
w, e, x = P.gens()
46
47
48
def add_narrow_classno_data():
49
# Adds narrow class number data to all fields
50
for F in hmf_fields.find():
51
Fcoeff = fields.find_one({'label' : F['label']})['coeffs']
52
magma.eval('F<w> := NumberField(Polynomial([' + str(Fcoeff) + ']));')
53
hplus = magma.eval('NarrowClassNumber(F);')
54
hmf_fields.update({ '_id': F['_id'] }, { "$set": {'narrow_class_no': int(hplus)} })
55
print(Fcoeff, hplus)
56
57
def import_all_data(n):
58
nstr = str(n)
59
60
fileprefix = "/home/jvoight/Elements/ModFrmHilDatav1/Data/" + nstr
61
# subprocess.call("ls " + fileprefix + "/data* > " + fileprefix + "/dir.tmp", shell=True)
62
ff = open(fileprefix + "/dir.tmp", 'r')
63
files = ff.readlines()
64
files = [f[:-1] for f in files]
65
# subprocess.call("rm dir.tmp", shell=True)
66
67
files = [f for f in files if f.find('_old') == -1]
68
for file_name in files:
69
print(file_name)
70
import_data(file_name)
71
72
73
def import_data(hmf_filename):
74
hmff = open(hmf_filename)
75
76
ferrors = open('/home/jvoight/lmfdb/backups/import_data.err', 'a')
77
78
# Parse field data
79
v = hmff.readline()
80
assert v[:9] == 'COEFFS :='
81
coeffs = eval(v[10:][:-2])
82
v = hmff.readline()
83
assert v[:4] == 'n :='
84
n = int(v[4:][:-2])
85
v = hmff.readline()
86
assert v[:4] == 'd :='
87
d = int(v[4:][:-2])
88
89
magma.eval('F<w> := NumberField(Polynomial(' + str(coeffs) + '));')
90
magma.eval('ZF := Integers(F);')
91
92
# Find the corresponding field in the database of number fields
93
print("Finding field...")
94
fields_matching = fields.find({"signature": [n, int(0)],
95
"discriminant": d})
96
cnt = fields_matching.count()
97
assert cnt >= 1
98
field_label = None
99
for i in range(cnt):
100
nf = next(fields_matching)
101
if nf['coefficients'] == coeffs:
102
field_label = nf['label']
103
assert field_label is not None
104
print("...found!")
105
106
# Find the field in the HMF database
107
print("Finding field in HMF...")
108
F_hmf = hmf_fields.find_one({"label": field_label})
109
if F_hmf is None:
110
# Create list of ideals
111
print("...adding!")
112
ideals = eval(preparse(magma.eval('nice_idealstr(F);')))
113
ideals_str = [str(c) for c in ideals]
114
hmf_fields.insert({"label": field_label,
115
"degree": n,
116
"discriminant": d,
117
"ideals": ideals_str})
118
F_hmf = hmf_fields.find_one({"label": field_label})
119
else:
120
print("...found!")
121
122
print("Computing ideals...")
123
ideals_str = F_hmf['ideals']
124
ideals = [eval(preparse(c)) for c in ideals_str]
125
ideals_norms = [c[0] for c in ideals]
126
magma.eval('ideals_str := [' + ''.join(F_hmf['ideals']).replace('][', '], [') + ']')
127
magma.eval('ideals := [ideal<ZF | {F!x : x in I}> : I in ideals_str];')
128
129
# Add the list of primes
130
print("Computing primes...")
131
v = hmff.readline() # Skip line
132
v = hmff.readline()
133
assert v[:9] == 'PRIMES :='
134
primes_str = v[10:][:-2]
135
primes_array = [str(t) for t in eval(preparse(primes_str))]
136
magma.eval('primes_array := ' + primes_str)
137
magma.eval('primes := [ideal<ZF | {F!x : x in I}> : I in primes_array];')
138
magma.eval('primes_indices := [Index(ideals, pp) : pp in primes];')
139
try:
140
assert magma('&and[primes_indices[j] gt primes_indices[j-1] : j in [2..#primes_indices]]')
141
resort = False
142
except AssertionError:
143
print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Primes reordered!")
144
resort = True
145
magma.eval('_, sigma := Sort(primes_indices, func<x,y | x-y>);')
146
magma.eval('perm := [[xx : xx in x] : x in CycleDecomposition(sigma) | #x gt 1]')
147
# Check at least they have the same norm
148
magma.eval('for tau in perm do assert #{Norm(ideals[primes_indices[t]]) : t in tau} eq 1; end for;')
149
primes_resort = eval(magma.eval('Eltseq(sigma)'))
150
primes_resort = [c - 1 for c in primes_resort]
151
152
primes_indices = eval(magma.eval('primes_indices'))
153
primes_str = [ideals_str[j - 1] for j in primes_indices]
154
assert len(primes_array) == len(primes_str)
155
print("...comparing...")
156
for i in range(len(primes_array)):
157
assert magma('ideal<ZF | {F!x : x in ' + primes_array[i] + '}> eq '
158
+ 'ideal<ZF | {F!x : x in ' + primes_str[i] + '}>;')
159
160
if 'primes' in F_hmf: # Nothing smart: make sure it is the same
161
assert F_hmf['primes'] == primes_str
162
else:
163
F_hmf['primes'] = primes_str
164
hmf_fields.save(F_hmf)
165
print("...saved!")
166
167
# Collect levels
168
v = hmff.readline()
169
if v[:9] == 'LEVELS :=':
170
# skip this line, we do not use the list of levels
171
v = hmff.readline()
172
for i in range(3):
173
if v[:11] != 'NEWFORMS :=':
174
v = hmff.readline()
175
else:
176
break
177
178
# Finally, newforms!
179
print("Starting newforms!")
180
while v != '':
181
v = hmff.readline()[1:-3]
182
if len(v) == 0:
183
break
184
data = eval(preparse(v))
185
level_ideal = data[0]
186
level_norm = data[0][0]
187
label_suffix = data[1]
188
weight = [2 for i in range(n)]
189
190
level_ind = int(magma('Index(ideals, ideal<ZF | {F!x : x in ' + str(level_ideal) + '}>)')
191
) - 1 # Magma counts starting at 1
192
level_ideal = ideals_str[level_ind]
193
assert magma('ideal<ZF | {F!x : x in ' + str(level_ideal) + '}> eq '
194
+ 'ideal<ZF | {F!x : x in ' + str(data[0]) + '}>;')
195
level_dotlabel = level_ind - ideals_norms.index(level_norm) + 1 # Start counting at 1
196
assert level_dotlabel > 0
197
level_label = str(level_norm) + '.' + str(level_dotlabel)
198
199
label = parse_label(field_label, weight, level_label, label_suffix)
200
short_label = level_label + '-' + label_suffix
201
202
if len(data) == 3:
203
hecke_polynomial = x
204
hecke_eigenvalues = data[2]
205
else:
206
hecke_polynomial = data[2]
207
hecke_eigenvalues = data[3]
208
209
if resort:
210
hecke_eigenvalues = [hecke_eigenvalues[i] for i in primes_resort]
211
212
# Sort out Atkin-Lehner involutions
213
magma.eval('NN := ideal<ZF | {F!x : x in ' + str(level_ideal) + '}>;')
214
magma.eval('NNfact := Factorization(NN);')
215
ALind = eval(
216
preparse(magma.eval('[Index(primes, pp[1])-1 : pp in NNfact | Index(primes,pp[1]) gt 0];')))
217
AL_eigsin = [hecke_eigenvalues[c] for c in ALind]
218
try:
219
assert all([abs(int(c)) <= 1 for c in AL_eigsin])
220
except Exception:
221
ferrors.write(str(n) + '/' + str(d) + ' ' + label + '\n')
222
223
AL_eigenvalues = []
224
ees = eval(preparse(magma.eval('[pp[2] : pp in NNfact]')))
225
for i in range(len(AL_eigsin)):
226
if ees[i] >= 2:
227
if hecke_eigenvalues[ALind[i]] != 0:
228
ferrors.write(str(n) + '/' + str(d) + ' ' + label + '\n')
229
else:
230
try:
231
if abs(int(AL_eigsin[i])) != 1:
232
ferrors.write(str(n) + '/' + str(d) + ' ' + label + '\n')
233
else:
234
AL_eigenvalues.append([primes_str[ALind[i]], -AL_eigsin[i]])
235
except TypeError:
236
ferrors.write(str(n) + '/' + str(d) + ' ' + label + '\n')
237
238
assert magma('[Valuation(NN, ideal<ZF | {F!x : x in a}>) gt 0 : a in [' +
239
str(''.join([a[0] for a in AL_eigenvalues])).replace('][', '], [') + ']];')
240
241
# Constrain eigenvalues to size 2MB
242
hecke_eigenvalues = [str(c) for c in hecke_eigenvalues]
243
leftout = 0
244
while sum([len(s) for s in hecke_eigenvalues]) > 2000000:
245
leftout += 1
246
hecke_eigenvalues = hecke_eigenvalues[:-1]
247
if leftout > 0:
248
print("Left out", leftout)
249
250
info = {"label": label,
251
"short_label": short_label,
252
"field_label": field_label,
253
"level_norm": int(level_norm),
254
"level_ideal": str(level_ideal),
255
"level_label": level_label,
256
"weight": str(weight),
257
"label_suffix": label_suffix,
258
"dimension": hecke_polynomial.degree(),
259
"hecke_polynomial": str(hecke_polynomial),
260
"hecke_eigenvalues": hecke_eigenvalues,
261
"AL_eigenvalues": [[str(a[0]), str(a[1])] for a in AL_eigenvalues]}
262
print(info['label'])
263
264
existing_forms = hmf_forms.find({"label": label})
265
assert existing_forms.count() <= 1
266
if existing_forms.count() == 0:
267
hmf_forms.insert(info)
268
else:
269
existing_form = next(existing_forms)
270
assert info['hecke_eigenvalues'] == existing_form['hecke_eigenvalues']
271
print("...duplicate")
272
273
# def parse_label_old(field_label, weight, level_ideal, label_suffix):
274
# label_str = field_label + str(weight) + str(level_ideal) + label_suffix
275
# label_str = label_str.replace(' ', '')
276
# return label_str
277
278
279
def repair_fields(D):
280
F = hmf_fields.find_one({"label": '2.2.' + str(D) + '.1'})
281
282
P = PolynomialRing(Rationals(), 'w')
283
# P is used implicitly in the eval() calls below. When these are
284
# removed, this will not longer be neceesary, but until then the
285
# assert statement is for pyflakes.
286
assert P
287
288
primes = F['primes']
289
primes = [[int(eval(p)[0]), int(eval(p)[1]), str(eval(p)[2])] for p in primes]
290
F['primes'] = primes
291
292
hmff = open("data_2_" + (4 - len(str(D))) * '0' + str(D))
293
294
# Parse field data
295
for i in range(7):
296
v = hmff.readline()
297
ideals = eval(v[10:][:-2])
298
ideals = [[p[0], p[1], str(p[2])] for p in ideals]
299
F['ideals'] = ideals
300
hmf_fields.save(F)
301
302
303
def repair_fields_add_ideal_labels(D):
304
F = hmf_fields.find_one({"label": '2.2.' + str(D) + '.1'})
305
306
ideals = F['ideals']
307
ideal_labels = ['1.1']
308
N = 1
309
cnt = 1
310
for I in ideals[2:]:
311
NI = I[0]
312
if NI == N:
313
cnt += 1
314
else:
315
cnt = 1
316
N = NI
317
ideal_labels.append(str(NI) + '.' + str(cnt))
318
F['ideal_labels'] = ideal_labels
319
hmf_fields.save(F)
320
321
322
def attach_new_label(f):
323
print(f['label'])
324
F = hmf_fields.find_one({"label": f['field_label']})
325
326
P = PolynomialRing(Rationals(), 'w')
327
# P is used implicitly in the eval() calls below. When these are
328
# removed, this will not longer be necessary, but until then the
329
# assert statement is for pyflakes.
330
assert P
331
332
if isinstance(f['level_ideal'], str):
333
N = eval(f['level_ideal'])
334
else:
335
N = f['level_ideal']
336
if type(N) != list or len(N) != 3:
337
print(f, N, type(N))
338
raise ValueError("{} does not define a valid level ideal".format(N))
339
340
f['level_ideal'] = [N[0], N[1], str(N[2])]
341
342
try:
343
ideal_label = F['ideal_labels'][F['ideals'].index(f['level_ideal'])]
344
f['level_ideal_label'] = ideal_label
345
f['label'] = parse_label(f['field_label'], f['weight'], f['level_ideal_label'], f['label_suffix'])
346
hmf_forms.save(f)
347
print(f['label'])
348
except ValueError:
349
hmf_forms.remove(f)
350
print("REMOVED!")
351
352
# Old function used to make a stats yaml file.
353
def make_stats_dict():
354
degrees = hmf_fields.distinct('degree')
355
stats = {}
356
for d in degrees:
357
statsd = {}
358
statsd['fields'] = [F['label'] for F in hmf_fields.find({'degree':d},['label']).hint('degree_1')]
359
field_sort_key = lambda F: int(F.split(".")[2]) # by discriminant
360
statsd['fields'].sort(key=field_sort_key)
361
statsd['nfields'] = len(statsd['fields'])
362
statsd['nforms'] = hmf_forms.find({'deg':d}).hint('deg_1').count()
363
statsd['maxnorm'] = max(hmf_forms.find({'deg':d}).hint('deg_1_level_norm_1').distinct('level_norm')+[0])
364
statsd['counts'] = {}
365
for F in statsd['fields']:
366
#print("Field %s" % F)
367
res = [f['level_norm'] for f in hmf_forms.find({'field_label':F}, ['level_norm'])]
368
statsdF = {}
369
statsdF['nforms'] = len(res)
370
statsdF['maxnorm'] = max(res+[0])
371
statsd['counts'][F] = statsdF
372
stats[d] = statsd
373
return stats
374
375
# To store these in a yaml file, after loading the current file do
376
# sage: hst = make_stats() # takes a few minutes
377
# sage: sage: hst_yaml_file = open("lmfdb/hilbert_modular_forms/hst_stats.yaml", 'w')
378
# sage: yaml.safe_dump(hst, hst_yaml_file)
379
# sage: hst_yaml_file.close()
380
381
def make_stats():
382
from data_mgt.utilities.rewrite import update_attribute_stats
383
384
print("Updating fields stats")
385
fields = hmf_fields.distinct('label')
386
degrees = hmf_fields.distinct('degree')
387
field_sort_key = lambda F: int(F.split(".")[2]) # by discriminant
388
fields_by_degree = dict([(d,sorted(hmf_fields.find({'degree':d}).distinct('label'),key=field_sort_key)) for d in degrees])
389
print("{} fields in database of degree from {} to {}".format(len(fields),min(degrees),max(degrees)))
390
391
print("...summary of counts by degree...")
392
entry = {'_id': 'fields_summary'}
393
hmf_stats.delete_one(entry)
394
field_data = {'max': max(degrees),
395
'min': min(degrees),
396
'total': len(fields),
397
'counts': [[d,hmf_fields.count({'degree': d})]
398
for d in degrees]
399
}
400
entry.update(field_data)
401
hmf_stats.insert_one(entry)
402
403
print("...fields by degree...")
404
entry = {'_id': 'fields_by_degree'}
405
hmf_stats.delete_one(entry)
406
for d in degrees:
407
entry[str(d)] = {'fields': fields_by_degree[d],
408
'nfields': len(fields_by_degree[d]),
409
'maxdisc': max(hmf_fields.find({'degree':d}).distinct('discriminant'))
410
}
411
hmf_stats.insert_one(entry)
412
413
print("Updating forms stats")
414
print("counts by field degree and by dimension...")
415
update_attribute_stats(hmfs, 'forms', 'deg')
416
update_attribute_stats(hmfs, 'forms', 'dimension')
417
418
print("counts by field degree and by level norm...")
419
entry = {'_id': 'level_norm_by_degree'}
420
degree_data = {}
421
for d in degrees:
422
res = hmf_forms.find({'deg':d})
423
nforms = res.count()
424
Ns = res.distinct('level_norm')
425
min_norm = min(Ns)
426
max_norm = max(Ns)
427
degree_data[str(d)] = {'nforms':nforms,
428
'min_norm':min_norm,
429
'max_norm':max_norm,
430
}
431
print("{}: {}".format(d,degree_data[str(d)]))
432
hmf_stats.delete_one(entry)
433
entry.update(degree_data)
434
hmf_stats.insert_one(entry)
435
436
print("counts by field and by level norm...")
437
entry = {'_id': 'level_norm_by_field'}
438
field_data = {}
439
for f in fields:
440
ff = f.replace(".",":") # mongo does not allow "." in key strings
441
res = hmf_forms.find({'field_label': f})
442
nforms = res.count()
443
Ns = res.distinct('level_norm')
444
min_norm = min(Ns)
445
max_norm = max(Ns)
446
field_data[ff] = {'nforms':nforms,
447
'min_norm':min_norm,
448
'max_norm':max_norm,
449
}
450
#print("{}: {}".format(f,field_data[ff]))
451
hmf_stats.delete_one(entry)
452
entry.update(field_data)
453
hmf_stats.insert_one(entry)
454
455
456
def add_missing_disc_deg_wt(nf):
457
if not all([nf.get('disc', None),
458
nf.get('deg', None),
459
nf.get('parallel_weight', None)]):
460
print("Fixing form {}".format(nf['label']))
461
#print(" keys are {}".format(nf.keys()))
462
deg, r, disc, n = nf['field_label'].split(".")
463
nf['disc'] = int(disc)
464
nf['deg'] = int(deg)
465
nf['parallel_weight'] = 2
466
return nf
467
468