Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemath
GitHub Repository: sagemath/sagelib
Path: blob/master/sage/databases/db.py
6915 views
1
"""nodoctest
2
Generic database that uses ZODB.
3
"""
4
5
6
7
"""
8
Important long email note. This guy found that zlib is much better
9
than bzip2 for compressing small files with ZODB.
10
11
Hi William,
12
13
then you might be interested in what I found in the meantime, when doing some
14
simple comparisons for a particular application (ran it as "time
15
my-application.py"):
16
17
tests on local disk
18
19
no compression
20
real 28m6.191s
21
user 24m44.077s
22
sys 0m54.764s
23
resulting Data.fs: 1.9G
24
25
----------
26
27
bzip2, thresh 1024
28
real 53m36.815s
29
user 49m1.959s
30
sys 1m6.094s
31
resulting Data.fs: 871M
32
33
----------
34
35
bzip2, thresh 2048
36
real 55m52.140s
37
user 49m37.324s
38
sys 1m6.361s
39
resulting Data.fs: 871M
40
41
----------
42
43
zlib, thresh 1024
44
real 34m44.240s
45
user 30m7.113s
46
sys 0m42.538s
47
resulting Data.fs: 852M
48
49
----------
50
51
zlib, thresh 2048
52
real 32m38.335s
53
user 30m21.959s
54
sys 0m42.355s
55
resulting Data.fs: 852M
56
57
58
I found that to be very interesting: in my case, zlib compresses a little bit
59
better than bzip2 (interesting enough, but I think bzip2 has its strength on
60
larger chunks anyway), but more important for me, seems to work nearly as
61
fast as the plain FileStorage. The test was not run under laboratory
62
conditions, but should still be good enough to get a trend.
63
64
Cheers,
65
66
Sascha
67
68
--
69
Gallileus - the power of knowledge
70
71
Gallileus GmbH http://www.gallileus.info/
72
73
"""
74
75
#*****************************************************************************
76
#
77
# Sage: System for Algebra and Geometry Experimentation
78
#
79
# Copyright (C) 2005 William Stein <[email protected]>
80
#
81
# Distributed under the terms of the GNU General Public License (GPL)
82
#
83
# This code is distributed in the hope that it will be useful,
84
# but WITHOUT ANY WARRANTY; without even the implied warranty of
85
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
86
# General Public License for more details.
87
#
88
# The full text of the GPL is available at:
89
#
90
# http://www.gnu.org/licenses/
91
#*****************************************************************************
92
93
import os, shutil, cPickle
94
95
import sage.databases.compressed_storage
96
from sage.misc.misc import SAGE_DATA
97
98
# The following effectively turns off the ZODB logger, which is OK for us.
99
# Without this, one gets this annoying error message a lot:
100
# No handlers could be found for logger "ZODB.FileStorage"
101
import logging
102
logging.getLogger("ZODB.FileStorage").setLevel(10000000)
103
logging.getLogger("ZODB.lock_file").setLevel(10000000)
104
logging.getLogger("ZODB.Connection").setLevel(10000000)
105
106
class _uniq(object):
107
_db = {} # Class variable, no globals!
108
109
def __new__(cls, name="", read_only=True, unique_key=None):
110
key = (cls, unique_key)
111
if _uniq._db.has_key(key):
112
return _uniq._db[key]
113
X = object.__new__(cls)
114
_uniq._db[key] = X
115
return X
116
117
class Database(_uniq):
    """
    A persistent key/value database stored with ZODB in a compressed
    FileStorage located at SAGE_DATA/<name>/<name>.

    Because of the _uniq base class, instances are shared per
    (subclass, unique_key), so __init__ may receive an
    already-initialized object; its body only runs the first time
    (guarded by hasattr(self, 'name')).
    """
    def __init__(self, name, read_only=True, thresh=1024):
        """
        INPUT:
            name      -- name of the database; also the directory and
                         file name under SAGE_DATA
            read_only -- bool (default True); open the storage read-only
            thresh    -- int (default 1024); compression threshold in
                         bytes passed to CompressedStorage
        """
        if not hasattr(self, 'name'):
            self.read_only = read_only
            self.name = name
            self._thresh = thresh
            self._load_()

    def _load_(self):
        """
        Open the underlying ZODB storage (creating the database
        directory if necessary) and bind self.root to the top-level
        OOBTree that holds all user data.

        Raises RuntimeError if the database is opened read-only but the
        database file does not exist.
        """
        import BTrees.OOBTree
        from ZODB import FileStorage, DB
        name = self.name
        if not os.path.exists("%s/%s"%(SAGE_DATA, name)):
            try:
                os.makedirs("%s/%s"%(SAGE_DATA, name))
            except OSError:   # e.g. read-only install (online calculator)
                pass
        self._dbname = "%s/%s/%s"%(SAGE_DATA, name, name)
        if self.read_only and not os.path.exists(self._dbname):
            raise RuntimeError("The database %s is not installed."%self._dbname)
        fs = FileStorage.FileStorage(self._dbname, read_only=self.read_only)
        self._storage = sage.databases.compressed_storage.CompressedStorage(fs, thresh=self._thresh)
        self._db = DB(self._storage)
        self.conn = self._db.open()
        self._root = self.conn.root()
        # All user data lives in one OOBTree under the "btree" key.
        if "btree" not in self._root:
            self._root["btree"] = BTrees.OOBTree.OOBTree()
        self.root = self._root["btree"]

    def begin(self):
        r"""Start a new database transaction."""
        import transaction
        transaction.get().begin()

    def abort(self):
        r"""Abort the current database transaction, without committing."""
        import transaction
        transaction.get().abort()

    def commit(self):
        """
        Commit the new version of this object to the database file.

        Note that if a data item corresponding to a key is changed,
        you still have to tell the database that that data item
        was changed by calling the changed method with that key.

        Raises RuntimeError if the database is read-only.
        """
        if self.read_only:
            raise RuntimeError("Cannot commit read only database.")
        self._root._p_changed = 1
        import transaction
        transaction.get().commit()

    def changed(self, key):
        """
        Informs the database that some items corresponding to
        the given key may have changed.  This does not commit
        the changes to disk (use the commit function after
        calling changed to do that).
        """
        self.root._p_changed = 1
        # Reassigning the value makes ZODB register the mutation.
        X = self.root[key]
        self.root[key] = X

    def pack(self):
        """
        Not implemented -- pack does not work with the compressed
        storage.  Use the rebuild function instead; it is slower than
        the usual ZODB pack since it rebuilds the database from
        scratch.
        """
        # NOTE: the unreachable self._db.pack()/self.commit() calls that
        # used to follow this raise have been removed as dead code.
        raise NotImplementedError

    def rebuild(self, thresh=None):
        """
        Completely rebuild the database from scratch, by writing
        everything out to a temporary database, then moving the
        temporary database file over self's file.  This can take a
        long time.

        The main reason for this function is that pack does not work on
        compressed ZODB databases.

        A copy of the old database file (suffix ".old") is created
        before the rebuild.

        If you specify a thresh then that threshold is used for
        recompressing all the objects.  Note that the threshold is
        not saved as part of the database, so new objects will be
        compressed using whatever threshold you use when creating
        the database object.
        """
        import BTrees.OOBTree
        from ZODB import FileStorage, DB
        if self.read_only:
            raise RuntimeError("Cannot pack read only database.")
        if thresh is None:
            thresh = self._thresh
        else:
            self._thresh = thresh
        rebuild_name = self._dbname + "_rebuild"
        shutil.copy2(self._dbname, self._dbname + ".old")   # safety backup
        if os.path.exists(rebuild_name):
            os.unlink(rebuild_name)
        fs = FileStorage.FileStorage(rebuild_name, read_only=False)
        storage = sage.databases.compressed_storage.CompressedStorage(fs, thresh)
        db = DB(storage)
        conn = db.open()
        _root = conn.root()
        root = BTrees.OOBTree.OOBTree()
        _root["btree"] = root
        # Copy every entry into the fresh btree.
        for k, x in self.root.iteritems():
            root[k] = x
        _root._p_changed = 1
        import transaction
        transaction.get().commit()
        shutil.move(rebuild_name, self._dbname)
        # Remove the FileStorage auxiliary files left by the rebuild.
        for ext in (".tmp", ".index", ".lock"):
            os.unlink(rebuild_name + ext)
        # self.conn still points at the old storage; force read-only so
        # no further writes go through the stale handle.
        self.read_only = True

    def __repr__(self):
        return "Database %s"%self.name

    def __setitem__(self, x, y):
        try:
            self.root[x] = y
        except AttributeError:
            # self.root is not bound yet -- (re)open the storage.
            # NOTE(review): the original called self._init(), which is
            # not defined anywhere in this file; _load_ is the method
            # that binds self.root.
            self._load_()
            self.root[x] = y

    def __getitem__(self, x):
        try:
            if not isinstance(x, slice):
                return self.root[x]
            # Slice access: list of values for keys in the range.
            # Default missing start/step like ordinary sequence
            # slicing (the original raised TypeError when step was
            # omitted, since slice.step is then None).
            start = 0 if x.start is None else x.start
            step = 1 if x.step is None else x.step
            return [self[k] for k in range(start, x.stop, step)]
        except AttributeError:
            # self.root not bound yet; see __setitem__.
            self._load_()
            return self.root[x]

    def __delitem__(self, x):
        del self.root[x]

    def has_key(self, x):
        """Return True if x is a key of the database."""
        # `in` works for both dicts and OOBTrees (and on Python 3).
        return bool(x in self.root)

    def keys(self):
        """Return the keys of the database."""
        return self.root.keys()

    def as_dict(self, keys=None):
        """
        Return a dict representation of the database.

        Since the database could be large, if the optional keys
        parameter is given then only the elements of the database
        with key in keys are listed.
        """
        X = {}
        if keys is None:
            keys = self.root.keys()
        for k in keys:
            if k in self.root:
                X[k] = self.root[k]
        return X

    def dump_as_dict(self, filename, keys):
        """
        Pickle (protocol 2) the dict of entries for the given keys to
        SAGE_DATA/pickles/<filename>.
        """
        from sage.misc.misc import sage_makedirs
        X = self.as_dict(keys)
        print("Dumping %s..."%filename)
        s = cPickle.dumps(X, 2)
        dir = "%s/pickles/"%SAGE_DATA
        sage_makedirs(dir)
        # Protocol-2 pickles are binary: write in "wb" (the original
        # used text mode "w") and close the handle explicitly instead
        # of leaking it.
        f = open("%s/%s"%(dir, filename), "wb")
        f.write(s)
        f.close()

    def dump_as_dict_intervals(self, basename, Nstart, Nstop, length):
        """
        Dump the database in chunks of at most `length` integer keys,
        covering Nstart..Nstop, to files named
        <basename>_<N>-<N2> under SAGE_DATA/pickles/.
        """
        N = Nstart
        while N <= Nstop:
            N2 = min(Nstop, N + length)
            Z = xrange(N, N2 + 1)
            self.dump_as_dict("%s_%s-%s"%(basename, N, N2), Z)
            N += length

    def restore_from_dict(self, filename):
        """
        Restore from the filename which must store a pickled dict.

        After loading the database is committed.

        Raises RuntimeError if the database is read-only.
        """
        if self.read_only:
            raise RuntimeError("%s is read only."%self)
        dir = "%s/pickles/"%SAGE_DATA
        # Binary read, and close the handle.  Note cPickle.loads takes
        # only the pickled string -- the original passed a bogus second
        # argument (2), which raises a TypeError.
        f = open("%s/%s"%(dir, filename), "rb")
        s = f.read()
        f.close()
        print("Restoring %s..."%filename)
        X = cPickle.loads(s)
        for k, x in X.iteritems():
            self.root[k] = x
        self.commit()

    def restore_from_dict_all(self, basename):
        """
        Restore all files that start with the given basename.

        Each file is loaded then committed to disk before the next
        file is loaded.
        """
        X = os.listdir("%s/pickles/"%SAGE_DATA)
        for F in X:
            if F.startswith(basename):
                self.restore_from_dict(F)

    def delete_all(self):
        """
        Delete every entry in the database.
        """
        import BTrees.OOBTree
        del self._root["btree"]
        self._root["btree"] = BTrees.OOBTree.OOBTree()
        self.root = self._root["btree"]

    def clone(self, new_name):
        """
        Copy the database to a new database with the given new_name.
        There must not be a database with the new_name already, or a
        RuntimeError exception is raised.
        """
        if os.path.exists("%s/%s"%(SAGE_DATA, new_name)):
            # Original referenced an undefined variable `name` here.
            raise RuntimeError("Cannot clone to %s since that database already exists."%new_name)
        # Original bugs fixed: os.path.makedirs does not exist (it is
        # os.makedirs), and the source path used the undefined `name`
        # instead of self.name.
        os.makedirs("%s/%s"%(SAGE_DATA, new_name))
        shutil.copy2("%s/%s/%s"%(SAGE_DATA, self.name, self.name),
                     "%s/%s"%(SAGE_DATA, new_name))