Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Lib/dbm/dumb.py
12 views
1
"""A dumb and slow but simple dbm clone.

For database spam, spam.dir contains the index (a text file),
spam.bak *may* contain a backup of the index (also a text file),
while spam.dat contains the data (a binary file).

XXX TO DO:

- seems to contain a bug when updating...

- reclaim free space (currently, space once occupied by deleted or expanded
  items is never reused)

- support concurrent access (currently, if two processes take turns making
  updates, they can mess up the index)

- support efficient access to large databases (currently, the whole index
  is read when the database is opened, and some updates rewrite the whole
  index)

- support opening for read-only (flag = 'm')

"""
23
24
import ast as _ast
25
import io as _io
26
import os as _os
27
import collections.abc
28
29
__all__ = ["error", "open"]
30
31
# Exception type this module raises for database-level failures (closed
# database, write attempt on a read-only database).
error = OSError

# Alignment unit, in bytes, for values stored in the data file: every value
# starts at an offset that is a multiple of this size.
_BLOCKSIZE = 512
34
35
class _Database(collections.abc.MutableMapping):
    """Mapping stored in three files that share one base name.

    ``<base>.dat`` holds the raw values, ``<base>.dir`` is a text index with
    one ``key, (pos, siz)`` line per key, and ``<base>.bak`` is the previous
    index, kept by _commit().  Keys are held in memory as ``bytes``; ``str``
    keys and values are encoded as UTF-8 and ``bytearray`` keys are copied
    to ``bytes`` so they can be used as dictionary keys.
    """

    # The on-disk directory and data files can remain in mutually
    # inconsistent states for an arbitrarily long time (see comments
    # at the end of __setitem__).  This is only repaired when _commit()
    # gets called.  One place _commit() gets called is from __del__(),
    # and if that occurs at program shutdown time, module globals may
    # already have gotten rebound to None.  Since it's crucial that
    # _commit() finish successfully, we can't ignore shutdown races
    # here, and _commit() must not reference any globals.
    _os = _os       # for _commit()
    _io = _io       # for _commit()

    # Class-level defaults: if __init__ raises before these are assigned on
    # the instance, close()/__del__ still finds them and _commit() is a
    # harmless no-op instead of failing with AttributeError.
    _index = None
    _modified = False

    def __init__(self, filebasename, mode, flag='c'):
        """Open (and, depending on *flag*, create) the database files.

        filebasename -- path without extension; '.dir', '.dat' and '.bak'
                        are appended to it.
        mode         -- permission bits passed to os.chmod() for files this
                        object creates or rewrites.
        flag         -- 'r' makes the mapping read-only; 'c' and 'n' allow
                        missing files to be created ('n' removes existing
                        ones first); with any other value a missing file
                        raises OSError.
        """
        filebasename = self._os.fsencode(filebasename)
        self._mode = mode
        self._readonly = (flag == 'r')

        # The directory file is a text file.  Each line looks like
        #    "%r, (%d, %d)\n" % (key, pos, siz)
        # where key is the string key, pos is the offset into the dat
        # file of the associated value's first byte, and siz is the number
        # of bytes in the associated value.
        self._dirfile = filebasename + b'.dir'

        # The data file is a binary file pointed into by the directory
        # file, and holds the values associated with keys.  Each value
        # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
        # binary 8-bit string value.
        self._datfile = filebasename + b'.dat'
        self._bakfile = filebasename + b'.bak'

        # The index is an in-memory dict, mirroring the directory file.
        self._index = None  # maps keys to (pos, siz) pairs

        # Handle the creation
        self._create(flag)
        self._update(flag)

    def _create(self, flag):
        """Make sure the data file exists, creating it for 'c' and 'n'.

        With flag 'n' any existing data, backup and directory files are
        removed first.  Raises OSError if the data file cannot be opened
        and *flag* does not permit creating it.
        """
        if flag == 'n':
            for filename in (self._datfile, self._bakfile, self._dirfile):
                try:
                    _os.remove(filename)
                except OSError:
                    # Best effort: the file may simply not exist.
                    pass
        # Mod by Jack: create data file if needed
        try:
            f = _io.open(self._datfile, 'r', encoding="Latin-1")
        except OSError:
            if flag not in ('c', 'n'):
                raise
            with _io.open(self._datfile, 'w', encoding="Latin-1") as f:
                self._chmod(self._datfile)
        else:
            f.close()

    # Read directory file into the in-memory index dict.
    def _update(self, flag):
        """Load the directory file into ``self._index``.

        A missing directory file is tolerated only for flags 'c' and 'n';
        the index then starts empty and is marked modified so that the
        next _commit() writes the file.  Otherwise OSError propagates.
        """
        self._modified = False
        self._index = {}
        try:
            f = _io.open(self._dirfile, 'r', encoding="Latin-1")
        except OSError:
            if flag not in ('c', 'n'):
                raise
            self._modified = True
        else:
            with f:
                for line in f:
                    line = line.rstrip()
                    # NOTE(review): literal_eval only accepts Python
                    # literals, but a malformed or hand-edited line still
                    # raises ValueError/SyntaxError here.
                    key, pos_and_siz_pair = _ast.literal_eval(line)
                    key = key.encode('Latin-1')
                    self._index[key] = pos_and_siz_pair

    # Write the index dict to the directory file.  The original directory
    # file (if any) is renamed with a .bak extension first.  If a .bak
    # file currently exists, it's deleted.
    def _commit(self):
        """Rewrite the directory file from the in-memory index.

        Does nothing when the database is closed or nothing was modified.
        """
        # CAUTION:  It's vital that _commit() succeed, and _commit() can
        # be called from __del__().  Therefore we must never reference a
        # global in this routine.
        if self._index is None or not self._modified:
            return  # nothing to do

        try:
            self._os.unlink(self._bakfile)
        except OSError:
            pass

        try:
            self._os.rename(self._dirfile, self._bakfile)
        except OSError:
            pass

        with self._io.open(self._dirfile, 'w', encoding="Latin-1") as f:
            self._chmod(self._dirfile)
            for key, pos_and_siz_pair in self._index.items():
                # Use Latin-1 since it has no qualms with any value in any
                # position; UTF-8, though, does care sometimes.
                entry = "%r, %r\n" % (key.decode('Latin-1'), pos_and_siz_pair)
                f.write(entry)

    sync = _commit

    def _verify_open(self):
        """Raise ``error`` if the database has been closed."""
        if self._index is None:
            raise error('DBM object has already been closed')

    @staticmethod
    def _coerce_key(key):
        """Return *key* in the hashable form used by the index.

        str is encoded as UTF-8 and bytearray is copied to bytes (a
        bytearray is unhashable and cannot be looked up in a dict).  Any
        other object is returned unchanged for the caller to validate.
        """
        if isinstance(key, str):
            return key.encode('utf-8')
        if isinstance(key, bytearray):
            return bytes(key)
        return key

    def __getitem__(self, key):
        """Return the stored bytes for *key*; raise KeyError if absent."""
        key = self._coerce_key(key)
        self._verify_open()
        pos, siz = self._index[key]     # may raise KeyError
        with _io.open(self._datfile, 'rb') as f:
            f.seek(pos)
            dat = f.read(siz)
        return dat

    # Append val to the data file, starting at a _BLOCKSIZE-aligned
    # offset.  The data file is first padded with NUL bytes (if needed)
    # to get to an aligned offset.  Return pair
    #     (starting offset of val, len(val))
    def _addval(self, val):
        """Append *val* at the next aligned offset; return (pos, len(val))."""
        with _io.open(self._datfile, 'rb+') as f:
            f.seek(0, 2)
            pos = int(f.tell())
            # Round the end-of-file position up to a multiple of _BLOCKSIZE.
            npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            f.write(b'\0'*(npos-pos))
            pos = npos
            f.write(val)
        return (pos, len(val))

    # Write val to the data file, starting at offset pos.  The caller
    # is responsible for ensuring that there's enough room starting at
    # pos to hold val, without overwriting some other value.  Return
    # pair (pos, len(val)).
    def _setval(self, pos, val):
        """Overwrite the data file at *pos* with *val*; return (pos, len(val))."""
        with _io.open(self._datfile, 'rb+') as f:
            f.seek(pos)
            f.write(val)
        return (pos, len(val))

    # key is a new key whose associated value starts in the data file
    # at offset pos and with length siz.  Add an index record to
    # the in-memory index dict, and append one to the directory file.
    def _addkey(self, key, pos_and_siz_pair):
        """Record a new key in the index and append it to the directory file."""
        self._index[key] = pos_and_siz_pair
        with _io.open(self._dirfile, 'a', encoding="Latin-1") as f:
            self._chmod(self._dirfile)
            f.write("%r, %r\n" % (key.decode("Latin-1"), pos_and_siz_pair))

    def __setitem__(self, key, val):
        """Store *val* under *key*.

        Raises ``error`` if the database is read-only or closed, and
        TypeError if *key* or *val* is not str, bytes or bytearray.
        """
        if self._readonly:
            raise error('The database is opened for reading only')
        key = self._coerce_key(key)
        if not isinstance(key, bytes):
            raise TypeError("keys must be bytes or strings")
        if isinstance(val, str):
            val = val.encode('utf-8')
        elif not isinstance(val, (bytes, bytearray)):
            raise TypeError("values must be bytes or strings")
        self._verify_open()
        self._modified = True
        if key not in self._index:
            self._addkey(key, self._addval(val))
        else:
            # See whether the new value is small enough to fit in the
            # (padded) space currently occupied by the old value.
            pos, siz = self._index[key]
            oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
            newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
            if newblocks <= oldblocks:
                self._index[key] = self._setval(pos, val)
            else:
                # The new value doesn't fit in the (padded) space used
                # by the old value.  The blocks used by the old value are
                # forever lost.
                self._index[key] = self._addval(val)

            # Note that _index may be out of synch with the directory
            # file now:  _setval() and _addval() don't update the directory
            # file.  This also means that the on-disk directory and data
            # files are in a mutually inconsistent state, and they'll
            # remain that way until _commit() is called.  Note that this
            # is a disaster (for the database) if the program crashes
            # (so that _commit() never gets called).

    def __delitem__(self, key):
        """Remove *key* and rewrite the directory file.

        Raises ``error`` if the database is read-only or closed, and
        KeyError if *key* is absent.
        """
        if self._readonly:
            raise error('The database is opened for reading only')
        key = self._coerce_key(key)
        self._verify_open()
        self._modified = True
        # The blocks used by the associated value are lost.
        del self._index[key]
        # XXX It's unclear why we do a _commit() here (the code always
        # XXX has, so I'm not changing it).  __setitem__ doesn't try to
        # XXX keep the directory file in synch.  Why should we?  Or
        # XXX why shouldn't __setitem__?
        self._commit()

    def keys(self):
        """Return a list of all keys; raise ``error`` if closed."""
        try:
            return list(self._index)
        except TypeError:
            # self._index is None once the database has been closed.
            raise error('DBM object has already been closed') from None

    def items(self):
        """Return a list of (key, value) pairs; raise ``error`` if closed."""
        self._verify_open()
        return [(key, self[key]) for key in self._index.keys()]

    def __contains__(self, key):
        """Return True if *key* is in the index; raise ``error`` if closed."""
        key = self._coerce_key(key)
        try:
            return key in self._index
        except TypeError:
            if self._index is None:
                raise error('DBM object has already been closed') from None
            else:
                # Some other TypeError, e.g. an unhashable key.
                raise

    def iterkeys(self):
        """Return an iterator over the keys; raise ``error`` if closed."""
        try:
            return iter(self._index)
        except TypeError:
            raise error('DBM object has already been closed') from None
    __iter__ = iterkeys

    def __len__(self):
        """Return the number of keys; raise ``error`` if closed."""
        try:
            return len(self._index)
        except TypeError:
            raise error('DBM object has already been closed') from None

    def close(self):
        """Commit pending index changes and mark the database closed."""
        try:
            self._commit()
        finally:
            # Clearing _index is what every other method uses to detect
            # a closed database.
            self._index = self._datfile = self._dirfile = self._bakfile = None

    __del__ = close

    def _chmod(self, file):
        """Apply the permission bits given at construction to *file*."""
        self._os.chmod(file, self._mode)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()
289
290
291
def open(file, flag='c', mode=0o666):
    """Open the database *file* and return the corresponding object.

    *file* is the base name of the database; the '.dir', '.dat' and '.bak'
    extensions are added to it.

    The flag argument must be one of:

    'r' -- open an existing database for reading only
    'w' -- open an existing database for reading and writing
    'c' -- open for reading and writing, creating the database if it
           does not exist (the default)
    'n' -- always create a new, empty database, open for reading and
           writing

    Any other value raises ValueError.

    The optional mode argument gives the UNIX permission bits for the
    database files.  It defaults to octal code 0o666 (and will be modified
    by the prevailing umask).

    """

    # Reject a bad flag first so a failing call never touches the
    # process-wide umask.
    if flag not in ('r', 'w', 'c', 'n'):
        raise ValueError("Flag must be one of 'r', 'w', 'c', or 'n'")

    # Modify mode depending on the umask.  os.umask() can only be read by
    # setting it, so set it to 0 and immediately restore the old value.
    try:
        um = _os.umask(0)
        _os.umask(um)
    except AttributeError:
        # os.umask is unavailable here; use mode as given.
        pass
    else:
        # Turn off any bits that are set in the umask
        mode = mode & (~um)
    return _Database(file, mode, flag=flag)
318
319