Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
181 views
unlisted
ubuntu2004
1
# -*- coding: utf-8 -*-
2
3
import os
4
import pickle
5
import bz2
6
import warnings
7
import urllib.parse
8
from urllib.request import urlretrieve, urlopen
9
from urllib.error import HTTPError
10
import configparser
11
import string
12
13
from sage.rings.rational_field import QQ
14
from sage.modules.free_module_element import vector
15
from sage.misc.cachefunc import CachedFunction, dict_key
16
from sage.misc.decorators import decorator_keywords
17
try:
18
# NOTE: moved in sage 9.7
19
from sage.misc.instancedoc import instancedoc
20
except ImportError:
21
from sage.docs.instancedoc import instancedoc
22
23
24
@instancedoc
class FileCachedFunction(CachedFunction):
    r"""
    Function wrapper that implements a cache extending SageMath's CachedFunction.

    Preface a function definition with @file_cached_function to wrap it. When
    the wrapped function is called the following locations are visited to try
    and obtain the output data:

    - first the cache in working memory
    - then the local filesystem
    - then the internet

    If the data is not stored in any of these locations then the wrapped
    function is executed. The output of the function is saved in the working
    memory cache and as a file in the filesystem.

    By default, the file is saved in the directory given by the directory
    argument in the current working directory. If a name for an environment
    variable is supplied via the env_var argument and this environment variable
    is set to a valid path, then this directory is used instead of the current
    working directory.

    A filename is generated from the function arguments. The default
    implementation requires that

    - the arguments are hashable and convertible to strings via str(),
    - the resulting strings do not contain any characters not allowed in file
      names.

    The url argument is the base url of the remote cache; the file name is
    joined to it to obtain the download location of a single file.

    The remote_database_list accepts the url of a file that should contain a
    list of all files in the remote cache, one file per line.
    This allows bulk downloading the files with the download_all() method.

    The key argument accepts a callable to generate the cache key from the
    function arguments. For details see the documentation of CachedFunction.

    The filename argument accepts a callable that generates the file name from
    the function and the key. Whenever key is provided, filename must be
    provided, too.

    The pickle_wrappers argument is a pair ``(pickle_wrapper, unpickle_wrapper)``
    of callables (or ``None``). If given, ``pickle_wrapper`` is applied to the
    data before it is pickled and ``unpickle_wrapper`` is applied to the
    unpickled data before it is returned.

    EXAMPLES::

        sage: from admcycles.file_cache import file_cached_function, ignore_args_key, ignore_args_filename
        sage: from tempfile import mkdtemp
        sage: from shutil import rmtree
        sage: import os

        sage: tmpdir = mkdtemp()
        sage: # We ignore the second argument for caching
        sage: @file_cached_function(directory=tmpdir, key=ignore_args_key([1]), filename=ignore_args_filename())
        ....: def f(a, b=True):
        ....:     pass
        sage: f(1)
        sage: assert os.path.exists(os.path.join(tmpdir, "f_1.pkl.bz2"))
        ....:
        sage: os.environ["TEST_CACHE_DIR"] = tmpdir
        sage: @file_cached_function(directory="", env_var="TEST_CACHE_DIR")
        ....: def f(a, b=True):
        ....:     pass
        sage: f(1)
        sage: assert os.path.exists(os.path.join(tmpdir, "f_1_True.pkl.bz2"))
        sage: rmtree(tmpdir)
    """
    def __init__(self, f, directory, url=None, remote_database_list=None, env_var=None, key=None, filename=None, pickle_wrappers=(None, None)):
        r"""
        Set up the cache for the function ``f``.

        Raises a ValueError if ``key`` is given without ``filename``.
        """
        self.env_var = env_var
        if env_var is not None:
            try:
                env_dir = os.environ[env_var]
            except KeyError:
                # The environment variable is not set: keep ``directory``
                # relative to the current working directory.
                pass
            else:
                if not os.path.isdir(env_dir):
                    # The format arguments must be passed as one tuple;
                    # otherwise env_dir is taken as the warning category.
                    warnings.warn("%s=%s is not a directory. Ignoring it." % (env_var, env_dir))
                else:
                    directory = os.path.join(env_dir, directory)

        if key is not None and filename is None:
            raise ValueError("If key is provided, filename must also be provided")

        super(FileCachedFunction, self).__init__(f, key=key)
        self.directory = directory
        self.url = url
        self.remote_database_list = remote_database_list
        if self.url is None:
            self.go_online = False
        else:
            self.go_online = self.__get_online_lookup_default()
        self.filename = filename
        self.pickle_wrapper, self.unpickle_wrapper = pickle_wrappers

    def __call__(self, *args, **kwds):
        r"""
        Return the value of the wrapped function for the given arguments.

        The in-memory cache, the cache file and (if online lookup is active)
        the remote database are consulted in this order before the wrapped
        function is actually executed.
        """
        k = self.get_key(*args, **kwds)

        # First try to return the value from the cache.
        try:
            return self.cache[k]
        except TypeError:  # k is not hashable
            k = dict_key(k)
            try:
                return self.cache[k]
            except KeyError:
                pass
        except KeyError:
            pass

        # If the value is not in the cache, check if the cache file exists.
        # If not, maybe try to download it
        # Note: We prefix the filename with the function name to avoid collisions if multiple
        # functions are cached in the same directory.
        (filename, filename_with_path) = self.filename_from_args(k)
        if not os.path.exists(filename_with_path) and self.go_online:
            try:
                self.__download(filename, filename_with_path)
            except IOError:
                pass

        # If the cache file exists now, try to load it.
        if os.path.exists(filename_with_path):
            try:
                dat = self.__load_from_file(filename_with_path)
                self.cache[k] = dat
                return dat
            except IOError:
                pass
            except TypeError:
                warnings.warn("can not unpickle file %s, it was probably created with a newer version of SageMath." % filename_with_path)
            except (EOFError, pickle.UnpicklingError):
                # A truncated or damaged file (e.g. an interrupted download)
                # must not make every later call fail: recompute instead.
                warnings.warn("cache file %s is truncated or corrupt, recomputing." % filename_with_path)

        # If we reach this, then all methods to retrieve the data from the cache have failed.
        dat = self.f(*args, **kwds)
        self.__save(k, dat)
        return dat

    def __config_file(self):
        r"""
        Returns the path to the configuration file.
        """
        return os.path.join(self.directory, "filecache.ini")

    def __get_online_lookup_default(self):
        r"""
        Tries to obtain a user specified value from the config file.
        Returns True if this is not possible.
        """
        cf = self.__config_file()
        config = configparser.ConfigParser()
        config.read(cf)
        try:
            if config[self.f.__name__]['online_lookup'] == 'no':
                return False
        except KeyError:
            pass
        return True

    def set_online_lookup_default(self, b):
        r"""
        Saves the default for online lookup in the configuration file.

        The cache directory is created if it does not exist yet, so that
        the configuration file can be written before anything was cached.
        May throw an OSError if the directory or file can not be written.
        """
        self.set_online_lookup(b)
        cf = self.__config_file()
        config = configparser.ConfigParser()
        config.read(cf)
        if b:
            config[self.f.__name__] = {'online_lookup': 'yes'}
        else:
            config[self.f.__name__] = {'online_lookup': 'no'}
        self.__create_directory()
        with open(cf, "w") as configfile:
            config.write(configfile)

    def set_online_lookup(self, b):
        r"""
        Temporarily set whether online lookup is active.
        Use :func:`set_online_lookup_default` to save a default.

        It is set to the boolean ``b``.

        Raises a ValueError if ``b`` is true but no url was provided.
        """
        if b and self.url is None:
            raise ValueError("no online database available for this function")
        self.go_online = b

    def set_cache(self, dat, *args, **kwds):
        r"""
        Manually add a value to the cache.

        EXAMPLES::

            sage: from admcycles.file_cache import file_cached_function
            sage: from tempfile import mkdtemp
            sage: from shutil import rmtree
            sage: tmpdir = mkdtemp()
            sage: @file_cached_function(directory=tmpdir)
            ....: def f(a, b=True):
            ....:     pass
            sage: f.set_cache("test", 1, b=False)
            sage: assert f(1, False) == "test"  # This is the cached value
            sage: f.clear_cache()
            sage: f(1, False)
            'test'
            sage: rmtree(tmpdir)

        The above output "test" is the file cached value, as f returns None.
        """
        k = self.get_key(*args, **kwds)
        self.__save(k, dat)

    def __create_directory(self):
        r"""
        Creates the directory if it does not exist yet.
        May throw an OSError.
        """
        try:
            if not os.path.isdir(self.directory):
                # makedirs also creates missing parent directories, and
                # exist_ok avoids a failure if another process creates the
                # directory between the check and the call.
                os.makedirs(self.directory, exist_ok=True)
        except OSError as e:
            print("Can not create directory", self.directory, e)
            raise

    def __save(self, k, dat):
        r"""
        Saves the data in the cache file and the in-memory cache.

        If the cache directory can not be created, only the in-memory
        cache is updated.

        EXAMPLES::

            sage: from admcycles.file_cache import file_cached_function
            sage: from tempfile import mkdtemp
            sage: from shutil import rmtree
            sage: tmpdir = mkdtemp()
            sage: @file_cached_function(directory=tmpdir)
            ....: def f(a, b=True):
            ....:     pass
            sage: k = f.get_key(1)
            sage: f._FileCachedFunction__save(k, "test")
            sage: assert f.cache[k] == "test"
            sage: f.clear_cache()
            sage: f(1)
            'test'
            sage: rmtree(tmpdir)

        The above "test" is the file cached value, as f returns None.
        """
        self.cache[k] = dat

        (filename, filename_with_path) = self.filename_from_args(k)
        try:
            self.__create_directory()
        except OSError:
            return
        # Apply the wrapper before the file is opened, so that a failing
        # wrapper does not leave an empty cache file behind.
        if self.pickle_wrapper is not None:
            dat = self.pickle_wrapper(dat)
        with bz2.open(filename_with_path, 'wb') as f:
            # We force pickle to use protocol version 3 to make
            # sure that it works for all Python 3 version
            # See
            # https://docs.python.org/3/library/pickle.html
            pickle.dump(dat, f, protocol=3)

    def filename_from_args(self, k):
        r"""
        Constructs a file name of the form func_name_arg1_arg2_arg3.pkl.bz2

        Returns the pair ``(filename, filename_with_path)``. If a filename
        callable was supplied to the constructor, it is used instead of the
        default scheme.

        EXAMPLES::

            sage: from admcycles.file_cache import file_cached_function
            sage: @file_cached_function(directory="dir")
            ....: def f(a, b=True):
            ....:     pass
            sage: k = f.get_key(1)
            sage: f.filename_from_args(k)
            ('f_1_True.pkl.bz2', 'dir/f_1_True.pkl.bz2')
        """
        if self.filename is None:
            # k[0] holds the positional part of the default cache key.
            parts = [self.f.__name__]
            parts.extend(str(a) for a in k[0])
            filename = '_'.join(parts) + '.pkl.bz2'
        else:
            filename = self.filename(self.f, k)
        filename_with_path = os.path.join(self.directory, filename)
        return (filename, filename_with_path)

    def __load_from_file(self, filename_with_path):
        r"""
        Unpickles the given file and returns the data.
        """
        # NOTE: unpickling can execute arbitrary code. The file may have been
        # downloaded from ``self.url``, so only use urls of trusted databases.
        with bz2.open(filename_with_path, 'rb') as file:
            if self.unpickle_wrapper is None:
                return pickle.load(file)
            else:
                return self.unpickle_wrapper(pickle.load(file))

    def __download(self, filename, filename_with_path):
        r"""
        Download the given file from the remote database and stores it
        on the file system.

        Raises a ValueError if no url was provided. HTTP errors (e.g. the
        file is not in the remote database) are silently ignored.
        """
        if self.url is None:
            raise ValueError('no url provided')
        try:
            self.__create_directory()
        except OSError:
            return
        complete_url = urllib.parse.urljoin(self.url, filename)
        try:
            urlretrieve(complete_url, filename_with_path)
        except HTTPError:
            pass

    def download_all(self):
        r"""
        Download all files from the remote database.

        Files that already exist locally are not downloaded again.
        Raises a ValueError if no url or no remote database list was provided.
        """
        if self.url is None:
            raise ValueError('no url provided')
        if self.remote_database_list is None:
            raise ValueError('no remote database list provided')
        # Characters a file name may consist of. Anything else may result in
        # downloading from or saving to an unwanted location.
        allowed = set(string.ascii_letters + string.digits + '.' + '_')
        try:
            # The context manager closes the connection when we are done.
            with urlopen(self.remote_database_list) as response:
                for line in response:
                    filename = line.decode('utf-8').strip()
                    if not filename:
                        # Skip blank lines, e.g. a trailing newline.
                        continue
                    if not set(filename) <= allowed:
                        print("Recived an invalid filename, aborting.")
                        return
                    filename_with_path = os.path.join(self.directory, filename)
                    if not os.path.exists(filename_with_path):
                        print("Downloading", filename)
                        self.__download(filename, filename_with_path)
        except HTTPError as e:
            print("Can not open", self.remote_database_list, e)
349
350
351
# Decorator form of FileCachedFunction. It is applied with keyword arguments,
# e.g. ``@file_cached_function(directory=...)``.
file_cached_function = decorator_keywords(FileCachedFunction)
352
353
354
def ignore_args_key(ignore_args):
    r"""
    Return a key function that skips some positional arguments.

    The returned callable builds a tuple from its positional arguments,
    leaving out those whose index is contained in ``ignore_args``.
    Keyword arguments are accepted but do not enter the key.

    EXAMPLES::

        sage: from admcycles.file_cache import ignore_args_key
        sage: key = ignore_args_key([0, 1])
        sage: key("first arg", "second arg", "third arg")
        ('third arg',)
    """
    def key(*args, **invalid_args):
        kept = []
        for position, value in enumerate(args):
            if position in ignore_args:
                continue
            kept.append(value)
        return tuple(kept)

    return key
370
371
372
def ignore_args_filename():
    r"""
    Return a callable that turns a key built by ignore_args_key into a
    file name.

    The file name consists of the name of the function followed by the
    string form of every entry of the key, separated by underscores, and
    ends in ``.pkl.bz2``.

    EXAMPLES::

        sage: from admcycles.file_cache import ignore_args_key, ignore_args_filename
        sage: key = ignore_args_key([0, 1])
        sage: filename = ignore_args_filename()
        sage: def test():
        ....:     pass
        sage: filename(test, key("first arg", "second arg", "third arg"))
        'test_third arg.pkl.bz2'
    """
    def filename(f, key):
        pieces = [f.__name__]
        pieces.extend(str(entry) for entry in key)
        return '_'.join(pieces) + '.pkl.bz2'

    return filename
395
396
397
def rational_to_py(q):
    r"""
    Return the numerator and the denominator of ``q`` as a pair of
    python integers.

    EXAMPLES::

        sage: from admcycles.file_cache import rational_to_py
        sage: a, b = rational_to_py(QQ(1/2))
        sage: a
        1
        sage: type(a)
        <class 'int'>
        sage: b
        2
        sage: type(b)
        <class 'int'>
    """
    numerator = int(q.numerator())
    denominator = int(q.denominator())
    return (numerator, denominator)
415
416
417
def py_to_rational(t):
    r"""
    Return the rational number a/b for a pair ``t = (a, b)`` of python
    integers.

    EXAMPLES::

        sage: from admcycles.file_cache import py_to_rational
        sage: q = py_to_rational((1, 2))
        sage: q
        1/2
        sage: type(q)
        <class 'sage.rings.rational.Rational'>
    """
    numerator = QQ(t[0])
    denominator = QQ(t[1])
    return numerator / denominator
431
432
433
def rational_vectors_to_py(vs):
    r"""
    Convert a list of vectors over QQ into a list of tuples; every entry of
    a vector becomes a pair of python integers (numerator, denominator).

    EXAMPLES::

        sage: from admcycles.file_cache import rational_vectors_to_py
        sage: v = rational_vectors_to_py([vector(QQ, [1/2, 1])])
        sage: v
        [((1, 2), (1, 1))]
    """
    converted = []
    for v in vs:
        converted.append(tuple(map(rational_to_py, v)))
    return converted
445
446
447
def py_to_rational_vectors(vs):
    r"""
    Convert a list of tuples of pairs of python integers into a list of
    sparse vectors over QQ; every pair (a, b) becomes the entry a/b.

    EXAMPLES::

        sage: from admcycles.file_cache import py_to_rational_vectors
        sage: v = py_to_rational_vectors([((1, 2), (1, 1))])
        sage: v
        [(1/2, 1)]
        sage: type(v[0])
        <class 'sage.modules.free_module_element.FreeModuleElement_generic_sparse'>
    """
    result = []
    for pairs in vs:
        entries = (py_to_rational(pair) for pair in pairs)
        result.append(vector(QQ, entries, sparse=True))
    return result
461
462