Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemathinc
GitHub Repository: sagemathinc/wapython
Path: blob/main/python/cpython/src/cowasm_importer.py
1067 views
1
"""
2
This tiny, simple custom importer makes it so that if you have a
3
tarball foo.tar.xz somewhere in your sys.path that contains a Python
4
module, then this works:
5
6
import foo
7
8
This even works with .so extension module code. It's reasonably
9
efficient too, in some ways. How is this possible? This works in a
10
very different way than Python's own zipfile importer and to me it
11
is both much simpler and much better. At
12
https://docs.python.org/3/library/zipfile.html#pyzipfile-objects
13
there are docs about turning a Python module (without extension code)
14
into a zip file which can then be exported. It works for that
15
application, but has drawbacks because zip files are much larger than
16
.tar.xz files; also, it seems like importing is a bit slower. What
17
we do here instead is much simpler -- we just automatically extract
18
the .tar.xz file to a temporary folder, which we add to sys.path.
19
That's it! It's ridiculously simple, but works well for our application
20
to WebAssembly where small size is very important.
21
22
NOTES:
23
24
- See https://dev.to/dangerontheranger/dependency-injection-with-import-hooks-in-python-3-5hap
25
26
- When working on this, here's how to update things after a change:
27
28
~/cowasm/packages/cpython$ rm dist/wasm/.install-data && cp src/cowasm_importer.py dist/wasm/lib/python3.11/site-packages/ && make && cd ../python-wasm/ && make && cd ../cpython/
29
30
"""
31
32
import importlib
33
import importlib.abc
34
import os
35
import sys
36
import tempfile
37
import zipfile
38
import tarfile
39
from time import time
40
41
# Maps a module name to the path of the bundle archive that was located
# for it on sys.path.  Entries are added by path_to_bundle() and removed
# again by extract_archive_and_import().
cowasm_modules = {}

# Debug output is enabled when the DEBUG environment variable contains
# the substring 'cowasm:importer'.
verbose = 'cowasm:importer' in os.environ.get("DEBUG", '')

# Filename suffix that identifies a module bundle on sys.path.
EXTENSION = '.tar.xz'

if not verbose:

    def log(*args):
        """Discard the arguments; debug logging is disabled."""

else:

    def log(*args):
        """Print the arguments; debug logging is enabled."""
        print(*args)


# The tempfile.TemporaryDirectory object that bundles are extracted into.
# It stays None until get_package_directory() creates it on first use.
temporary_directory = None
58
59
60
def site_packages_directory():
    """Return the site-packages directory inferred from sys.path.

    Two heuristics are tried in order:

    1. the first sys.path entry ending in '/site-packages' is returned
       as-is (dev mode using the real filesystem);
    2. otherwise, the parent directory of the first entry ending in
       '/lib-dynload' is returned (that directory is typically inside
       site-packages).

    Returns None when neither heuristic matches.
    """
    search_path = sys.path

    direct_hit = next(
        (entry for entry in search_path if entry.endswith('/site-packages')),
        None,
    )
    if direct_hit is not None:
        return direct_hit

    # didn't find it so try again with different heuristic
    for entry in search_path:
        if entry.endswith('/lib-dynload'):
            return os.path.dirname(entry)

    return None
70
71
72
def get_package_directory():
    """Return the directory bundles are extracted into, creating it on first use.

    The first call creates a tempfile.TemporaryDirectory, stores it in the
    module-level ``temporary_directory`` and inserts its path at the front
    of sys.path.  Later calls return the same path.
    """
    # We use a temporary directory that gets deleted automatically when
    # the process exits, hence the global temporary_directory object is
    # important.  A drawback of this approach is that every time you start
    # python and import something the module has to get uncompressed again;
    # an advantage is that space is only used when you actually import the
    # module, and probably most modules are never used at all.  That also
    # breaks Cython, which we work around by putting a cython.py file in
    # site-packages, and also Cython vs cython is an issue there.  (We work
    # around the cython.py thing for now.)
    global temporary_directory

    if temporary_directory is not None:
        return temporary_directory.name

    temporary_directory = tempfile.TemporaryDirectory()
    extraction_root = temporary_directory.name
    # Front of sys.path, so extracted modules are found first.
    sys.path.insert(0, extraction_root)
    return extraction_root
88
89
90
class CoWasmPackageFinder(importlib.abc.MetaPathFinder):
    """Meta path finder that claims every module its loader can provide."""

    def __init__(self, loader):
        # The loader is asked (via provides()) whether it can supply a
        # module, and is attached to every spec this finder hands out.
        self._loader = loader

    def find_spec(self, fullname, path, target=None):
        """Return a spec for ``fullname`` if the loader provides it, else None.

        - fullname is the fully-qualified name of the module,
        - path is set to __path__ for sub-modules/packages, or None otherwise.
        - target can be a module object, but is unused in this example.
        """
        log("find_spec:", fullname, path, target)
        if not self._loader.provides(fullname):
            return None
        return self._gen_spec(fullname)

    def _gen_spec(self, fullname):
        """Build a ModuleSpec that names this finder's loader for ``fullname``."""
        return importlib.machinery.ModuleSpec(name=fullname,
                                              loader=self._loader)
107
108
109
class CoWasmPackageLoader(importlib.abc.Loader):
    """Loader that imports a module by extracting its bundle archive.

    One loader instance serves every bundled module, so the spec of each
    module imported through it is remembered per module name.  The
    ``get_*`` methods proxy to the loader recorded in that spec.
    """

    def __init__(self):
        # module name -> __spec__ of the module imported from its bundle
        self._specs = {}

    def provides(self, fullname: str):
        """Return True if a bundle archive for ``fullname`` can be located."""
        return path_to_bundle(fullname) is not None

    def _do_import(self, name, path):
        """Extract the bundle at ``path``, import ``name`` and return the module."""
        log("_do_import", name, path)
        mod = extract_archive_and_import(name, path)
        # We save the spec so we can use it to proxy get_code, etc.
        # TODO: I don't actually know if any of this proxying really works.
        # I implemented this in hopes of getting "-m pip" to work as a bundle,
        # but it doesn't.
        spec = mod.__spec__
        self._specs[name] = spec
        # Most recently imported spec; kept for code that still reads _spec.
        self._spec = spec
        return mod

    def create_module(self, spec):
        """Return the fully imported module for ``spec.name``."""
        log("create_module", spec)
        path = path_to_bundle(spec.name)
        return self._do_import(spec.name, path)

    def exec_module(self, module):
        """Do nothing: create_module already returned an imported module."""
        pass

    def _real_loader(self, fullname):
        """Return the loader recorded for ``fullname``, importing it if needed.

        Raises ImportError when the module has not been imported through
        this loader and no bundle archive can be located for it.
        """
        spec = self._specs.get(fullname)
        if spec is None:
            path = path_to_bundle(fullname)
            if path is None:
                raise ImportError(
                    f"no bundle archive found for module {fullname!r}",
                    name=fullname)
            self._do_import(fullname, path)
            spec = self._specs[fullname]
        return spec.loader

    def get_code(self, fullname):
        """Proxy get_code to the loader that actually loaded ``fullname``."""
        log("get_code", fullname)
        return self._real_loader(fullname).get_code(fullname)

    def get_data(self, fullname):
        """Proxy get_data to a real loader.

        NOTE(review): the standard loader API passes a *path* to get_data,
        not a module name -- confirm what callers pass here.  When the
        argument is not a module imported through this loader, the call is
        delegated to the most recently recorded loader.
        """
        spec = self._specs.get(fullname)
        if spec is None:
            spec = getattr(self, '_spec', None)
        if spec is not None:
            real_loader = spec.loader
        else:
            real_loader = self._real_loader(fullname)
        return real_loader.get_data(fullname)

    def get_filename(self, fullname):
        """Proxy get_filename to the loader that actually loaded ``fullname``."""
        return self._real_loader(fullname).get_filename(fullname)

    def get_source(self, fullname):
        """Proxy get_source to the loader that actually loaded ``fullname``."""
        return self._real_loader(fullname).get_source(fullname)
156
157
# Module names for which an extraction has already been attempted.  The
# bundle stays on sys.path after extraction, so it can be discovered
# again; this set is what stops the importer from trying a second time.
_extraction_attempted = set()


def extract_archive_and_import(name: str, archive_path: str):
    """Extract the bundle for ``name`` into the package directory and import it.

    Parameters:
        name: name of the module to import.
        archive_path: path of the bundle archive.  A path ending in
            '.zip' is opened with zipfile; anything else with tarfile.
            When None, the path cached in ``cowasm_modules`` is used.

    Returns:
        The module returned by ``importlib.import_module(name)``.

    Raises:
        ModuleNotFoundError: if no archive path is known for ``name``, or
            if extraction was already attempted for ``name``.  The second
            case happens when the archive did not provide the module, so
            the nested import came back to this importer.
    """
    if name in _extraction_attempted:
        raise ModuleNotFoundError(
            f"bundle for {name!r} was already extracted but did not "
            "provide the module",
            name=name)
    if archive_path is None:
        archive_path = cowasm_modules.get(name)
    if archive_path is None:
        raise ModuleNotFoundError(f"no bundle archive known for {name!r}",
                                  name=name)
    _extraction_attempted.add(name)

    package_dirname = get_package_directory()

    if verbose:
        t = time()
        log("extracting archive", archive_path, " to", package_dirname)

    try:
        # Context managers make sure the archive file handle is closed.
        if archive_path.endswith('.zip'):
            with zipfile.ZipFile(archive_path) as archive:
                archive.extractall(package_dirname)
        else:
            with tarfile.open(archive_path) as archive:
                archive.extractall(package_dirname)
    finally:
        # Once we even try to extract, drop the cached path for this module.
        cowasm_modules.pop(name, None)

    # Updating the directory timestamp should be automatic on any OS,
    # but *right now* it is not with memfs, so we do it manually.
    # (That said, I think I patched around this.)
    # Also this can workaround issues.  Basically this is clearing the python
    # cache.  Sometimes on linux vm's, this is critical.
    import pathlib
    pathlib.Path(package_dirname).touch()
    # Alternatively, invalidating the cache should work no matter what,
    # and is recommended in the docs, so we do it:
    importlib.invalidate_caches()

    if verbose:
        log(time() - t, package_dirname)
        t = time()

    mod = importlib.import_module(name)

    if verbose:
        log(name, "import time: ", time() - t)

    return mod
197
198
199
def path_to_bundle(module_name: str):
    """Return the path of the bundle archive for ``module_name``, or None.

    A path already cached in ``cowasm_modules`` is returned directly.
    Otherwise each sys.path entry is checked for a file named
    ``module_name + EXTENSION``; the first hit is cached and returned.
    """
    try:
        return cowasm_modules[module_name]
    except KeyError:
        pass

    # Search the import path
    bundle_name = module_name + EXTENSION
    for directory in sys.path:
        candidate = os.path.join(directory, bundle_name)
        if not os.path.exists(candidate):
            continue
        log("path_to_bundle: found", candidate)
        cowasm_modules[module_name] = candidate
        return candidate

    # We do not have it now.  It could get added later.
    # TODO: should I add a timestamp based hash like
    # the builtin import process?
    return None
214
215
216
def init():
    """Append the bundle finder to sys.meta_path unless it is disabled.

    Nothing is installed when either of these environment variables is set:

    - PYTHONREGRTEST_UNICODE_GUARD: do not install or use this when
      running tests, as it changes the path which breaks some tests.
    - COWASM_DISABLE_IMPORTER: explicit opt-out.
    """
    opt_out_variables = ('PYTHONREGRTEST_UNICODE_GUARD',
                         "COWASM_DISABLE_IMPORTER")
    if any(variable in os.environ for variable in opt_out_variables):
        return

    sys.meta_path.append(CoWasmPackageFinder(CoWasmPackageLoader()))
229
230