# Path: blob/main/python/cpython/src/cowasm_importer.py
# 1067 views
"""
This tiny simple custom importer makes it so that if you have a
tarball foo.tar.xz somewhere in your sys.path that contains a Python
module, then this works:

    import foo

This even works with .so extension module code.  It's reasonably
efficient too, in some ways.  How is this possible?  This works in a
very different way than Python's own zipfile importer and to me it
is both much simpler and much better.  At
https://docs.python.org/3/library/zipfile.html#pyzipfile-objects
there are docs about turning a Python module (without extension code)
into a zip file which can then be exported.  It works for that
application, but has drawbacks because zip files are much larger than
.tar.xz files; also, it seems like importing is a bit slower.  What
we do here instead is much simpler -- we just automatically extract
the .tar.xz file to a temporary folder, which we add to sys.path.
That's it!  It's ridiculously simple, but works well for our application
to WebAssembly where small size is very important.

NOTES:

- See https://dev.to/dangerontheranger/dependency-injection-with-import-hooks-in-python-3-5hap

- When working on this, here's how to update things after a change:

~/cowasm/packages/cpython$ rm dist/wasm/.install-data && cp src/cowasm_importer.py dist/wasm/lib/python3.11/site-packages/ && make && cd ../python-wasm/ && make && cd ../cpython/

"""

import importlib
import importlib.abc
import importlib.machinery
import os
import pathlib
import sys
import tempfile
import zipfile
import tarfile
from time import time

# Maps a module name to the path of the bundle that provides it.  Entries are
# added by path_to_bundle() (or by hand) and removed as soon as extraction of
# that bundle has been attempted.
cowasm_modules = {}

# Names of modules whose bundle we already tried to extract.  path_to_bundle()
# will not rediscover these on sys.path, which is what actually prevents the
# importer from recursing forever on a bundle that does not contain the
# module it is named after.
_extracted_modules = set()

verbose = 'cowasm:importer' in os.environ.get("DEBUG", '')

EXTENSION = '.tar.xz'

if verbose:

    def log(*args):
        """Print debugging output (enabled via DEBUG=cowasm:importer)."""
        print(*args)
else:

    def log(*args):
        """Discard debugging output (DEBUG does not enable the importer)."""
        pass


temporary_directory = None


def site_packages_directory():
    """Return a best-guess site-packages directory taken from sys.path.

    Returns the first sys.path entry ending in '/site-packages'; failing
    that, the parent of the first entry ending in '/lib-dynload'; failing
    that, None.
    """
    for path in sys.path:
        if path.endswith('/site-packages'):
            # In dev mode using the real filesystem
            return path
    # didn't find it so try again with different heuristic
    for path in sys.path:
        if path.endswith('/lib-dynload'):
            # this is typically inside site-packages
            return os.path.dirname(path)
    return None


def get_package_directory():
    """Return the directory that bundles are extracted into.

    The directory is created on first use and inserted at the front of
    sys.path; later calls return the same directory.
    """
    # We use a temporary directory that gets
    # deleted automatically when the process exits, hence the global
    # temporary_directory object is important.  A drawback of this approach is
    # that every time you start python and import something
    # the module has to get uncompressed again; an advantage is that space is
    # only used when you actually import the module, and probably most modules
    # are never used at all.  That also breaks Cython, which we work around
    # by putting a cython.py file in site-packages, and also Cython vs cython
    # is an issue there.  (We work around the cython.py thing for now.)

    global temporary_directory
    if temporary_directory is None:
        temporary_directory = tempfile.TemporaryDirectory()
        sys.path.insert(0, temporary_directory.name)
    return temporary_directory.name


class CoWasmPackageFinder(importlib.abc.MetaPathFinder):
    """Meta path finder that hands bundled modules to a CoWasmPackageLoader."""

    def __init__(self, loader):
        self._loader = loader

    def find_spec(self, fullname, path, target=None):
        """Return a spec for fullname if the loader has a bundle for it.

        - fullname is the fully-qualified name of the module,
        - path is set to __path__ for sub-modules/packages, or None otherwise.
        - target can be a module object, but is unused in this example.

        Returns None when no bundle provides the module.
        """
        log("find_spec:", fullname, path, target)
        if self._loader.provides(fullname):
            return self._gen_spec(fullname)
        return None

    def _gen_spec(self, fullname):
        return importlib.machinery.ModuleSpec(fullname, self._loader)


class CoWasmPackageLoader(importlib.abc.Loader):
    """Loader that extracts a bundle and then imports the module normally.

    After a bundle has been extracted, the module is imported through the
    regular import machinery; the spec of the resulting module is remembered
    per module name so that get_code() and friends can be proxied to the
    loader that really loaded it.
    """

    def __init__(self):
        # Real spec of every module imported through this loader, by name.
        self._specs = {}
        # Spec of the most recently imported module.
        self._spec = None

    def provides(self, fullname: str):
        """Return True if a bundle for fullname is currently known."""
        return path_to_bundle(fullname) is not None

    def _do_import(self, name, path):
        log("_do_import", name, path)
        mod = extract_archive_and_import(name, path)
        # We save the spec so we can use it to proxy get_code, etc.
        # TODO: I don't actually know if any of this proxying really works.
        # I implemented this in hopes of getting "-m pip" to work as a bundle,
        # but it doesn't.
        spec = mod.__spec__
        self._specs[name] = spec
        self._spec = spec
        return mod

    def _real_loader(self, fullname):
        """Return the loader that actually loaded fullname, importing it
        from its bundle first if necessary.

        Raises ImportError if no bundle provides fullname.
        """
        spec = self._specs.get(fullname)
        if spec is None:
            path = path_to_bundle(fullname)
            if path is not None:
                self._do_import(fullname, path)
                spec = self._specs[fullname]
            else:
                # The bundle may already have been extracted and imported
                # through another loader instance.
                already_loaded = sys.modules.get(fullname)
                spec = getattr(already_loaded, '__spec__', None)
                if spec is None:
                    raise ImportError(
                        f"no CoWasm bundle provides {fullname!r}",
                        name=fullname)
        return spec.loader

    def create_module(self, spec):
        """Extract the bundle for spec.name and return the imported module."""
        log("create_module", spec)
        path = path_to_bundle(spec.name)
        return self._do_import(spec.name, path)

    def exec_module(self, module):
        """Do nothing: create_module() already returned a fully imported
        module."""
        pass

    def get_code(self, fullname):
        """Proxy get_code to the loader that really loaded fullname."""
        log("get_code", fullname)
        return self._real_loader(fullname).get_code(fullname)

    def get_data(self, fullname):
        """Proxy get_data to a real loader.

        NOTE(review): in the importlib loader protocol the argument of
        get_data is a file path rather than a module name, so when the value
        is not the name of a module imported here we fall back to the most
        recently used real loader -- confirm this matches how callers use it.
        """
        spec = self._specs.get(fullname, self._spec)
        if spec is None:
            return self._real_loader(fullname).get_data(fullname)
        return spec.loader.get_data(fullname)

    def get_filename(self, fullname):
        """Proxy get_filename to the loader that really loaded fullname."""
        return self._real_loader(fullname).get_filename(fullname)

    def get_source(self, fullname):
        """Proxy get_source to the loader that really loaded fullname."""
        return self._real_loader(fullname).get_source(fullname)


def extract_archive_and_import(name: str, archive_path: str):
    """Extract a bundle into the package directory and import module `name`.

    The path registered for `name` in cowasm_modules takes precedence;
    archive_path is used when nothing is registered.  Archives whose name
    ends in '.zip' are opened with zipfile, everything else with tarfile.

    Returns the imported module.  Raises ImportError if no archive path is
    known, or if the module cannot be imported after extraction.
    """
    registered_path = cowasm_modules.get(name)
    if registered_path is not None:
        archive_path = registered_path
    if archive_path is None:
        raise ImportError(f"no bundle is known for module {name!r}", name=name)
    package_dirname = get_package_directory()

    started = time()
    log("extracting archive", archive_path, " to", package_dirname)

    try:
        # NOTE: the archive is extracted without any member filtering, so
        # bundles found on sys.path are treated as trusted.
        if archive_path.endswith('.zip'):
            with zipfile.ZipFile(archive_path) as archive:
                archive.extractall(package_dirname)
        else:
            with tarfile.open(archive_path) as archive:
                archive.extractall(package_dirname)
    finally:
        # Once we even try to extract, make it impossible that our importer will ever
        # try again on this module -- this avoids any possibility of an infinite loop
        _extracted_modules.add(name)
        cowasm_modules.pop(name, None)

    # Updating the directory timestamp should be automatic on any OS,
    # but *right now* it is not with memfs, so we do it manually.
    # (That said, I think I patched around this.)
    # Also this can workaround issues.  Basically this is clearing the python
    # cache.  Sometimes on linux vm's, this is critical.
    pathlib.Path(package_dirname).touch()
    # Alternatively, invalidating the cache should work no matter what,
    # and is recommended in the docs, so we do it:
    importlib.invalidate_caches()

    log(time() - started, package_dirname)

    started = time()
    mod = importlib.import_module(name)
    log(name, "import time: ", time() - started)

    return mod


def path_to_bundle(module_name: str):
    """Return the path of the bundle providing module_name, or None.

    An entry in cowasm_modules wins.  Otherwise sys.path is searched for
    '<module_name>' + EXTENSION and a hit is recorded in cowasm_modules.
    A module whose bundle was already extracted is not searched for again.
    """
    if module_name in cowasm_modules:
        return cowasm_modules[module_name]
    if module_name in _extracted_modules:
        # Already extracted (or at least attempted); the regular import
        # machinery is responsible for this module from now on.
        return None
    # Search the import path
    filename = module_name + EXTENSION
    for segment in sys.path:
        path = os.path.join(segment, filename)
        if os.path.exists(path):
            log("path_to_bundle: found", path)
            cowasm_modules[module_name] = path
            return path
    # We do not have it now.  It could get added later.
    # TODO: should I add a timestamp based hash like
    # the builtin import process?
    return None


def init():
    """Install the bundle importer at the end of sys.meta_path.

    Does nothing when PYTHONREGRTEST_UNICODE_GUARD or COWASM_DISABLE_IMPORTER
    is set in the environment, or when a CoWasmPackageFinder is already
    installed.
    """
    if 'PYTHONREGRTEST_UNICODE_GUARD' in os.environ:
        # do not install or use this when running tests, as it changes
        # the path which breaks some tests.
        return

    if "COWASM_DISABLE_IMPORTER" in os.environ:
        return

    if any(isinstance(finder, CoWasmPackageFinder)
           for finder in sys.meta_path):
        # Already installed; a second finder would only repeat the work.
        return

    loader = CoWasmPackageLoader()
    finder = CoWasmPackageFinder(loader)
    sys.meta_path.append(finder)