#!/usr/bin/env python3
# Copyright 2012 The Emscripten Authors. All rights reserved.
# Emscripten is available under two separate licenses, the MIT license and the
# University of Illinois/NCSA Open Source License. Both these licenses can be
# found in the LICENSE file.

"""A tool that generates FS API calls to generate a filesystem, and packages the files
to work with that.

This is called by emcc. You can also call it yourself.

You can split your files into "asset bundles", and create each bundle separately
with this tool. Then just include the generated js for each and they will load
the data and prepare it accordingly. This allows you to share assets and reduce
data downloads.

* If you run this yourself, separately/standalone from emcc, then the main program
  compiled by emcc must be built with filesystem support. You can do that with
  -sFORCE_FILESYSTEM (if you forget that, an unoptimized build or one with
  ASSERTIONS enabled will show an error suggesting you use that flag).

Usage:

  file_packager TARGET [--preload A [B..]] [--embed C [D..]] [--exclude E [F..]] [--js-output=OUTPUT.js] [--no-force] [--use-preload-cache] [--indexedDB-name=EM_PRELOAD_CACHE] [--separate-metadata] [--lz4] [--use-preload-plugins] [--no-node] [--export-es6] [--help]
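
  For example, a typical standalone invocation might look like this (the file
  and directory names here are illustrative only):

    file_packager game.data --preload assets --js-output=game.data.js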

  --preload  ,
  --embed    See emcc --help for more details on those options.

  --exclude E [F..]  Specifies filename pattern matches to use for excluding given files from being added to the package.
                     See https://docs.python.org/2/library/fnmatch.html for syntax.

  --from-emcc  Indicate that `file_packager` was called from `emcc` and will be further processed by it, so some code generation can be skipped here.

  --js-output=FILE  Writes output to FILE. If not specified, standard output is used.

  --obj-output=FILE  Create an object file from the embedded files, for direct linking into a wasm binary.

  --depfile=FILE  Writes a dependency list containing the list of directories and files walked, compatible with Make, Ninja, CMake, etc.

  --wasm64  When used with `--obj-output`, create a wasm64 object file.

  --export-name=EXPORT_NAME  Use a custom export name (default is `Module`).

  --export-es6  Wrap the generated code inside an ES6 exported function.

  --no-force  Don't create output if no valid input file is specified.

  --use-preload-cache  Stores the package in IndexedDB so that subsequent loads don't need to do XHR. Checks the package version.

  --indexedDB-name  Use the specified IndexedDB database name (default: 'EM_PRELOAD_CACHE').

  --separate-metadata  Stores package metadata separately. Only applicable when preloading and a js-output file is specified.

  --lz4  Uses LZ4. This compresses the data using LZ4 when this utility is run, then the client decompresses chunks on the fly, avoiding storing
         the entire decompressed data in memory at once. See LZ4 in src/settings.js; you must build the main program with that flag.

  --use-preload-plugins  Tells the file packager to run preload plugins on the files as they are loaded. This performs tasks like decoding images
                         and audio using the browser's codecs.

  --no-node  Disable Node.js support. It is enabled by default, which emits some extra code.

  --quiet  Suppress the reminder about using `FORCE_FILESYSTEM`.

Notes:

  * The file packager generates unix-style file paths. So if you are on Windows and a file is accessed at
    subdir\file, in JS it will be subdir/file. For simplicity we treat the web platform as a *NIX.
"""

import base64
import ctypes
import fnmatch
import hashlib
import json
import os
import posixpath
import shutil
import sys
from subprocess import PIPE
from textwrap import dedent
from typing import List

__scriptdir__ = os.path.dirname(os.path.abspath(__file__))
__rootdir__ = os.path.dirname(__scriptdir__)
sys.path.insert(0, __rootdir__)

from tools import shared, utils, js_manipulation, diagnostics
from tools.response_file import substitute_response_files


DEBUG = os.environ.get('EMCC_DEBUG')

excluded_patterns: List[str] = []
new_data_files = []
walked = []


class Options:
  def __init__(self):
    self.export_name = 'Module'
    self.has_preloaded = False
    self.has_embedded = False
    self.jsoutput = None
    self.obj_output = None
    self.depfile = None
    self.from_emcc = False
    self.quiet = False
    self.force = True
    # If set to True, IndexedDB (IDBFS in library_idbfs.js) is used to locally
    # cache VFS XHR so that subsequent page loads can read the data from the
    # offline cache instead.
    self.use_preload_cache = False
    self.indexeddb_name = 'EM_PRELOAD_CACHE'
    # If set to True, the package metadata is stored separately from the js-output
    # file, which makes the js-output file immutable to package content changes.
    # If set to False, the package metadata is stored inside the js-output file,
    # which makes the js-output file change on each invocation of this packager tool.
    self.separate_metadata = False
    self.lz4 = False
    self.use_preload_plugins = False
    self.support_node = True
    self.wasm64 = False
    self.export_es6 = False


class DataFile:
  def __init__(self, srcpath, dstpath, mode, explicit_dst_path):
    self.srcpath = srcpath
    self.dstpath = dstpath
    self.mode = mode
    self.explicit_dst_path = explicit_dst_path


options = Options()


def err(*args):
  print(*args, file=sys.stderr)


def base64_encode(b):
  b64 = base64.b64encode(b)
  return b64.decode('ascii')


def has_hidden_attribute(filepath):
  """Win32 code to test whether the given file has the hidden property set."""

  if sys.platform != 'win32':
    return False

  try:
    attrs = ctypes.windll.kernel32.GetFileAttributesW(filepath)
    assert attrs != -1
    result = bool(attrs & 2)
  except Exception:
    result = False
  return result


def should_ignore(fullname):
  """The packager should never preload/embed files if the file
  is hidden (Win32) or it matches any pattern specified in --exclude."""
  if has_hidden_attribute(fullname):
    return True

  return any(fnmatch.fnmatch(fullname, p) for p in excluded_patterns)


def add(mode, rootpathsrc, rootpathdst):
  """Expand directories into individual files

  rootpathsrc: The path name of the root directory on the local FS we are
               adding to emscripten virtual FS.
  rootpathdst: The name we want to make the source path available on the
               emscripten virtual FS.
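
  For example (illustrative): add('preload', 'assets/textures', '/textures')
  walks assets/textures and maps every file it finds to /textures/<relative path>
  in the Emscripten virtual FS.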
  """
  walked.append(rootpathsrc)
  for dirpath, dirnames, filenames in os.walk(rootpathsrc):
    new_dirnames = []
    for name in dirnames:
      fullname = os.path.join(dirpath, name)
      if not should_ignore(fullname):
        walked.append(fullname)
        new_dirnames.append(name)
      elif DEBUG:
        err('Skipping directory "%s" from inclusion in the emscripten '
            'virtual file system.' % fullname)
    for name in filenames:
      fullname = os.path.join(dirpath, name)
      if not should_ignore(fullname):
        walked.append(fullname)
        # Convert source filename relative to root directory of target FS.
        dstpath = os.path.join(rootpathdst,
                               os.path.relpath(fullname, rootpathsrc))
        new_data_files.append(DataFile(srcpath=fullname, dstpath=dstpath,
                                       mode=mode, explicit_dst_path=True))
      elif DEBUG:
        err('Skipping file "%s" from inclusion in the emscripten '
            'virtual file system.' % fullname)
    dirnames.clear()
    dirnames.extend(new_dirnames)


def to_asm_string(string):
  """Convert a python string to a string suitable for including in an
  assembly file using the `.asciz` directive.

  The result will be a UTF-8 encoded string in the data section.
  """
  # See MCAsmStreamer::PrintQuotedString in llvm/lib/MC/MCAsmStreamer.cpp
  # And isPrint in llvm/include/llvm/ADT/StringExtras.h

  def is_print(c):
    return c >= 0x20 and c <= 0x7E

  def escape(c):
    if is_print(c):
      return chr(c)
    escape_chars = {
        '\b': '\\b',
        '\f': '\\f',
        '\n': '\\n',
        '\r': '\\r',
        '\t': '\\t',
    }
    if c in escape_chars:
      return escape_chars[c]
    # Encode all other chars as three octal digits(!)
    return '\\%s%s%s' % (oct(c >> 6), oct(c >> 3), oct(c >> 0))

  return ''.join(escape(c) for c in string.encode('utf-8'))


def to_c_symbol(filename, used):
  """Convert a filename (python string) to a legal C symbol, avoiding collisions."""
  def escape(c):
    if c.isalnum():
      return c
    else:
      return '_'
  c_symbol = ''.join(escape(c) for c in filename)
  # Handle collisions
  if c_symbol in used:
    counter = 2
    while c_symbol + str(counter) in used:
      counter = counter + 1
    c_symbol = c_symbol + str(counter)
  used.add(c_symbol)
  return c_symbol


def generate_object_file(data_files):
  embed_files = [f for f in data_files if f.mode == 'embed']
  assert embed_files

  asm_file = shared.replace_suffix(options.obj_output, '.s')

  used = set()
  for f in embed_files:
    f.c_symbol_name = '__em_file_data_%s' % to_c_symbol(f.dstpath, used)

  with open(asm_file, 'w') as out:
    out.write('# Emscripten embedded file data, generated by tools/file_packager.py\n')

    for f in embed_files:
      if DEBUG:
        err('embedding %s at %s' % (f.srcpath, f.dstpath))

      size = os.path.getsize(f.srcpath)
      dstpath = to_asm_string(f.dstpath)
      srcpath = utils.normalize_path(f.srcpath)
      out.write(dedent(f'''
      .section .rodata.{f.c_symbol_name},"",@

      # The name of the file
      {f.c_symbol_name}_name:
      .asciz "{dstpath}"
      .size {f.c_symbol_name}_name, {len(dstpath) + 1}

      # The size of the file followed by the content itself
      {f.c_symbol_name}:
      .incbin "{srcpath}"
      .size {f.c_symbol_name}, {size}
      '''))

    if options.wasm64:
      align = 3
      ptr_type = 'i64'
      bits = 64
    else:
      align = 2
      ptr_type = 'i32'
      bits = 32
    out.write(dedent(f'''
    .functype _emscripten_fs_load_embedded_files ({ptr_type}) -> ()
    .section .text,"",@
    init_file_data:
      .functype init_file_data () -> ()
      global.get __emscripten_embedded_file_data@GOT
      call _emscripten_fs_load_embedded_files
      end_function

    # Run init_file_data on startup.
    # See system/lib/README.md for ordering of system constructors.
    .section .init_array.49,"",@
    .p2align {align}
    .int{bits} init_file_data

    # A list of triples of:
    # (file_name_ptr, file_data_size, file_data_ptr)
    # The list is null terminated with a single 0
    .section .rodata.__emscripten_embedded_file_data,"",@
    __emscripten_embedded_file_data:
    .p2align {align}
    '''))

    for f in embed_files:
      # The `.dc.a` directive gives us a pointer (address) sized entry.
      # See https://sourceware.org/binutils/docs/as/Dc.html
      out.write(dedent(f'''\
      .p2align %s
      .dc.a {f.c_symbol_name}_name
      .p2align %s
      .int32 {os.path.getsize(f.srcpath)}
      .p2align %s
      .dc.a {f.c_symbol_name}
      ''' % (align, align, align)))

    ptr_size = 4
    elem_size = (2 * ptr_size) + 4
    total_size = len(embed_files) * elem_size + 4
    out.write(dedent(f'''\
      .dc.a 0
      .size __emscripten_embedded_file_data, {total_size}
      '''))
  cmd = [shared.EMCC, '-c', asm_file, '-o', options.obj_output]
  if options.wasm64:
    target = 'wasm64-unknown-emscripten'
    cmd.append('-Wno-experimental')
  else:
    target = 'wasm32-unknown-emscripten'
  cmd.append('--target=' + target)
  shared.check_call(cmd)


def main():  # noqa: C901, PLR0912, PLR0915
  """Future modifications should consider refactoring to reduce complexity.

  * The McCabe cyclomatic complexity is currently 60 vs 10 recommended.
  * There are currently 63 branches vs 12 recommended.
  * There are currently 151 statements vs 50 recommended.

  To revalidate these numbers, run `ruff check --select=C901,PLR091`.
  """
  if len(sys.argv) == 1:
    err('''Usage: file_packager TARGET [--preload A [B..]] [--embed C [D..]] [--exclude E [F..]] [--js-output=OUTPUT.js] [--no-force] [--use-preload-cache] [--indexedDB-name=EM_PRELOAD_CACHE] [--separate-metadata] [--lz4] [--use-preload-plugins] [--no-node] [--export-es6] [--help]
Try 'file_packager --help' for more details.''')
    return 1

  # read response files very early on
  try:
    args = substitute_response_files(sys.argv[1:])
  except OSError as e:
    shared.exit_with_error(e)

  if '--help' in args:
    print(__doc__.strip())
    return 0

  data_target = args[0]
  data_files = []
  leading = ''

  for arg in args[1:]:
    if arg == '--preload':
      leading = 'preload'
    elif arg == '--embed':
      leading = 'embed'
    elif arg == '--exclude':
      leading = 'exclude'
    elif arg == '--no-force':
      options.force = False
      leading = ''
    elif arg == '--export-es6':
      options.export_es6 = True
      leading = ''
    elif arg == '--use-preload-cache':
      options.use_preload_cache = True
      leading = ''
    elif arg.startswith('--indexedDB-name'):
      options.indexeddb_name = arg.split('=', 1)[1] if '=' in arg else None
      leading = ''
    elif arg == '--no-heap-copy':
      diagnostics.warn('ignoring legacy flag --no-heap-copy (that is the only mode supported now)')
      leading = ''
    elif arg == '--separate-metadata':
      options.separate_metadata = True
      leading = ''
    elif arg == '--lz4':
      options.lz4 = True
      leading = ''
    elif arg == '--use-preload-plugins':
      options.use_preload_plugins = True
      leading = ''
    elif arg == '--no-node':
      options.support_node = False
      leading = ''
    elif arg.startswith('--js-output'):
      options.jsoutput = arg.split('=', 1)[1] if '=' in arg else None
      leading = ''
    elif arg.startswith('--obj-output'):
      options.obj_output = arg.split('=', 1)[1] if '=' in arg else None
      leading = ''
    elif arg.startswith('--depfile'):
      options.depfile = arg.split('=', 1)[1] if '=' in arg else None
      leading = ''
    elif arg == '--wasm64':
      options.wasm64 = True
    elif arg.startswith('--export-name'):
      if '=' in arg:
        options.export_name = arg.split('=', 1)[1]
      leading = ''
    elif arg == '--from-emcc':
      options.from_emcc = True
      leading = ''
    elif arg == '--quiet':
      options.quiet = True
    elif leading in {'preload', 'embed'}:
      mode = leading
      # position of @ if we're doing 'src@dst'. '__' is used to keep the index
      # the same as in the original if they escaped with '@@'.
      at_position = arg.replace('@@', '__').find('@')
      # '@@' in input string means there is an actual @ character, a single '@'
      # means the 'src@dst' notation.
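      # For example (illustrative): 'assets@/data' maps the local path 'assets'
      # to '/data' in the virtual FS, while 'weird@@name.txt' refers to the
      # literal local file 'weird@name.txt' with no remapping.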
      uses_at_notation = (at_position != -1)

      if uses_at_notation:
        srcpath = arg[0:at_position].replace('@@', '@')  # split around the @
        dstpath = arg[at_position + 1:].replace('@@', '@')
      else:
        # Use source path as destination path.
        srcpath = dstpath = arg.replace('@@', '@')
      if os.path.isfile(srcpath) or os.path.isdir(srcpath):
        data_files.append(DataFile(srcpath=srcpath, dstpath=dstpath, mode=mode,
                                   explicit_dst_path=uses_at_notation))
      else:
        diagnostics.error(f'{arg} does not exist')
    elif leading == 'exclude':
      excluded_patterns.append(arg)
    else:
      diagnostics.error('Unknown parameter:', arg)

  options.has_preloaded = any(f.mode == 'preload' for f in data_files)
  options.has_embedded = any(f.mode == 'embed' for f in data_files)

  if options.has_preloaded and options.has_embedded:
    diagnostics.warn('support for using --preload and --embed in the same command is scheduled '
                     'for deprecation. If you need this feature please comment at '
                     'https://github.com/emscripten-core/emscripten/issues/24803')

  if options.separate_metadata and (not options.has_preloaded or not options.jsoutput):
    diagnostics.error('cannot separate-metadata without both --preloaded files and a specified --js-output')

  if not options.from_emcc and not options.quiet:
    diagnostics.warn('Remember to build the main file with `-sFORCE_FILESYSTEM` '
                     'so that it includes support for loading this file package')

  if options.jsoutput and os.path.abspath(options.jsoutput) == os.path.abspath(data_target):
    diagnostics.error('TARGET should not have the same value as --js-output')

  if options.from_emcc and options.export_es6:
    diagnostics.error("Can't use --export-es6 option together with --from-emcc since the code should be embedded within emcc's code")

  walked.append(__file__)
  for file_ in data_files:
    if not should_ignore(file_.srcpath):
      if os.path.isdir(file_.srcpath):
        add(file_.mode, file_.srcpath, file_.dstpath)
      else:
        walked.append(file_.srcpath)
        new_data_files.append(file_)
  data_files = [file_ for file_ in new_data_files
                if not os.path.isdir(file_.srcpath)]
  if len(data_files) == 0:
    diagnostics.error('Nothing to do!')

  # Absolutize paths, and check that they make sense
  # os.getcwd() always returns the hard path with any symbolic links resolved,
  # even if we cd'd into a symbolic link.
  curr_abspath = os.path.abspath(os.getcwd())

  for file_ in data_files:
    if not file_.explicit_dst_path:
      # This file was not defined with src@dst, so we inferred the destination
      # from the source. In that case, we require that the destination be
      # within the current working directory.
      path = file_.dstpath
      # Use os.path.realpath to resolve any symbolic links to hard paths,
      # to match the structure in curr_abspath.
      abspath = os.path.realpath(os.path.abspath(path))
      if DEBUG:
        err(path, abspath, curr_abspath)
      if not abspath.startswith(curr_abspath):
        err('Error: Embedding "%s" which is not contained within the current directory '
            '"%s". This is invalid since the current directory becomes the '
            'root that the generated code will see. To include files outside of the current '
            'working directory you can use the `--preload-file srcpath@dstpath` syntax to '
            'explicitly specify the target location.' % (path, curr_abspath))
        sys.exit(1)
      file_.dstpath = abspath[len(curr_abspath) + 1:]
      if os.path.isabs(path):
        diagnostics.warn('Embedding an absolute file/directory name "%s" to the '
                         'virtual filesystem. The file will be made available in the '
                         'relative path "%s". You can use the `--preload-file srcpath@dstpath` '
                         'syntax to explicitly specify the target location the absolute source '
                         'path should be directed to.' % (path, file_.dstpath))

  for file_ in data_files:
    # name in the filesystem, native and emulated
    file_.dstpath = utils.normalize_path(file_.dstpath)
    # If the user has submitted a directory name as the destination but omitted
    # the destination filename, use the filename from the source file
    if file_.dstpath.endswith('/'):
      file_.dstpath = file_.dstpath + os.path.basename(file_.srcpath)
    # make destination path always relative to the root
    file_.dstpath = posixpath.normpath(os.path.join('/', file_.dstpath))
    if DEBUG:
      err('Packaging file "%s" to VFS in path "%s".' % (file_.srcpath, file_.dstpath))

  # Remove duplicates (can occur naively, for example preload dir/, preload dir/subdir/)
  seen = set()

  def was_seen(name):
    if name in seen:
      return True
    seen.add(name)
    return False

  # The files are sorted by the dstpath to make the order of files reproducible
  # across file systems / operating systems (os.walk does not produce the same
  # file order on different file systems / operating systems)
  data_files = sorted(data_files, key=lambda file_: file_.dstpath)
  data_files = [file_ for file_ in data_files if not was_seen(file_.dstpath)]

  metadata = {'files': []}

  if options.depfile:
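    # The emitted depfile lists each output target, then ':', then every walked
    # input, one entry per line with trailing backslashes, e.g. (illustrative):
    #   out.data \
    #   out.data.js \
    #   : \
    #   assets/a.png \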
    targets = []
    if options.obj_output:
      targets.append(options.obj_output)
    if options.jsoutput:
      targets.append(data_target)
      targets.append(options.jsoutput)
    with open(options.depfile, 'w') as f:
      for target in targets:
        if target:
          f.write(escape_for_makefile(target))
          f.write(' \\\n')
      f.write(': \\\n')
      for dependency in walked:
        f.write(escape_for_makefile(dependency))
        f.write(' \\\n')

  if options.obj_output:
    if not options.has_embedded:
      diagnostics.error('--obj-output is only applicable when embedding files')
    generate_object_file(data_files)
    if not options.has_preloaded:
      return 0

  ret = generate_js(data_target, data_files, metadata)

  if options.force or len(data_files):
    if options.jsoutput is None:
      print(ret)
    else:
      # Overwrite the old jsoutput file (if exists) only when its content
      # differs from the current generated one, otherwise leave the file
      # untouched preserving its old timestamp
      if os.path.isfile(options.jsoutput):
        old = utils.read_file(options.jsoutput)
        if old != ret:
          utils.write_file(options.jsoutput, ret)
      else:
        utils.write_file(options.jsoutput, ret)
      if options.separate_metadata:
        utils.write_file(options.jsoutput + '.metadata', json.dumps(metadata, separators=(',', ':')))

  return 0


def escape_for_makefile(fpath):
  # Escapes for CMake's "pathname" grammar as described here:
  # https://cmake.org/cmake/help/latest/command/add_custom_command.html#grammar-token-depfile-pathname
  # Which is congruent with how Ninja and GNU Make expect characters escaped.
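  # For example (illustrative): 'my file#1$x.dat' becomes 'my\ file\#1$$x.dat'.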
  fpath = utils.normalize_path(fpath)
  return fpath.replace('$', '$$').replace('#', '\\#').replace(' ', '\\ ')


def generate_js(data_target, data_files, metadata):
  # emcc will add this to the output itself, so it is only needed for
  # standalone calls
  if options.from_emcc:
    ret = ''
  else:
    if options.export_es6:
      ret = 'export default async function loadDataFile(Module) {\n'
    else:
      ret = '''
    var Module = typeof %(EXPORT_NAME)s != 'undefined' ? %(EXPORT_NAME)s : {};\n''' % {"EXPORT_NAME": options.export_name}

  ret += '''
    Module['expectedDataFileDownloads'] ??= 0;
    Module['expectedDataFileDownloads']++;'''

  if not options.export_es6:
    ret += '''
    (() => {'''

  ret += '''
    // Do not attempt to redownload the virtual filesystem data when in a pthread or a Wasm Worker context.
    var isPthread = typeof ENVIRONMENT_IS_PTHREAD != 'undefined' && ENVIRONMENT_IS_PTHREAD;
    var isWasmWorker = typeof ENVIRONMENT_IS_WASM_WORKER != 'undefined' && ENVIRONMENT_IS_WASM_WORKER;
    if (isPthread || isWasmWorker) return;\n'''

  if options.support_node:
    ret += " var isNode = typeof process === 'object' && typeof process.versions === 'object' && typeof process.versions.node === 'string';\n"

  if options.support_node and options.export_es6:
    ret += '''if (isNode) {
      const { createRequire } = await import('module');
      /** @suppress{duplicate} */
      var require = createRequire(import.meta.url);
    }\n'''

  if options.export_es6:
    ret += 'return new Promise((loadDataResolve, loadDataReject) => {\n'
  ret += ' async function loadPackage(metadata) {\n'

  code = '''
    function assert(check, msg) {
      if (!check) throw new Error(msg);
    }\n'''

  # Set up folders
  partial_dirs = []
  for file_ in data_files:
    dirname = os.path.dirname(file_.dstpath)
    dirname = dirname.lstrip('/')  # absolute paths start with '/', remove that
    if dirname != '':
      parts = dirname.split('/')
      for i in range(len(parts)):
        partial = '/'.join(parts[:i + 1])
        if partial not in partial_dirs:
          code += ('''Module['FS_createPath'](%s, %s, true, true);\n'''
                   % (json.dumps('/' + '/'.join(parts[:i])), json.dumps(parts[i])))
          partial_dirs.append(partial)

  if options.has_preloaded:
    # Bundle all datafiles into one archive. Avoids doing lots of simultaneous
    # XHRs which has overhead.
    start = 0
    with open(data_target, 'wb') as data:
      for file_ in data_files:
        file_.data_start = start
        curr = utils.read_binary(file_.srcpath)
        file_.data_end = start + len(curr)
        start += len(curr)
        data.write(curr)

    if start > 256 * 1024 * 1024:
      diagnostics.warn('file packager is creating an asset bundle of %d MB. '
                       'this is very large, and browsers might have trouble loading it. '
                       'see https://hacks.mozilla.org/2015/02/synchronous-execution-and-filesystem-access-in-emscripten/'
                       % (start / (1024 * 1024)))

  create_preloaded = '''
      try {
        // canOwn this data in the filesystem, it is a slice into the heap that will never change
        await Module['FS_preloadFile'](name, null, data, true, true, false, true);
        Module['removeRunDependency'](`fp ${name}`);
      } catch (e) {
        err(`Preloading file ${name} failed`);
      }\n'''
  create_data = '''// canOwn this data in the filesystem, it is a slice into the heap that will never change
      Module['FS_createDataFile'](name, null, data, true, true, true);
      Module['removeRunDependency'](`fp ${name}`);'''

  finish_handler = create_preloaded if options.use_preload_plugins else create_data

  if not options.lz4:
    # Data requests - for getting a block of data out of the big archive - have
    # a similar API to XHRs
    code += '''
      for (var file of metadata['files']) {
        var name = file['filename']
        Module['addRunDependency'](`fp ${name}`);
      }\n'''

  if options.has_embedded and not options.obj_output:
    diagnostics.warn('--obj-output is recommended when using --embed. This outputs an object file for linking directly into your application, which is more efficient than JS encoding')

  catch_handler = ''
  if options.export_es6:
    catch_handler += '''
      .catch((error) => {
        loadDataReject(error);
      })'''

  for counter, file_ in enumerate(data_files):
    filename = file_.dstpath
    dirname = os.path.dirname(filename)
    basename = os.path.basename(filename)
    if file_.mode == 'embed':
      if not options.obj_output:
        # Embed (only needed when not generating object file output)
        data = base64_encode(utils.read_binary(file_.srcpath))
        code += " var fileData%d = '%s';\n" % (counter, data)
        # canOwn this data in the filesystem (i.e. there is no need to create a copy in the FS layer).
        code += (" Module['FS_createDataFile']('%s', '%s', atob(fileData%d), true, true, true);\n"
                 % (dirname, basename, counter))
    elif file_.mode == 'preload':
      # Preload
      metadata['files'].append({
        'filename': file_.dstpath,
        'start': file_.data_start,
        'end': file_.data_end,
      })
    else:
      assert 0

  if options.has_preloaded:
    if not options.lz4:
      # Get the big archive and split it up
      use_data = '''// Reuse the bytearray from the XHR as the source for file reads.
          for (var file of metadata['files']) {
            var name = file['filename'];
            var data = byteArray.subarray(file['start'], file['end']);
            %s
          }
          Module['removeRunDependency']('datafile_%s');''' % (finish_handler,
                                                              js_manipulation.escape_for_js_string(data_target))
    else:
      # LZ4FS usage
      temp = data_target + '.orig'
      shutil.move(data_target, temp)
      meta = shared.run_js_tool(utils.path_from_root('tools/lz4-compress.mjs'),
                                [temp, data_target], stdout=PIPE)
      os.unlink(temp)
      use_data = '''var compressedData = %s;
          compressedData['data'] = byteArray;
          assert(typeof Module['LZ4'] === 'object', 'LZ4 not present - was your app built with -sLZ4?');
          Module['LZ4'].loadPackage({ 'metadata': metadata, 'compressedData': compressedData }, %s);
          Module['removeRunDependency']('datafile_%s');''' % (meta, "true" if options.use_preload_plugins else "false", js_manipulation.escape_for_js_string(data_target))

    if options.export_es6:
      use_data += '\nloadDataResolve();'

    package_name = data_target
    remote_package_size = os.path.getsize(package_name)
    remote_package_name = os.path.basename(package_name)
    ret += '''
    var PACKAGE_PATH = '';
    if (typeof window === 'object') {
      PACKAGE_PATH = window['encodeURIComponent'](window.location.pathname.substring(0, window.location.pathname.lastIndexOf('/')) + '/');
    } else if (typeof process === 'undefined' && typeof location !== 'undefined') {
      // web worker
      PACKAGE_PATH = encodeURIComponent(location.pathname.substring(0, location.pathname.lastIndexOf('/')) + '/');
    }
    var PACKAGE_NAME = '%s';
    var REMOTE_PACKAGE_BASE = '%s';
    var REMOTE_PACKAGE_NAME = Module['locateFile']?.(REMOTE_PACKAGE_BASE, '') ?? REMOTE_PACKAGE_BASE;\n''' % (js_manipulation.escape_for_js_string(data_target), js_manipulation.escape_for_js_string(remote_package_name))
    metadata['remote_package_size'] = remote_package_size
    ret += " var REMOTE_PACKAGE_SIZE = metadata['remote_package_size'];\n"

    if options.use_preload_cache:
      # Set the id to a hash of the preloaded data, so that caches survive over multiple builds
      # if the data has not changed.
      data = utils.read_binary(data_target)
      package_uuid = 'sha256-' + hashlib.sha256(data).hexdigest()
      metadata['package_uuid'] = str(package_uuid)

      code += r'''
      var PACKAGE_UUID = metadata['package_uuid'];
      var IDB_RO = "readonly";
      var IDB_RW = "readwrite";
      var DB_NAME = "''' + options.indexeddb_name + '''";
      var DB_VERSION = 1;
      var METADATA_STORE_NAME = 'METADATA';
      var PACKAGE_STORE_NAME = 'PACKAGES';

      async function openDatabase() {
        if (typeof indexedDB == 'undefined') {
          throw new Error('using IndexedDB to cache data can only be done on a web page or in a web worker');
        }
        return new Promise((resolve, reject) => {
          var openRequest = indexedDB.open(DB_NAME, DB_VERSION);
          openRequest.onupgradeneeded = (event) => {
            var db = /** @type {IDBDatabase} */ (event.target.result);

            if (db.objectStoreNames.contains(PACKAGE_STORE_NAME)) {
              db.deleteObjectStore(PACKAGE_STORE_NAME);
            }
            var packages = db.createObjectStore(PACKAGE_STORE_NAME);

            if (db.objectStoreNames.contains(METADATA_STORE_NAME)) {
              db.deleteObjectStore(METADATA_STORE_NAME);
            }
            var metadata = db.createObjectStore(METADATA_STORE_NAME);
          };
          openRequest.onsuccess = (event) => {
            var db = /** @type {IDBDatabase} */ (event.target.result);
            resolve(db);
          };
          openRequest.onerror = reject;
        });
      }

      // This is needed as chromium has a limit on per-entry files in IndexedDB
      // https://cs.chromium.org/chromium/src/content/renderer/indexed_db/webidbdatabase_impl.cc?type=cs&sq=package:chromium&g=0&l=177
      // https://cs.chromium.org/chromium/src/out/Debug/gen/third_party/blink/public/mojom/indexeddb/indexeddb.mojom.h?type=cs&sq=package:chromium&g=0&l=60
      // We set the chunk size to 64MB to stay well below the limit
      var CHUNK_SIZE = 64 * 1024 * 1024;

      async function cacheRemotePackage(db, packageName, packageData, packageMeta) {
        var transactionPackages = db.transaction([PACKAGE_STORE_NAME], IDB_RW);
        var packages = transactionPackages.objectStore(PACKAGE_STORE_NAME);
        var chunkSliceStart = 0;
        var nextChunkSliceStart = 0;
        var chunkCount = Math.ceil(packageData.byteLength / CHUNK_SIZE);
        var finishedChunks = 0;

        return new Promise((resolve, reject) => {
          for (var chunkId = 0; chunkId < chunkCount; chunkId++) {
            nextChunkSliceStart += CHUNK_SIZE;
            var putPackageRequest = packages.put(
              packageData.slice(chunkSliceStart, nextChunkSliceStart),
              `package/${packageName}/${chunkId}`
            );
            chunkSliceStart = nextChunkSliceStart;
            putPackageRequest.onsuccess = (event) => {
              finishedChunks++;
              if (finishedChunks == chunkCount) {
                var transaction_metadata = db.transaction(
                  [METADATA_STORE_NAME],
                  IDB_RW
                );
                var metadata = transaction_metadata.objectStore(METADATA_STORE_NAME);
                var putMetadataRequest = metadata.put(
                  {
                    'uuid': packageMeta.uuid,
                    'chunkCount': chunkCount
                  },
                  `metadata/${packageName}`
                );
                putMetadataRequest.onsuccess = (event) => resolve(packageData);
                putMetadataRequest.onerror = reject;
              }
            };
            putPackageRequest.onerror = reject;
          }
        });
      }

      /*
       * Check if there's a cached package, and if so whether it's the latest available.
       * Resolves to the cached metadata, or `null` if it is missing or out-of-date.
       */
      async function checkCachedPackage(db, packageName) {
        var transaction = db.transaction([METADATA_STORE_NAME], IDB_RO);
        var metadata = transaction.objectStore(METADATA_STORE_NAME);
        var getRequest = metadata.get(`metadata/${packageName}`);
        return new Promise((resolve, reject) => {
          getRequest.onsuccess = (event) => {
            var result = event.target.result;
            if (result && PACKAGE_UUID === result['uuid']) {
              resolve(result);
            } else {
              resolve(null);
            }
          }
          getRequest.onerror = reject;
        });
      }

      async function fetchCachedPackage(db, packageName, metadata) {
        var transaction = db.transaction([PACKAGE_STORE_NAME], IDB_RO);
        var packages = transaction.objectStore(PACKAGE_STORE_NAME);

        var chunksDone = 0;
        var totalSize = 0;
        var chunkCount = metadata['chunkCount'];
        var chunks = new Array(chunkCount);

        return new Promise((resolve, reject) => {
          for (var chunkId = 0; chunkId < chunkCount; chunkId++) {
            var getRequest = packages.get(`package/${packageName}/${chunkId}`);
            getRequest.onsuccess = (event) => {
              if (!event.target.result) {
                reject(`CachedPackageNotFound for: ${packageName}`);
                return;
              }
              // If there's only 1 chunk, there's nothing to concatenate it with so we can just return it now
              if (chunkCount == 1) {
                resolve(event.target.result);
              } else {
                chunksDone++;
                totalSize += event.target.result.byteLength;
                chunks.push(event.target.result);
                if (chunksDone == chunkCount) {
                  if (chunksDone == 1) {
                    resolve(event.target.result);
                  } else {
                    var tempTyped = new Uint8Array(totalSize);
                    var byteOffset = 0;
                    for (var chunkId in chunks) {
                      var buffer = chunks[chunkId];
                      tempTyped.set(new Uint8Array(buffer), byteOffset);
                      byteOffset += buffer.byteLength;
                      buffer = undefined;
                    }
                    chunks = undefined;
                    resolve(tempTyped.buffer);
                    tempTyped = undefined;
                  }
                }
              }
            };
            getRequest.onerror = reject;
          }
        });
      }\n'''

  # add Node.js support code, if necessary
  node_support_code = ''
  if options.support_node:
    node_support_code = '''
      if (isNode) {
        var fsPromises = require('fs/promises');
        var contents = await fsPromises.readFile(packageName);
        return contents.buffer;
      }'''.strip()

  ret += '''
  async function fetchRemotePackage(packageName, packageSize) {
    %(node_support_code)s
    Module['dataFileDownloads'] ??= {};
    try {
      var response = await fetch(packageName);
    } catch (e) {
      throw new Error(`Network Error: ${packageName}`, {e});
    }
    if (!response.ok) {
      throw new Error(`${response.status}: ${response.url}`);
    }

    const chunks = [];
    const headers = response.headers;
    const total = Number(headers.get('Content-Length') ?? packageSize);
    let loaded = 0;

    Module['setStatus']?.('Downloading data...');
    const reader = response.body.getReader();

    while (1) {
      var {done, value} = await reader.read();
      if (done) break;
      chunks.push(value);
      loaded += value.length;
      Module['dataFileDownloads'][packageName] = {loaded, total};

      let totalLoaded = 0;
      let totalSize = 0;

      for (const download of Object.values(Module['dataFileDownloads'])) {
        totalLoaded += download.loaded;
        totalSize += download.total;
      }

      Module['setStatus']?.(`Downloading data... (${totalLoaded}/${totalSize})`);
    }

    const packageData = new Uint8Array(chunks.map((c) => c.length).reduce((a, b) => a + b, 0));
    let offset = 0;
    for (const chunk of chunks) {
      packageData.set(chunk, offset);
      offset += chunk.length;
    }
    return packageData.buffer;
  }\n''' % {'node_support_code': node_support_code}

  code += '''
  async function processPackageData(arrayBuffer) {
    assert(arrayBuffer, 'Loading data file failed.');
    assert(arrayBuffer.constructor.name === ArrayBuffer.name, 'bad input to processPackageData');
    var byteArray = new Uint8Array(arrayBuffer);
    var curr;
    %s
  }
  Module['addRunDependency']('datafile_%s');\n''' % (use_data, js_manipulation.escape_for_js_string(data_target))
  # use basename because from the browser's point of view,
  # we need to find the datafile in the same dir as the html file

  code += '''
  Module['preloadResults'] ??= {};\n'''

  if options.use_preload_cache:
    code += '''
    async function preloadFallback(error) {
      console.error(error);
      console.error('falling back to default preload behavior');
      processPackageData(await fetchRemotePackage(REMOTE_PACKAGE_NAME, REMOTE_PACKAGE_SIZE));
    }

    try {
      var db = await openDatabase();
      var pkgMetadata = await checkCachedPackage(db, PACKAGE_PATH + PACKAGE_NAME);
      var useCached = !!pkgMetadata;
      Module['preloadResults'][PACKAGE_NAME] = {fromCache: useCached};
      if (useCached) {
        processPackageData(await fetchCachedPackage(db, PACKAGE_PATH + PACKAGE_NAME, pkgMetadata));
      } else {
        var packageData = await fetchRemotePackage(REMOTE_PACKAGE_NAME, REMOTE_PACKAGE_SIZE);
        try {
          processPackageData(await cacheRemotePackage(db, PACKAGE_PATH + PACKAGE_NAME, packageData, {uuid:PACKAGE_UUID}))
        } catch (error) {
          console.error(error);
          processPackageData(packageData);
        }
      }
    } catch(e) {
      await preloadFallback(e)%s;
    }

    Module['setStatus']?.('Downloading...');\n''' % catch_handler
  else:
    # Not using the preload cache, so we might as well start the fetch ASAP,
    # potentially before JS parsing of the main codebase if it's after us.
    # The only tricky bit is that the fetch is async, and so is the point at
    # which runWithFS gets called, so we handle both orderings.
    ret += '''
    var fetchPromise;
    var fetched = Module['getPreloadedPackage']?.(REMOTE_PACKAGE_NAME, REMOTE_PACKAGE_SIZE);

    if (!fetched) {
      // Note that we don't use await here because we want to execute
      // the rest of this function immediately.
      fetchPromise = fetchRemotePackage(REMOTE_PACKAGE_NAME, REMOTE_PACKAGE_SIZE)%s;
    }\n''' % catch_handler

    code += '''
    Module['preloadResults'][PACKAGE_NAME] = {fromCache: false};
    if (!fetched) {
      fetched = await fetchPromise;
    }
    processPackageData(fetched);\n'''

  ret += '''
  async function runWithFS(Module) {\n'''
  ret += code
  ret += '''
  }
  if (Module['calledRun']) {
    runWithFS(Module)%s;
  } else {
    (Module['preRun'] ??= []).push(runWithFS); // FS is not initialized yet, wait for it
  }\n''' % catch_handler

  if options.separate_metadata:
    node_support_code = ''
    if options.support_node:
      node_support_code = '''
        if (isNode) {
          var fsPromises = require('fs/promises');
          var contents = await fsPromises.readFile(metadataUrl, 'utf8');
          return loadPackage(JSON.parse(contents));
        }'''.strip()

    ret += '''
    Module['removeRunDependency']('%(metadata_file)s');
  }

  async function runMetaWithFS() {
    Module['addRunDependency']('%(metadata_file)s');
    var metadataUrl = Module['locateFile']?.('%(metadata_file)s', '') ?? '%(metadata_file)s';
    %(node_support_code)s
    var response = await fetch(metadataUrl);
    if (!response.ok) {
      throw new Error(`${response.status}: ${response.url}`);
    }
    var json = await response.json();
    return loadPackage(json);
  }

  if (Module['calledRun']) {
    runMetaWithFS();
  } else {
    (Module['preRun'] ??= []).push(runMetaWithFS);
  }\n''' % {'node_support_code': node_support_code, 'metadata_file': os.path.basename(options.jsoutput + '.metadata')}
  else:
    ret += '''
  }
  loadPackage(%s);\n''' % json.dumps(metadata)

  if options.export_es6:
    ret += '''
    });
  }
  // END the loadDataFile function
  '''
  else:
    ret += '''
    })();\n'''

  return ret


if __name__ == '__main__':
  sys.exit(main())