Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
emscripten-core
GitHub Repository: emscripten-core/emscripten
Path: blob/main/tools/file_packager.py
6169 views
1
#!/usr/bin/env python3
2
# Copyright 2012 The Emscripten Authors. All rights reserved.
3
# Emscripten is available under two separate licenses, the MIT license and the
4
# University of Illinois/NCSA Open Source License. Both these licenses can be
5
# found in the LICENSE file.
6
7
"""A tool that generates FS API calls to generate a filesystem, and packages the files
8
to work with that.
9
10
This is called by emcc. You can also call it yourself.
11
12
You can split your files into "asset bundles", and create each bundle separately
13
with this tool. Then just include the generated js for each and they will load
14
the data and prepare it accordingly. This allows you to share assets and reduce
15
data downloads.
16
17
* If you run this yourself, separately/standalone from emcc, then the main program
18
compiled by emcc must be built with filesystem support. You can do that with
19
-sFORCE_FILESYSTEM (if you forget that, an unoptimized build or one with
20
ASSERTIONS enabled will show an error suggesting you use that flag).
21
22
Usage:
23
24
file_packager TARGET [--preload A [B..]] [--embed C [D..]] [--exclude E [F..]] [--js-output=OUTPUT.js] [--no-force] [--use-preload-cache] [--indexedDB-name=EM_PRELOAD_CACHE] [--separate-metadata] [--lz4] [--use-preload-plugins] [--no-node] [--export-es6] [--help]
25
26
--preload ,
27
--embed See emcc --help for more details on those options.
28
29
--exclude E [F..] Specifies filename pattern matches to use for excluding given files from being added to the package.
30
See https://docs.python.org/2/library/fnmatch.html for syntax.
31
32
--from-emcc Indicate that `file_packager` was called from `emcc` and will be further processed by it, so some code generation can be skipped here
33
34
--js-output=FILE Writes output in FILE, if not specified, standard output is used.
35
36
--obj-output=FILE create an object file from embedded files, for direct linking into a wasm binary.
37
38
--depfile=FILE Writes a dependency list containing the list of directories and files walked, compatible with Make, Ninja, CMake, etc.
39
40
--wasm64 When used with `--obj-output` create a wasm64 object file
41
42
--export-name=EXPORT_NAME Use custom export name (default is `Module`)
43
44
--export-es6 Wrap generated code inside ES6 exported function
45
46
--no-force Don't create output if no valid input file is specified.
47
48
--use-preload-cache Stores package in IndexedDB so that subsequent loads don't need to do XHR. Checks package version.
49
50
--indexedDB-name Use specified IndexedDB database name (Default: 'EM_PRELOAD_CACHE')
51
52
--separate-metadata Stores package metadata separately. Only applicable when preloading and js-output file is specified.
53
54
--lz4 Uses LZ4. This compresses the data using LZ4 when this utility is run, then the client decompresses chunks on the fly, avoiding storing
55
the entire decompressed data in memory at once. See LZ4 in src/settings.js, you must build the main program with that flag.
56
57
--use-preload-plugins Tells the file packager to run preload plugins on the files as they are loaded. This performs tasks like decoding images
58
and audio using the browser's codecs.
59
60
--no-node Whether to support Node.js. By default we do, which emits some extra code.
61
62
--quiet Suppress reminder about using `FORCE_FILESYSTEM`
63
64
Notes:
65
66
* The file packager generates unix-style file paths. So if you are on windows and a file is accessed at
67
subdir\file, in JS it will be subdir/file. For simplicity we treat the web platform as a *NIX.
68
"""
69
70
import ctypes
71
import fnmatch
72
import hashlib
73
import json
74
import os
75
import posixpath
76
import shutil
77
import sys
78
from dataclasses import dataclass
79
from subprocess import PIPE
80
from textwrap import dedent
81
82
__scriptdir__ = os.path.dirname(os.path.abspath(__file__))
83
__rootdir__ = os.path.dirname(__scriptdir__)
84
sys.path.insert(0, __rootdir__)
85
86
from tools import diagnostics, js_manipulation, shared, utils
87
from tools.response_file import substitute_response_files
88
89
DEBUG = os.environ.get('EMCC_DEBUG')
90
91
excluded_patterns: list[str] = []
92
new_data_files = []
93
walked = []
94
95
96
class Options:
  """Runtime configuration for the packager, populated from the command line in main()."""

  def __init__(self):
    # JS global the generated loader reads/attaches to (--export-name, default `Module`).
    self.export_name = 'Module'
    # True once at least one --preload input was seen.
    self.has_preloaded = False
    # True once at least one --embed input was seen.
    self.has_embedded = False
    # Path from --js-output, or None to print the generated JS to stdout.
    self.jsoutput = None
    # Path from --obj-output (object-file embedding mode).
    self.obj_output = None
    # Path from --depfile, or None to skip dependency-file output.
    self.depfile = None
    # True when invoked by emcc itself (--from-emcc); skips some codegen.
    self.from_emcc = False
    # Suppress the FORCE_FILESYSTEM reminder (--quiet).
    self.quiet = False
    # When False (--no-force), produce no output if there are no valid inputs.
    self.force = True
    # If set to True, IndexedDB (IDBFS in library_idbfs.js) is used to locally
    # cache VFS XHR so that subsequent page loads can read the data from the
    # offline cache instead.
    self.use_preload_cache = False
    # IndexedDB database name used by --use-preload-cache (--indexedDB-name).
    self.indexeddb_name = 'EM_PRELOAD_CACHE'
    # If set to True, the package metadata is stored separately from js-output
    # file which makes js-output file immutable to the package content changes.
    # If set to False, the package metadata is stored inside the js-output file
    # which makes js-output file to mutate on each invocation of this packager tool.
    self.separate_metadata = False
    # Compress the bundle with LZ4 (--lz4); the app must be built with -sLZ4.
    self.lz4 = False
    # Run browser preload plugins (image/audio decoding) on load (--use-preload-plugins).
    self.use_preload_plugins = False
    # Emit Node.js support code in the generated loader (disabled by --no-node).
    self.support_node = True
    # Produce a wasm64 object file (--wasm64, only meaningful with --obj-output).
    self.wasm64 = False
    # Wrap the generated code in an ES6 exported function (--export-es6).
    self.export_es6 = False
122
123
124
@dataclass
class DataFile:
  """One input file to be packaged into the bundle."""

  # Path of the file on the local filesystem.
  srcpath: str
  # Path the file will be given inside the Emscripten virtual filesystem.
  dstpath: str
  # Either 'preload' or 'embed'.
  mode: str
  # True when the destination was given explicitly via the 'src@dst' notation.
  explicit_dst_path: bool
130
131
132
# Module-level singleton; mutated by the argument parsing in main().
options = Options()
133
134
135
def err(*args):
  """Write a space-separated diagnostic line to stderr."""
  sys.stderr.write(' '.join(str(a) for a in args) + '\n')
137
138
139
def has_hidden_attribute(filepath):
  """Win32 code to test whether the given file has the hidden property set."""
  if sys.platform != 'win32':
    return False

  FILE_ATTRIBUTE_HIDDEN = 0x2
  try:
    attrs = ctypes.windll.kernel32.GetFileAttributesW(filepath)
    # GetFileAttributesW reports failure as -1 (INVALID_FILE_ATTRIBUTES);
    # treat that, like any other error, as "not hidden".
    assert attrs != -1
    return bool(attrs & FILE_ATTRIBUTE_HIDDEN)
  except Exception:
    return False
152
153
154
def should_ignore(fullname):
  """Return True when a file must be left out of the package.

  A file is skipped if it carries the Win32 hidden attribute or matches any
  pattern supplied via --exclude."""
  if has_hidden_attribute(fullname):
    return True

  for pattern in excluded_patterns:
    if fnmatch.fnmatch(fullname, pattern):
      return True
  return False
161
162
163
def add(mode, rootpathsrc, rootpathdst):
  """Expand directories into individual files

  rootpathsrc: The path name of the root directory on the local FS we are
               adding to emscripten virtual FS.
  rootpathdst: The name we want to make the source path available on the
               emscripten virtual FS.
  """
  walked.append(rootpathsrc)
  for dirpath, dirnames, filenames in os.walk(rootpathsrc):
    kept_dirs = []
    for name in dirnames:
      fullname = os.path.join(dirpath, name)
      if should_ignore(fullname):
        if DEBUG:
          err('Skipping directory "%s" from inclusion in the emscripten '
              'virtual file system.' % fullname)
      else:
        walked.append(fullname)
        kept_dirs.append(name)
    for name in filenames:
      fullname = os.path.join(dirpath, name)
      if should_ignore(fullname):
        if DEBUG:
          err('Skipping file "%s" from inclusion in the emscripten '
              'virtual file system.' % fullname)
      else:
        walked.append(fullname)
        # Convert source filename relative to root directory of target FS.
        dstpath = os.path.join(rootpathdst,
                               os.path.relpath(fullname, rootpathsrc))
        new_data_files.append(DataFile(srcpath=fullname, dstpath=dstpath,
                                       mode=mode, explicit_dst_path=True))
    # Prune ignored directories in place so os.walk does not descend into them.
    dirnames[:] = kept_dirs
196
197
198
def to_asm_string(string):
  """Convert a python string to string suitable for including in an
  assembly file using the `.asciz` directive.

  The result will be an UTF-8 encoded string in the data section.
  """
  # See MCAsmStreamer::PrintQuotedString in llvm/lib/MC/MCAsmStreamer.cpp
  # And isPrint in llvm/include/llvm/ADT/StringExtras.h

  def is_print(c):
    return c >= 0x20 and c <= 0x7E

  def escape(c):
    # '"' and '\' are printable but would terminate (or escape) the quoted
    # string in the `.asciz "..."` directive, so escape them explicitly,
    # matching MCAsmStreamer::PrintQuotedString.
    if c in (0x22, 0x5C):  # '"' and '\'
      return '\\' + chr(c)
    if is_print(c):
      return chr(c)
    # NOTE: iterating over `bytes` yields ints, so the table is keyed on byte
    # values (keying on 1-char strings would never match).
    escape_chars = {
        0x08: '\\b',
        0x0C: '\\f',
        0x0A: '\\n',
        0x0D: '\\r',
        0x09: '\\t',
    }
    if c in escape_chars:
      return escape_chars[c]
    # Encode all other chars as three octal digits(!). Each digit is the
    # plain 0-7 value; `oct()` would prepend a spurious '0o' prefix.
    return '\\%d%d%d' % ((c >> 6) & 7, (c >> 3) & 7, c & 7)

  return ''.join(escape(c) for c in string.encode('utf-8'))
226
227
228
def to_c_symbol(filename, used):
  """Convert a filename (python string) to a legal C symbols, avoiding collisions.

  Non-alphanumeric characters become underscores; if the result is already in
  `used`, a numeric suffix (2, 3, ...) is appended until it is unique.  The
  chosen symbol is recorded in `used` before being returned.
  """
  c_symbol = ''.join(ch if ch.isalnum() else '_' for ch in filename)
  if c_symbol in used:
    suffix = 2
    while '%s%d' % (c_symbol, suffix) in used:
      suffix += 1
    c_symbol = '%s%d' % (c_symbol, suffix)
  used.add(c_symbol)
  return c_symbol
244
245
246
def generate_object_file(data_files):
  """Write a wasm object file that embeds every 'embed'-mode file.

  Emits a temporary assembly file next to options.obj_output containing each
  file's name, size and raw bytes (via .incbin), plus a constructor that
  passes the table to _emscripten_fs_load_embedded_files at startup, then
  assembles it with EMCC.
  """
  embed_files = [f for f in data_files if f.mode == 'embed']
  assert embed_files

  # The intermediate assembly lives alongside the requested object file.
  asm_file = utils.replace_suffix(options.obj_output, '.s')

  # Give every file a unique C-legal symbol name derived from its VFS path.
  used = set()
  for f in embed_files:
    f.c_symbol_name = '__em_file_data_%s' % to_c_symbol(f.dstpath, used)

  with open(asm_file, 'w') as out:
    out.write('# Emscripten embedded file data, generated by tools/file_packager.py\n')

    # One rodata section per file: NUL-terminated name, then the raw bytes.
    for f in embed_files:
      if DEBUG:
        err('embedding %s at %s' % (f.srcpath, f.dstpath))

      size = os.path.getsize(f.srcpath)
      dstpath = to_asm_string(f.dstpath)
      srcpath = utils.normalize_path(f.srcpath)
      # NOTE(review): len(dstpath) counts characters of the *escaped* string,
      # so .size can overstate the byte length when escapes are present —
      # confirm whether any consumer relies on this size.
      out.write(dedent(f'''
      .section .rodata.{f.c_symbol_name},"",@

      # The name of file
      {f.c_symbol_name}_name:
      .asciz "{dstpath}"
      .size {f.c_symbol_name}_name, {len(dstpath) + 1}

      # The size of the file followed by the content itself
      {f.c_symbol_name}:
      .incbin "{srcpath}"
      .size {f.c_symbol_name}, {size}
      '''))

    # Pointer width dictates alignment and the constructor-table entry type.
    if options.wasm64:
      align = 3
      ptr_type = 'i64'
      bits = 64
    else:
      align = 2
      ptr_type = 'i32'
      bits = 32
    # Startup constructor (priority 49, see system/lib/README.md) that hands
    # the embedded-file table to the runtime.
    out.write(dedent(f'''
      .functype _emscripten_fs_load_embedded_files ({ptr_type}) -> ()
      .section .text,"",@
      init_file_data:
      .functype init_file_data () -> ()
      global.get __emscripten_embedded_file_data@GOT
      call _emscripten_fs_load_embedded_files
      end_function

      # Run init_file_data on startup.
      # See system/lib/README.md for ordering of system constructors.
      .section .init_array.49,"",@
      .p2align {align}
      .int{bits} init_file_data

      # A list of triples of:
      # (file_name_ptr, file_data_size, file_data_ptr)
      # The list in null terminate with a single 0
      .section .rodata.__emscripten_embedded_file_data,"",@
      __emscripten_embedded_file_data:
      .p2align {align}
      '''))

    # Table entries: the `%s` placeholders survive the f-string and are then
    # filled by the trailing %-format with the alignment value.
    for f in embed_files:
      # The `.dc.a` directive gives us a pointer (address) sized entry.
      # See https://sourceware.org/binutils/docs/as/Dc.html
      out.write(dedent(f'''\
      .p2align %s
      .dc.a {f.c_symbol_name}_name
      .p2align %s
      .int32 {os.path.getsize(f.srcpath)}
      .p2align %s
      .dc.a {f.c_symbol_name}
      ''' % (align, align, align)))

    # NOTE(review): ptr_size is fixed at 4 even under --wasm64, where .dc.a
    # entries are 8 bytes — the .size metadata below looks understated for
    # wasm64; confirm against upstream intent.
    ptr_size = 4
    elem_size = (2 * ptr_size) + 4
    total_size = len(embed_files) * elem_size + 4
    # Terminating null entry for the table.
    out.write(dedent(f'''\
      .dc.a 0
      .size __emscripten_embedded_file_data, {total_size}
      '''))
  # Assemble the generated .s into the requested object file.
  cmd = [shared.EMCC, '-c', asm_file, '-o', options.obj_output]
  if options.wasm64:
    target = 'wasm64-unknown-emscripten'
    cmd.append('-Wno-experimental')
  else:
    target = 'wasm32-unknown-emscripten'
  cmd.append('--target=' + target)
  shared.check_call(cmd)
338
339
340
def main():  # noqa: C901, PLR0912, PLR0915
  """Parse the command line, collect input files and produce the package.

  Future modifications should consider refactoring to reduce complexity.

  * The McCabe cyclomatic complexity is currently 60 vs 10 recommended.
  * There are currently 63 branches vs 12 recommended.
  * There are currently 151 statements vs 50 recommended.

  To revalidate these numbers, run `ruff check --select=C901,PLR091`.
  """
  if len(sys.argv) == 1:
    err('''Usage: file_packager TARGET [--preload A [B..]] [--embed C [D..]] [--exclude E [F..]] [--js-output=OUTPUT.js] [--no-force] [--use-preload-cache] [--indexedDB-name=EM_PRELOAD_CACHE] [--separate-metadata] [--lz4] [--use-preload-plugins] [--no-node] [--export-es6] [--help]
Try 'file_packager --help' for more details.''')
    return 1

  # read response files very early on
  try:
    args = substitute_response_files(sys.argv[1:])
  except OSError as e:
    utils.exit_with_error(e)

  if '--help' in args:
    print(__doc__.strip())
    return 0

  # First positional argument is the output .data bundle.
  data_target = args[0]
  data_files = []
  # Tracks which multi-value flag (--preload/--embed/--exclude) the next
  # positional argument belongs to.
  leading = ''

  for arg in args[1:]:
    if arg == '--preload':
      leading = 'preload'
    elif arg == '--embed':
      leading = 'embed'
    elif arg == '--exclude':
      leading = 'exclude'
    elif arg == '--no-force':
      options.force = False
      leading = ''
    elif arg == '--export-es6':
      options.export_es6 = True
      leading = ''
    elif arg == '--use-preload-cache':
      options.use_preload_cache = True
      leading = ''
    elif arg.startswith('--indexedDB-name'):
      # NOTE: without '=' this leaves indexeddb_name as None, which the
      # preload-cache codegen concatenates into JS — presumably callers
      # always pass '=<name>'.
      options.indexeddb_name = arg.split('=', 1)[1] if '=' in arg else None
      leading = ''
    elif arg == '--no-heap-copy':
      diagnostics.warn('ignoring legacy flag --no-heap-copy (that is the only mode supported now)')
      leading = ''
    elif arg == '--separate-metadata':
      options.separate_metadata = True
      leading = ''
    elif arg == '--lz4':
      options.lz4 = True
      leading = ''
    elif arg == '--use-preload-plugins':
      options.use_preload_plugins = True
      leading = ''
    elif arg == '--no-node':
      options.support_node = False
      leading = ''
    elif arg.startswith('--js-output'):
      options.jsoutput = arg.split('=', 1)[1] if '=' in arg else None
      leading = ''
    elif arg.startswith('--obj-output'):
      options.obj_output = arg.split('=', 1)[1] if '=' in arg else None
      leading = ''
    elif arg.startswith('--depfile'):
      options.depfile = arg.split('=', 1)[1] if '=' in arg else None
      leading = ''
    elif arg == '--wasm64':
      options.wasm64 = True
    elif arg.startswith('--export-name'):
      if '=' in arg:
        options.export_name = arg.split('=', 1)[1]
      leading = ''
    elif arg == '--from-emcc':
      options.from_emcc = True
      leading = ''
    elif arg == '--quiet':
      options.quiet = True
    elif leading in {'preload', 'embed'}:
      mode = leading
      # position of @ if we're doing 'src@dst'. '__' is used to keep the index
      # same with the original if they escaped with '@@'.
      at_position = arg.replace('@@', '__').find('@')
      # '@@' in input string means there is an actual @ character, a single '@'
      # means the 'src@dst' notation.
      uses_at_notation = (at_position != -1)

      if uses_at_notation:
        srcpath = arg[0:at_position].replace('@@', '@')  # split around the @
        dstpath = arg[at_position + 1:].replace('@@', '@')
      else:
        # Use source path as destination path.
        srcpath = dstpath = arg.replace('@@', '@')
      if os.path.isfile(srcpath) or os.path.isdir(srcpath):
        data_files.append(DataFile(srcpath=srcpath, dstpath=dstpath, mode=mode,
                                   explicit_dst_path=uses_at_notation))
      else:
        # Fixed: previous f-string was f'${arg} ...', emitting a stray '$'
        # (a JS template-literal habit) in the error message.
        diagnostics.error(f'{arg} does not exist')
    elif leading == 'exclude':
      excluded_patterns.append(arg)
    else:
      diagnostics.error('Unknown parameter:', arg)

  options.has_preloaded = any(f.mode == 'preload' for f in data_files)
  options.has_embedded = any(f.mode == 'embed' for f in data_files)

  # Validate the resulting option combination before doing any work.
  if options.has_preloaded and options.has_embedded:
    diagnostics.error('--preload and --embed are mutually exclusive (See https://github.com/emscripten-core/emscripten/issues/24803)')

  if options.has_embedded and not options.obj_output:
    diagnostics.error('--obj-output is required when using --embed. This outputs an object file for linking directly into your application and is more efficient than the old JS encoding')

  if options.separate_metadata and (not options.has_preloaded or not options.jsoutput):
    diagnostics.error('cannot separate-metadata without both --preloaded files and a specified --js-output')

  if not options.from_emcc and not options.quiet:
    diagnostics.warn('Remember to build the main file with `-sFORCE_FILESYSTEM` '
                     'so that it includes support for loading this file package')

  if options.jsoutput and os.path.abspath(options.jsoutput) == os.path.abspath(data_target):
    diagnostics.error('TARGET should not be the same value of --js-output')

  if options.from_emcc and options.export_es6:
    diagnostics.error("Can't use --export-es6 option together with --from-emcc since the code should be embedded within emcc's code")

  # Expand directories into individual files (add() appends to
  # new_data_files and walked) and drop ignored entries.
  walked.append(__file__)
  for file_ in data_files:
    if not should_ignore(file_.srcpath):
      if os.path.isdir(file_.srcpath):
        add(file_.mode, file_.srcpath, file_.dstpath)
      else:
        walked.append(file_.srcpath)
        new_data_files.append(file_)
  data_files = [file_ for file_ in new_data_files
                if not os.path.isdir(file_.srcpath)]
  if len(data_files) == 0:
    diagnostics.error('Nothing to do!')

  # Absolutize paths, and check that they make sense
  # os.getcwd() always returns the hard path with any symbolic links resolved,
  # even if we cd'd into a symbolic link.
  curr_abspath = os.path.abspath(os.getcwd())

  for file_ in data_files:
    if not file_.explicit_dst_path:
      # This file was not defined with src@dst, so we inferred the destination
      # from the source. In that case, we require that the destination be
      # within the current working directory.
      path = file_.dstpath
      # Use os.path.realpath to resolve any symbolic links to hard paths,
      # to match the structure in curr_abspath.
      abspath = os.path.realpath(os.path.abspath(path))
      if DEBUG:
        err(path, abspath, curr_abspath)
      if not abspath.startswith(curr_abspath):
        err('Error: Embedding "%s" which is not contained within the current directory '
            '"%s". This is invalid since the current directory becomes the '
            'root that the generated code will see. To include files outside of the current '
            'working directory you can use the `--preload-file srcpath@dstpath` syntax to '
            'explicitly specify the target location.' % (path, curr_abspath))
        sys.exit(1)
      file_.dstpath = abspath[len(curr_abspath) + 1:]
      if os.path.isabs(path):
        diagnostics.warn('Embedding an absolute file/directory name "%s" to the '
                         'virtual filesystem. The file will be made available in the '
                         'relative path "%s". You can use the `--preload-file srcpath@dstpath` '
                         'syntax to explicitly specify the target location the absolute source '
                         'path should be directed to.' % (path, file_.dstpath))

  for file_ in data_files:
    # name in the filesystem, native and emulated
    file_.dstpath = utils.normalize_path(file_.dstpath)
    # If user has submitted a directory name as the destination but omitted
    # the destination filename, use the filename from source file
    if file_.dstpath.endswith('/'):
      file_.dstpath = file_.dstpath + os.path.basename(file_.srcpath)
    # make destination path always relative to the root
    file_.dstpath = posixpath.normpath(os.path.join('/', file_.dstpath))
    if DEBUG:
      err('Packaging file "%s" to VFS in path "%s".' % (file_.srcpath, file_.dstpath))

  # Remove duplicates (can occur naively, for example preload dir/, preload dir/subdir/)
  seen = set()

  def was_seen(name):
    if name in seen:
      return True
    seen.add(name)
    return False

  # The files are sorted by the dstpath to make the order of files reproducible
  # across file systems / operating systems (os.walk does not produce the same
  # file order on different file systems / operating systems)
  data_files = sorted(data_files, key=lambda file_: file_.dstpath)
  data_files = [file_ for file_ in data_files if not was_seen(file_.dstpath)]

  # Optionally emit a Make/Ninja/CMake-compatible dependency file.
  if options.depfile:
    targets = []
    if options.obj_output:
      targets.append(options.obj_output)
    if options.jsoutput:
      targets.append(data_target)
      targets.append(options.jsoutput)
    with open(options.depfile, 'w') as f:
      for target in targets:
        if target:
          f.write(escape_for_makefile(target))
          f.write(' \\\n')
      f.write(': \\\n')
      for dependency in walked:
        f.write(escape_for_makefile(dependency))
        f.write(' \\\n')

  if options.obj_output:
    if not options.has_embedded:
      diagnostics.error('--obj-output is only applicable when embedding files')
    generate_object_file(data_files)
  else:
    metadata = {'files': []}

    ret = generate_preload_js(data_target, data_files, metadata)

    if options.force or data_files:
      if options.jsoutput is None:
        print(ret)
      else:
        # Overwrite the old jsoutput file (if exists) only when its content
        # differs from the current generated one, otherwise leave the file
        # untouched preserving its old timestamp
        if os.path.isfile(options.jsoutput):
          old = utils.read_file(options.jsoutput)
          if old != ret:
            utils.write_file(options.jsoutput, ret)
        else:
          utils.write_file(options.jsoutput, ret)
      if options.separate_metadata:
        utils.write_file(options.jsoutput + '.metadata', json.dumps(metadata, separators=(',', ':')))

  return 0
583
584
585
def escape_for_makefile(fpath):
  """Escape a path for use in a dependency (.d) file.

  Follows CMake's "pathname" grammar:
  https://cmake.org/cmake/help/latest/command/add_custom_command.html#grammar-token-depfile-pathname
  which is congruent with how Ninja and GNU Make expect characters escaped.
  """
  escaped = utils.normalize_path(fpath)
  for char, replacement in (('$', '$$'), ('#', '\\#'), (' ', '\\ ')):
    escaped = escaped.replace(char, replacement)
  return escaped
591
592
593
def generate_preload_js(data_target, data_files, metadata):
594
# emcc will add this to the output itself, so it is only needed for
595
# standalone calls
596
if options.from_emcc:
597
ret = ''
598
else:
599
if options.export_es6:
600
ret = 'export default async function loadDataFile(Module) {\n'
601
else:
602
ret = '''
603
var Module = typeof %(EXPORT_NAME)s != 'undefined' ? %(EXPORT_NAME)s : {};\n''' % {"EXPORT_NAME": options.export_name}
604
605
ret += '''
606
if (!Module['expectedDataFileDownloads']) Module['expectedDataFileDownloads'] = 0;
607
Module['expectedDataFileDownloads']++;'''
608
609
if not options.export_es6:
610
ret += '''
611
(() => {'''
612
613
ret += '''
614
// Do not attempt to redownload the virtual filesystem data when in a pthread or a Wasm Worker context.
615
var isPthread = typeof ENVIRONMENT_IS_PTHREAD != 'undefined' && ENVIRONMENT_IS_PTHREAD;
616
var isWasmWorker = typeof ENVIRONMENT_IS_WASM_WORKER != 'undefined' && ENVIRONMENT_IS_WASM_WORKER;
617
if (isPthread || isWasmWorker) return;\n'''
618
619
if options.support_node:
620
ret += " var isNode = globalThis.process && globalThis.process.versions && globalThis.process.versions.node && globalThis.process.type != 'renderer';\n"
621
622
if options.support_node and options.export_es6:
623
ret += '''if (isNode) {
624
const { createRequire } = await import('node:module');
625
/** @suppress{duplicate} */
626
var require = createRequire(import.meta.url);
627
}\n'''
628
629
if options.export_es6:
630
ret += 'return new Promise((loadDataResolve, loadDataReject) => {\n'
631
ret += ' async function loadPackage(metadata) {\n'
632
633
code = '''
634
function assert(check, msg) {
635
if (!check) throw new Error(msg);
636
}\n'''
637
638
# Set up folders
639
partial_dirs = []
640
for file_ in data_files:
641
assert file_.mode == 'preload'
642
dirname = os.path.dirname(file_.dstpath)
643
dirname = dirname.lstrip('/') # absolute paths start with '/', remove that
644
if dirname != '':
645
parts = dirname.split('/')
646
for i in range(len(parts)):
647
partial = '/'.join(parts[:i + 1])
648
if partial not in partial_dirs:
649
code += ('''Module['FS_createPath'](%s, %s, true, true);\n'''
650
% (json.dumps('/' + '/'.join(parts[:i])), json.dumps(parts[i])))
651
partial_dirs.append(partial)
652
653
# Bundle all datafiles into one archive. Avoids doing lots of simultaneous
654
# XHRs which has overhead.
655
start = 0
656
with open(data_target, 'wb') as data:
657
for file_ in data_files:
658
file_.data_start = start
659
curr = utils.read_binary(file_.srcpath)
660
file_.data_end = start + len(curr)
661
start += len(curr)
662
data.write(curr)
663
664
if start > 256 * 1024 * 1024:
665
diagnostics.warn('file packager is creating an asset bundle of %d MB. '
666
'this is very large, and browsers might have trouble loading it. '
667
'see https://hacks.mozilla.org/2015/02/synchronous-execution-and-filesystem-access-in-emscripten/'
668
% (start / (1024 * 1024)))
669
670
create_preloaded = '''
671
try {
672
// canOwn this data in the filesystem, it is a slice into the heap that will never change
673
await Module['FS_preloadFile'](name, null, data, true, true, false, true);
674
Module['removeRunDependency'](`fp ${name}`);
675
} catch (e) {
676
err(`Preloading file ${name} failed`, e);
677
}\n'''
678
create_data = '''// canOwn this data in the filesystem, it is a slice into the heap that will never change
679
Module['FS_createDataFile'](name, null, data, true, true, true);
680
Module['removeRunDependency'](`fp ${name}`);'''
681
682
finish_handler = create_preloaded if options.use_preload_plugins else create_data
683
684
if not options.lz4:
685
# Data requests - for getting a block of data out of the big archive - have
686
# a similar API to XHRs
687
code += '''
688
for (var file of metadata['files']) {
689
var name = file['filename']
690
Module['addRunDependency'](`fp ${name}`);
691
}\n'''
692
693
catch_handler = ''
694
if options.export_es6:
695
catch_handler += '''
696
.catch((error) => {
697
loadDataReject(error);
698
})'''
699
700
for file_ in data_files:
701
filename = file_.dstpath
702
dirname = os.path.dirname(filename)
703
metadata['files'].append({
704
'filename': file_.dstpath,
705
'start': file_.data_start,
706
'end': file_.data_end,
707
})
708
709
if options.has_preloaded:
710
if not options.lz4:
711
# Get the big archive and split it up
712
use_data = '''// Reuse the bytearray from the XHR as the source for file reads.
713
for (var file of metadata['files']) {
714
var name = file['filename'];
715
var data = byteArray.subarray(file['start'], file['end']);
716
%s
717
}
718
Module['removeRunDependency']('datafile_%s');''' % (finish_handler,
719
js_manipulation.escape_for_js_string(data_target))
720
else:
721
# LZ4FS usage
722
temp = data_target + '.orig'
723
shutil.move(data_target, temp)
724
meta = shared.run_js_tool(utils.path_from_root('tools/lz4-compress.mjs'),
725
[temp, data_target], stdout=PIPE)
726
os.unlink(temp)
727
use_data = '''var compressedData = %s;
728
compressedData['data'] = byteArray;
729
assert(typeof Module['LZ4'] === 'object', 'LZ4 not present - was your app build with -sLZ4?');
730
Module['LZ4'].loadPackage({ 'metadata': metadata, 'compressedData': compressedData }, %s);
731
Module['removeRunDependency']('datafile_%s');''' % (meta, "true" if options.use_preload_plugins else "false", js_manipulation.escape_for_js_string(data_target))
732
733
if options.export_es6:
734
use_data += '\nloadDataResolve();'
735
736
package_name = data_target
737
remote_package_size = os.path.getsize(package_name)
738
remote_package_name = os.path.basename(package_name)
739
ret += '''
740
var PACKAGE_PATH = '';
741
if (typeof window === 'object') {
742
PACKAGE_PATH = window['encodeURIComponent'](window.location.pathname.substring(0, window.location.pathname.lastIndexOf('/')) + '/');
743
} else if (typeof process === 'undefined' && typeof location !== 'undefined') {
744
// web worker
745
PACKAGE_PATH = encodeURIComponent(location.pathname.substring(0, location.pathname.lastIndexOf('/')) + '/');
746
}
747
var PACKAGE_NAME = '%s';
748
var REMOTE_PACKAGE_BASE = '%s';
749
var REMOTE_PACKAGE_NAME = Module['locateFile'] ? Module['locateFile'](REMOTE_PACKAGE_BASE, '') : REMOTE_PACKAGE_BASE;\n''' % (js_manipulation.escape_for_js_string(data_target), js_manipulation.escape_for_js_string(remote_package_name))
750
metadata['remote_package_size'] = remote_package_size
751
ret += " var REMOTE_PACKAGE_SIZE = metadata['remote_package_size'];\n"
752
753
if options.use_preload_cache:
754
# Set the id to a hash of the preloaded data, so that caches survive over multiple builds
755
# if the data has not changed.
756
data = utils.read_binary(data_target)
757
package_uuid = 'sha256-' + hashlib.sha256(data).hexdigest()
758
metadata['package_uuid'] = str(package_uuid)
759
760
code += r'''
761
var PACKAGE_UUID = metadata['package_uuid'];
762
var IDB_RO = "readonly";
763
var IDB_RW = "readwrite";
764
var DB_NAME = "''' + options.indexeddb_name + '''";
765
var DB_VERSION = 1;
766
var METADATA_STORE_NAME = 'METADATA';
767
var PACKAGE_STORE_NAME = 'PACKAGES';
768
769
async function openDatabase() {
770
if (typeof indexedDB == 'undefined') {
771
throw new Error('using IndexedDB to cache data can only be done on a web page or in a web worker');
772
}
773
return new Promise((resolve, reject) => {
774
var openRequest = indexedDB.open(DB_NAME, DB_VERSION);
775
openRequest.onupgradeneeded = (event) => {
776
var db = /** @type {IDBDatabase} */ (event.target.result);
777
778
if (db.objectStoreNames.contains(PACKAGE_STORE_NAME)) {
779
db.deleteObjectStore(PACKAGE_STORE_NAME);
780
}
781
var packages = db.createObjectStore(PACKAGE_STORE_NAME);
782
783
if (db.objectStoreNames.contains(METADATA_STORE_NAME)) {
784
db.deleteObjectStore(METADATA_STORE_NAME);
785
}
786
var metadata = db.createObjectStore(METADATA_STORE_NAME);
787
};
788
openRequest.onsuccess = (event) => {
789
var db = /** @type {IDBDatabase} */ (event.target.result);
790
resolve(db);
791
};
792
openRequest.onerror = reject;
793
});
794
}
795
796
// This is needed as chromium has a limit on per-entry files in IndexedDB
797
// https://cs.chromium.org/chromium/src/content/renderer/indexed_db/webidbdatabase_impl.cc?type=cs&sq=package:chromium&g=0&l=177
798
// https://cs.chromium.org/chromium/src/out/Debug/gen/third_party/blink/public/mojom/indexeddb/indexeddb.mojom.h?type=cs&sq=package:chromium&g=0&l=60
799
// We set the chunk size to 64MB to stay well-below the limit
800
var CHUNK_SIZE = 64 * 1024 * 1024;
801
802
async function cacheRemotePackage(db, packageName, packageData, packageMeta) {
803
var transactionPackages = db.transaction([PACKAGE_STORE_NAME], IDB_RW);
804
var packages = transactionPackages.objectStore(PACKAGE_STORE_NAME);
805
var chunkSliceStart = 0;
806
var nextChunkSliceStart = 0;
807
var chunkCount = Math.ceil(packageData.byteLength / CHUNK_SIZE);
808
var finishedChunks = 0;
809
810
return new Promise((resolve, reject) => {
811
for (var chunkId = 0; chunkId < chunkCount; chunkId++) {
812
nextChunkSliceStart += CHUNK_SIZE;
813
var putPackageRequest = packages.put(
814
packageData.slice(chunkSliceStart, nextChunkSliceStart),
815
`package/${packageName}/${chunkId}`
816
);
817
chunkSliceStart = nextChunkSliceStart;
818
putPackageRequest.onsuccess = (event) => {
819
finishedChunks++;
820
if (finishedChunks == chunkCount) {
821
var transaction_metadata = db.transaction(
822
[METADATA_STORE_NAME],
823
IDB_RW
824
);
825
var metadata = transaction_metadata.objectStore(METADATA_STORE_NAME);
826
var putMetadataRequest = metadata.put(
827
{
828
'uuid': packageMeta.uuid,
829
'chunkCount': chunkCount
830
},
831
`metadata/${packageName}`
832
);
833
putMetadataRequest.onsuccess = (event) => resolve(packageData);
834
putMetadataRequest.onerror = reject;
835
}
836
};
837
putPackageRequest.onerror = reject;
838
}
839
});
840
}
841
842
/*
843
* Check if there's a cached package, and if so whether it's the latest available.
844
* Resolves to the cached metadata, or `null` if it is missing or out-of-date.
845
*/
846
async function checkCachedPackage(db, packageName) {
847
var transaction = db.transaction([METADATA_STORE_NAME], IDB_RO);
848
var metadata = transaction.objectStore(METADATA_STORE_NAME);
849
var getRequest = metadata.get(`metadata/${packageName}`);
850
return new Promise((resolve, reject) => {
851
getRequest.onsuccess = (event) => {
852
var result = event.target.result;
853
if (result && PACKAGE_UUID === result['uuid']) {
854
resolve(result);
855
} else {
856
resolve(null);
857
}
858
}
859
getRequest.onerror = reject;
860
});
861
}
862
863
async function fetchCachedPackage(db, packageName, metadata) {
864
var transaction = db.transaction([PACKAGE_STORE_NAME], IDB_RO);
865
var packages = transaction.objectStore(PACKAGE_STORE_NAME);
866
867
var chunksDone = 0;
868
var totalSize = 0;
869
var chunkCount = metadata['chunkCount'];
870
var chunks = new Array(chunkCount);
871
872
return new Promise((resolve, reject) => {
873
for (var chunkId = 0; chunkId < chunkCount; chunkId++) {
874
var getRequest = packages.get(`package/${packageName}/${chunkId}`);
875
getRequest.onsuccess = (event) => {
876
if (!event.target.result) {
877
reject(`CachedPackageNotFound for: ${packageName}`);
878
return;
879
}
880
// If there's only 1 chunk, there's nothing to concatenate it with so we can just return it now
881
if (chunkCount == 1) {
882
resolve(event.target.result);
883
} else {
884
chunksDone++;
885
totalSize += event.target.result.byteLength;
886
chunks.push(event.target.result);
887
if (chunksDone == chunkCount) {
888
if (chunksDone == 1) {
889
resolve(event.target.result);
890
} else {
891
var tempTyped = new Uint8Array(totalSize);
892
var byteOffset = 0;
893
for (var chunkId in chunks) {
894
var buffer = chunks[chunkId];
895
tempTyped.set(new Uint8Array(buffer), byteOffset);
896
byteOffset += buffer.byteLength;
897
buffer = undefined;
898
}
899
chunks = undefined;
900
resolve(tempTyped.buffer);
901
tempTyped = undefined;
902
}
903
}
904
}
905
};
906
getRequest.onerror = reject;
907
}
908
});
909
}\n'''
910
911
# add Node.js support code, if necessary
912
node_support_code = ''
913
if options.support_node:
914
node_support_code = '''
915
if (isNode) {
916
var contents = require('fs').readFileSync(packageName);
917
return new Uint8Array(contents).buffer;
918
}'''.strip()
919
920
ret += '''
921
async function fetchRemotePackage(packageName, packageSize) {
922
%(node_support_code)s
923
if (!Module['dataFileDownloads']) Module['dataFileDownloads'] = {};
924
try {
925
var response = await fetch(packageName);
926
} catch (e) {
927
throw new Error(`Network Error: ${packageName}`, {e});
928
}
929
if (!response.ok) {
930
throw new Error(`${response.status}: ${response.url}`);
931
}
932
933
const chunks = [];
934
const headers = response.headers;
935
const total = Number(headers.get('Content-Length') || packageSize);
936
let loaded = 0;
937
938
Module['setStatus'] && Module['setStatus']('Downloading data...');
939
const reader = response.body.getReader();
940
941
while (1) {
942
var {done, value} = await reader.read();
943
if (done) break;
944
chunks.push(value);
945
loaded += value.length;
946
Module['dataFileDownloads'][packageName] = {loaded, total};
947
948
let totalLoaded = 0;
949
let totalSize = 0;
950
951
for (const download of Object.values(Module['dataFileDownloads'])) {
952
totalLoaded += download.loaded;
953
totalSize += download.total;
954
}
955
956
Module['setStatus'] && Module['setStatus'](`Downloading data... (${totalLoaded}/${totalSize})`);
957
}
958
959
const packageData = new Uint8Array(chunks.map((c) => c.length).reduce((a, b) => a + b, 0));
960
let offset = 0;
961
for (const chunk of chunks) {
962
packageData.set(chunk, offset);
963
offset += chunk.length;
964
}
965
return packageData.buffer;
966
}\n''' % {'node_support_code': node_support_code}
967
968
code += '''
969
async function processPackageData(arrayBuffer) {
970
assert(arrayBuffer, 'Loading data file failed.');
971
assert(arrayBuffer.constructor.name === ArrayBuffer.name, 'bad input to processPackageData ' + arrayBuffer.constructor.name);
972
var byteArray = new Uint8Array(arrayBuffer);
973
var curr;
974
%s
975
}
976
Module['addRunDependency']('datafile_%s');\n''' % (use_data, js_manipulation.escape_for_js_string(data_target))
977
# use basename because from the browser's point of view,
978
# we need to find the datafile in the same dir as the html file
979
980
code += '''
981
if (!Module['preloadResults']) Module['preloadResults'] = {};\n'''
982
983
if options.use_preload_cache:
984
code += '''
985
async function preloadFallback(error) {
986
console.error(error);
987
console.error('falling back to default preload behavior');
988
processPackageData(await fetchRemotePackage(REMOTE_PACKAGE_NAME, REMOTE_PACKAGE_SIZE));
989
}
990
991
try {
992
var db = await openDatabase();
993
var pkgMetadata = await checkCachedPackage(db, PACKAGE_PATH + PACKAGE_NAME);
994
var useCached = !!pkgMetadata;
995
Module['preloadResults'][PACKAGE_NAME] = {fromCache: useCached};
996
if (useCached) {
997
processPackageData(await fetchCachedPackage(db, PACKAGE_PATH + PACKAGE_NAME, pkgMetadata));
998
} else {
999
var packageData = await fetchRemotePackage(REMOTE_PACKAGE_NAME, REMOTE_PACKAGE_SIZE);
1000
try {
1001
processPackageData(await cacheRemotePackage(db, PACKAGE_PATH + PACKAGE_NAME, packageData, {uuid:PACKAGE_UUID}))
1002
} catch (error) {
1003
console.error(error);
1004
processPackageData(packageData);
1005
}
1006
}
1007
} catch(e) {
1008
await preloadFallback(e)%s;
1009
}
1010
1011
Module['setStatus'] && Module['setStatus']('Downloading...');\n''' % catch_handler
1012
else:
1013
# Not using preload cache, so we might as well start the xhr ASAP,
1014
# potentially before JS parsing of the main codebase if it's after us.
1015
# Only tricky bit is the fetch is async, but also when runWithFS is called
1016
# is async, so we handle both orderings.
1017
ret += '''
1018
var fetchPromise;
1019
var fetched = Module['getPreloadedPackage'] && Module['getPreloadedPackage'](REMOTE_PACKAGE_NAME, REMOTE_PACKAGE_SIZE);
1020
1021
if (!fetched) {
1022
// Note that we don't use await here because we want to execute the
1023
// the rest of this function immediately.
1024
fetchPromise = fetchRemotePackage(REMOTE_PACKAGE_NAME, REMOTE_PACKAGE_SIZE)%s;
1025
}\n''' % catch_handler
1026
1027
code += '''
1028
Module['preloadResults'][PACKAGE_NAME] = {fromCache: false};
1029
if (!fetched) {
1030
fetched = await fetchPromise;
1031
}
1032
processPackageData(fetched);\n'''
1033
1034
ret += '''
1035
async function runWithFS(Module) {\n'''
1036
ret += code
1037
ret += '''
1038
}
1039
if (Module['calledRun']) {
1040
runWithFS(Module)%s;
1041
} else {
1042
if (!Module['preRun']) Module['preRun'] = [];
1043
Module['preRun'].push(runWithFS); // FS is not initialized yet, wait for it
1044
}\n''' % catch_handler
1045
1046
if options.separate_metadata:
1047
node_support_code = ''
1048
if options.support_node:
1049
node_support_code = '''
1050
if (isNode) {
1051
var contents = require('fs').readFileSync(metadataUrl, 'utf8');
1052
// The await here is needed, even though JSON.parse is a sync API. It works
1053
// around a issue with `removeRunDependency` otherwise being called to early
1054
// on the metadata object.
1055
var json = await JSON.parse(contents);
1056
return loadPackage(json);
1057
}'''.strip()
1058
1059
ret += '''
1060
Module['removeRunDependency']('%(metadata_file)s');
1061
}
1062
1063
async function runMetaWithFS() {
1064
Module['addRunDependency']('%(metadata_file)s');
1065
var metadataUrl = Module['locateFile'] ? Module['locateFile']('%(metadata_file)s', '') : '%(metadata_file)s';
1066
%(node_support_code)s
1067
var response = await fetch(metadataUrl);
1068
if (!response.ok) {
1069
throw new Error(`${response.status}: ${response.url}`);
1070
}
1071
var json = await response.json();
1072
await loadPackage(json);
1073
}
1074
1075
if (Module['calledRun']) {
1076
runMetaWithFS();
1077
} else {
1078
if (!Module['preRun']) Module['preRun'] = [];
1079
Module['preRun'].push(runMetaWithFS);
1080
}\n''' % {'node_support_code': node_support_code, 'metadata_file': os.path.basename(options.jsoutput + '.metadata')}
1081
else:
1082
ret += '''
1083
}
1084
loadPackage(%s);\n''' % json.dumps(metadata)
1085
1086
if options.export_es6:
1087
ret += '''
1088
});
1089
}
1090
// END the loadDataFile function
1091
'''
1092
else:
1093
ret += '''
1094
})();\n'''
1095
1096
return ret
1097
1098
1099
# Script entry point: run the packager and propagate its exit status
# to the shell. Raising SystemExit is equivalent to sys.exit(main()).
if __name__ == '__main__':
    raise SystemExit(main())
1101
1102