#!/usr/bin/env python3
# Copyright 2012 The Emscripten Authors. All rights reserved.
# Emscripten is available under two separate licenses, the MIT license and the
# University of Illinois/NCSA Open Source License. Both these licenses can be
# found in the LICENSE file.

"""A tool that generates FS API calls to generate a filesystem, and packages the files
to work with that.

This is called by emcc. You can also call it yourself.

You can split your files into "asset bundles", and create each bundle separately
with this tool. Then just include the generated js for each and they will load
the data and prepare it accordingly. This allows you to share assets and reduce
data downloads.

* If you run this yourself, separately/standalone from emcc, then the main program
  compiled by emcc must be built with filesystem support. You can do that with
  -sFORCE_FILESYSTEM (if you forget that, an unoptimized build or one with
  ASSERTIONS enabled will show an error suggesting you use that flag).

Usage:

  file_packager TARGET [--preload A [B..]] [--embed C [D..]] [--exclude E [F..]] [--js-output=OUTPUT.js] [--no-force] [--use-preload-cache] [--indexedDB-name=EM_PRELOAD_CACHE] [--separate-metadata] [--lz4] [--use-preload-plugins] [--no-node] [--export-es6] [--help]
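
  For example, a typical standalone invocation might look like this (the file
  and directory names here are illustrative only):

    file_packager game.data --preload assets --js-output=game.data.js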

  --preload  ,
  --embed    See emcc --help for more details on those options.

  --exclude E [F..]  Specifies filename pattern matches to use for excluding given files from being added to the package.
                     See https://docs.python.org/2/library/fnmatch.html for syntax.

  --from-emcc  Indicate that `file_packager` was called from `emcc` and will be further processed by it, so some code generation can be skipped here.

  --js-output=FILE  Writes output to FILE. If not specified, standard output is used.

  --obj-output=FILE  Create an object file from the embedded files, for direct linking into a wasm binary.

  --depfile=FILE  Writes a dependency list containing the list of directories and files walked, compatible with Make, Ninja, CMake, etc.

  --wasm64  When used with `--obj-output`, create a wasm64 object file.

  --export-name=EXPORT_NAME  Use a custom export name (default is `Module`).

  --export-es6  Wrap the generated code inside an ES6 exported function.

  --no-force  Don't create output if no valid input file is specified.

  --use-preload-cache  Stores the package in IndexedDB so that subsequent loads don't need to do XHR. Checks the package version.

  --indexedDB-name  Use the specified IndexedDB database name (default: 'EM_PRELOAD_CACHE').

  --separate-metadata  Stores package metadata separately. Only applicable when preloading and a js-output file is specified.

  --lz4  Uses LZ4. This compresses the data using LZ4 when this utility is run, then the client decompresses chunks on the fly, avoiding storing
         the entire decompressed data in memory at once. See LZ4 in src/settings.js; you must build the main program with that flag.

  --use-preload-plugins  Tells the file packager to run preload plugins on the files as they are loaded. This performs tasks like decoding images
                         and audio using the browser's codecs.

  --no-node  Disable Node.js support. It is enabled by default, which emits some extra code.

  --quiet  Suppress the reminder about using `FORCE_FILESYSTEM`.

Notes:

  * The file packager generates unix-style file paths. So if you are on Windows and a file is accessed at
    subdir\file, in JS it will be subdir/file. For simplicity we treat the web platform as a *NIX.
"""

import base64
import ctypes
import fnmatch
import hashlib
import json
import os
import posixpath
import shutil
import sys
from subprocess import PIPE
from textwrap import dedent
from typing import List

__scriptdir__ = os.path.dirname(os.path.abspath(__file__))
__rootdir__ = os.path.dirname(__scriptdir__)
sys.path.insert(0, __rootdir__)

from tools import shared, utils, js_manipulation, diagnostics
from tools.response_file import substitute_response_files


DEBUG = os.environ.get('EMCC_DEBUG')

excluded_patterns: List[str] = []
new_data_files = []
walked = []


class Options:
  def __init__(self):
    self.export_name = 'Module'
    self.has_preloaded = False
    self.has_embedded = False
    self.jsoutput = None
    self.obj_output = None
    self.depfile = None
    self.from_emcc = False
    self.quiet = False
    self.force = True
    # If set to True, IndexedDB (IDBFS in library_idbfs.js) is used to locally
    # cache VFS XHR so that subsequent page loads can read the data from the
    # offline cache instead.
    self.use_preload_cache = False
    self.indexeddb_name = 'EM_PRELOAD_CACHE'
    # If set to True, the package metadata is stored separately from the js-output
    # file, which makes the js-output file immutable to package content changes.
    # If set to False, the package metadata is stored inside the js-output file,
    # which makes the js-output file change on each invocation of this packager tool.
    self.separate_metadata = False
    self.lz4 = False
    self.use_preload_plugins = False
    self.support_node = True
    self.wasm64 = False
    self.export_es6 = False


class DataFile:
  def __init__(self, srcpath, dstpath, mode, explicit_dst_path):
    self.srcpath = srcpath
    self.dstpath = dstpath
    self.mode = mode
    self.explicit_dst_path = explicit_dst_path


options = Options()


def err(*args):
  print(*args, file=sys.stderr)


def base64_encode(b):
  b64 = base64.b64encode(b)
  return b64.decode('ascii')


def has_hidden_attribute(filepath):
  """Win32 code to test whether the given file has the hidden property set."""

  if sys.platform != 'win32':
    return False

  try:
    attrs = ctypes.windll.kernel32.GetFileAttributesW(filepath)
    assert attrs != -1
    result = bool(attrs & 2)
  except Exception:
    result = False
  return result


def should_ignore(fullname):
  """The packager should never preload/embed files if the file
  is hidden (Win32) or it matches any pattern specified in --exclude."""
  if has_hidden_attribute(fullname):
    return True

  return any(fnmatch.fnmatch(fullname, p) for p in excluded_patterns)


def add(mode, rootpathsrc, rootpathdst):
  """Expand directories into individual files

  rootpathsrc: The path name of the root directory on the local FS we are
               adding to emscripten virtual FS.
  rootpathdst: The name we want to make the source path available on the
               emscripten virtual FS.
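
  For example (illustrative): add('preload', 'assets/textures', '/textures')
  walks assets/textures and maps every file it finds to /textures/<relative path>
  in the Emscripten virtual FS.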
  """
  walked.append(rootpathsrc)
  for dirpath, dirnames, filenames in os.walk(rootpathsrc):
    new_dirnames = []
    for name in dirnames:
      fullname = os.path.join(dirpath, name)
      if not should_ignore(fullname):
        walked.append(fullname)
        new_dirnames.append(name)
      elif DEBUG:
        err('Skipping directory "%s" from inclusion in the emscripten '
            'virtual file system.' % fullname)
    for name in filenames:
      fullname = os.path.join(dirpath, name)
      if not should_ignore(fullname):
        walked.append(fullname)
        # Convert source filename relative to root directory of target FS.
        dstpath = os.path.join(rootpathdst,
                               os.path.relpath(fullname, rootpathsrc))
        new_data_files.append(DataFile(srcpath=fullname, dstpath=dstpath,
                                       mode=mode, explicit_dst_path=True))
      elif DEBUG:
        err('Skipping file "%s" from inclusion in the emscripten '
            'virtual file system.' % fullname)
    dirnames.clear()
    dirnames.extend(new_dirnames)


def to_asm_string(string):
  """Convert a python string to a string suitable for including in an
  assembly file using the `.asciz` directive.

  The result will be a UTF-8 encoded string in the data section.
  """
  # See MCAsmStreamer::PrintQuotedString in llvm/lib/MC/MCAsmStreamer.cpp
  # And isPrint in llvm/include/llvm/ADT/StringExtras.h

  def is_print(c):
    return c >= 0x20 and c <= 0x7E

  def escape(c):
    if is_print(c):
      return chr(c)
    escape_chars = {
        '\b': '\\b',
        '\f': '\\f',
        '\n': '\\n',
        '\r': '\\r',
        '\t': '\\t',
    }
    if c in escape_chars:
      return escape_chars[c]
    # Encode all other chars as three octal digits(!)
    return '\\%s%s%s' % (oct(c >> 6), oct(c >> 3), oct(c >> 0))

  return ''.join(escape(c) for c in string.encode('utf-8'))


def to_c_symbol(filename, used):
  """Convert a filename (python string) to a legal C symbol, avoiding collisions."""
  def escape(c):
    if c.isalnum():
      return c
    else:
      return '_'
  c_symbol = ''.join(escape(c) for c in filename)
  # Handle collisions
  if c_symbol in used:
    counter = 2
    while c_symbol + str(counter) in used:
      counter = counter + 1
    c_symbol = c_symbol + str(counter)
  used.add(c_symbol)
  return c_symbol


def generate_object_file(data_files):
  embed_files = [f for f in data_files if f.mode == 'embed']
  assert embed_files

  asm_file = shared.replace_suffix(options.obj_output, '.s')

  used = set()
  for f in embed_files:
    f.c_symbol_name = '__em_file_data_%s' % to_c_symbol(f.dstpath, used)

  with open(asm_file, 'w') as out:
    out.write('# Emscripten embedded file data, generated by tools/file_packager.py\n')

    for f in embed_files:
      if DEBUG:
        err('embedding %s at %s' % (f.srcpath, f.dstpath))

      size = os.path.getsize(f.srcpath)
      dstpath = to_asm_string(f.dstpath)
      srcpath = utils.normalize_path(f.srcpath)
      out.write(dedent(f'''
      .section .rodata.{f.c_symbol_name},"",@

      # The name of the file
      {f.c_symbol_name}_name:
      .asciz "{dstpath}"
      .size {f.c_symbol_name}_name, {len(dstpath) + 1}

      # The size of the file followed by the content itself
      {f.c_symbol_name}:
      .incbin "{srcpath}"
      .size {f.c_symbol_name}, {size}
      '''))

    if options.wasm64:
      align = 3
      ptr_type = 'i64'
      bits = 64
    else:
      align = 2
      ptr_type = 'i32'
      bits = 32
    out.write(dedent(f'''
    .functype _emscripten_fs_load_embedded_files ({ptr_type}) -> ()
    .section .text,"",@
    init_file_data:
      .functype init_file_data () -> ()
      global.get __emscripten_embedded_file_data@GOT
      call _emscripten_fs_load_embedded_files
      end_function

    # Run init_file_data on startup.
    # See system/lib/README.md for ordering of system constructors.
    .section .init_array.49,"",@
    .p2align {align}
    .int{bits} init_file_data

    # A list of triples of:
    # (file_name_ptr, file_data_size, file_data_ptr)
    # The list is null terminated with a single 0
    .section .rodata.__emscripten_embedded_file_data,"",@
    __emscripten_embedded_file_data:
    .p2align {align}
    '''))

    for f in embed_files:
      # The `.dc.a` directive gives us a pointer (address) sized entry.
      # See https://sourceware.org/binutils/docs/as/Dc.html
      out.write(dedent(f'''\
      .p2align %s
      .dc.a {f.c_symbol_name}_name
      .p2align %s
      .int32 {os.path.getsize(f.srcpath)}
      .p2align %s
      .dc.a {f.c_symbol_name}
      ''' % (align, align, align)))

    ptr_size = 4
    elem_size = (2 * ptr_size) + 4
    total_size = len(embed_files) * elem_size + 4
    out.write(dedent(f'''\
      .dc.a 0
      .size __emscripten_embedded_file_data, {total_size}
      '''))
  cmd = [shared.EMCC, '-c', asm_file, '-o', options.obj_output]
  if options.wasm64:
    target = 'wasm64-unknown-emscripten'
    cmd.append('-Wno-experimental')
  else:
    target = 'wasm32-unknown-emscripten'
  cmd.append('--target=' + target)
  shared.check_call(cmd)


def main():  # noqa: C901, PLR0912, PLR0915
  """Future modifications should consider refactoring to reduce complexity.

  * The McCabe cyclomatic complexity is currently 60 vs 10 recommended.
  * There are currently 63 branches vs 12 recommended.
  * There are currently 151 statements vs 50 recommended.

  To revalidate these numbers, run `ruff check --select=C901,PLR091`.
  """
  if len(sys.argv) == 1:
    err('''Usage: file_packager TARGET [--preload A [B..]] [--embed C [D..]] [--exclude E [F..]] [--js-output=OUTPUT.js] [--no-force] [--use-preload-cache] [--indexedDB-name=EM_PRELOAD_CACHE] [--separate-metadata] [--lz4] [--use-preload-plugins] [--no-node] [--export-es6] [--help]
Try 'file_packager --help' for more details.''')
    return 1

  # read response files very early on
  try:
    args = substitute_response_files(sys.argv[1:])
  except OSError as e:
    shared.exit_with_error(e)

  if '--help' in args:
    print(__doc__.strip())
    return 0

  data_target = args[0]
  data_files = []
  leading = ''

  for arg in args[1:]:
    if arg == '--preload':
      leading = 'preload'
    elif arg == '--embed':
      leading = 'embed'
    elif arg == '--exclude':
      leading = 'exclude'
    elif arg == '--no-force':
      options.force = False
      leading = ''
    elif arg == '--export-es6':
      options.export_es6 = True
      leading = ''
    elif arg == '--use-preload-cache':
      options.use_preload_cache = True
      leading = ''
    elif arg.startswith('--indexedDB-name'):
      options.indexeddb_name = arg.split('=', 1)[1] if '=' in arg else None
      leading = ''
    elif arg == '--no-heap-copy':
      diagnostics.warn('ignoring legacy flag --no-heap-copy (that is the only mode supported now)')
      leading = ''
    elif arg == '--separate-metadata':
      options.separate_metadata = True
      leading = ''
    elif arg == '--lz4':
      options.lz4 = True
      leading = ''
    elif arg == '--use-preload-plugins':
      options.use_preload_plugins = True
      leading = ''
    elif arg == '--no-node':
      options.support_node = False
      leading = ''
    elif arg.startswith('--js-output'):
      options.jsoutput = arg.split('=', 1)[1] if '=' in arg else None
      leading = ''
    elif arg.startswith('--obj-output'):
      options.obj_output = arg.split('=', 1)[1] if '=' in arg else None
      leading = ''
    elif arg.startswith('--depfile'):
      options.depfile = arg.split('=', 1)[1] if '=' in arg else None
      leading = ''
    elif arg == '--wasm64':
      options.wasm64 = True
    elif arg.startswith('--export-name'):
      if '=' in arg:
        options.export_name = arg.split('=', 1)[1]
      leading = ''
    elif arg == '--from-emcc':
      options.from_emcc = True
      leading = ''
    elif arg == '--quiet':
      options.quiet = True
    elif leading in {'preload', 'embed'}:
      mode = leading
      # position of @ if we're doing 'src@dst'. '__' is used to keep the index
      # the same as in the original if they escaped with '@@'.
      at_position = arg.replace('@@', '__').find('@')
      # '@@' in input string means there is an actual @ character, a single '@'
      # means the 'src@dst' notation.
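      # For example (illustrative): 'assets@/data' maps the local path 'assets'
      # to '/data' in the virtual FS, while 'weird@@name.txt' refers to the
      # literal local file 'weird@name.txt' with no remapping.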
      uses_at_notation = (at_position != -1)

      if uses_at_notation:
        srcpath = arg[0:at_position].replace('@@', '@')  # split around the @
        dstpath = arg[at_position + 1:].replace('@@', '@')
      else:
        # Use source path as destination path.
        srcpath = dstpath = arg.replace('@@', '@')
      if os.path.isfile(srcpath) or os.path.isdir(srcpath):
        data_files.append(DataFile(srcpath=srcpath, dstpath=dstpath, mode=mode,
                                   explicit_dst_path=uses_at_notation))
      else:
        diagnostics.error(f'{arg} does not exist')
    elif leading == 'exclude':
      excluded_patterns.append(arg)
    else:
      diagnostics.error('Unknown parameter:', arg)

  options.has_preloaded = any(f.mode == 'preload' for f in data_files)
  options.has_embedded = any(f.mode == 'embed' for f in data_files)

  if options.has_preloaded and options.has_embedded:
    diagnostics.warn('support for using --preload and --embed in the same command is scheduled '
                     'for deprecation. If you need this feature please comment at '
                     'https://github.com/emscripten-core/emscripten/issues/24803')

  if options.separate_metadata and (not options.has_preloaded or not options.jsoutput):
    diagnostics.error('cannot separate-metadata without both --preloaded files and a specified --js-output')

  if not options.from_emcc and not options.quiet:
    diagnostics.warn('Remember to build the main file with `-sFORCE_FILESYSTEM` '
                     'so that it includes support for loading this file package')

  if options.jsoutput and os.path.abspath(options.jsoutput) == os.path.abspath(data_target):
    diagnostics.error('TARGET should not have the same value as --js-output')

  if options.from_emcc and options.export_es6:
    diagnostics.error("Can't use --export-es6 option together with --from-emcc since the code should be embedded within emcc's code")

  walked.append(__file__)
  for file_ in data_files:
    if not should_ignore(file_.srcpath):
      if os.path.isdir(file_.srcpath):
        add(file_.mode, file_.srcpath, file_.dstpath)
      else:
        walked.append(file_.srcpath)
        new_data_files.append(file_)
  data_files = [file_ for file_ in new_data_files
                if not os.path.isdir(file_.srcpath)]
  if len(data_files) == 0:
    diagnostics.error('Nothing to do!')

  # Absolutize paths, and check that they make sense
  # os.getcwd() always returns the hard path with any symbolic links resolved,
  # even if we cd'd into a symbolic link.
  curr_abspath = os.path.abspath(os.getcwd())

  for file_ in data_files:
    if not file_.explicit_dst_path:
      # This file was not defined with src@dst, so we inferred the destination
      # from the source. In that case, we require that the destination be
      # within the current working directory.
      path = file_.dstpath
      # Use os.path.realpath to resolve any symbolic links to hard paths,
      # to match the structure in curr_abspath.
      abspath = os.path.realpath(os.path.abspath(path))
      if DEBUG:
        err(path, abspath, curr_abspath)
      if not abspath.startswith(curr_abspath):
        err('Error: Embedding "%s" which is not contained within the current directory '
            '"%s". This is invalid since the current directory becomes the '
            'root that the generated code will see. To include files outside of the current '
            'working directory you can use the `--preload-file srcpath@dstpath` syntax to '
            'explicitly specify the target location.' % (path, curr_abspath))
        sys.exit(1)
      file_.dstpath = abspath[len(curr_abspath) + 1:]
      if os.path.isabs(path):
        diagnostics.warn('Embedding an absolute file/directory name "%s" to the '
                         'virtual filesystem. The file will be made available in the '
                         'relative path "%s". You can use the `--preload-file srcpath@dstpath` '
                         'syntax to explicitly specify the target location the absolute source '
                         'path should be directed to.' % (path, file_.dstpath))

  for file_ in data_files:
    # name in the filesystem, native and emulated
    file_.dstpath = utils.normalize_path(file_.dstpath)
    # If the user has submitted a directory name as the destination but omitted
    # the destination filename, use the filename from the source file
    if file_.dstpath.endswith('/'):
      file_.dstpath = file_.dstpath + os.path.basename(file_.srcpath)
    # make destination path always relative to the root
    file_.dstpath = posixpath.normpath(os.path.join('/', file_.dstpath))
    if DEBUG:
      err('Packaging file "%s" to VFS in path "%s".' % (file_.srcpath, file_.dstpath))

  # Remove duplicates (can occur naively, for example preload dir/, preload dir/subdir/)
  seen = set()

  def was_seen(name):
    if name in seen:
      return True
    seen.add(name)
    return False

  # The files are sorted by the dstpath to make the order of files reproducible
  # across file systems / operating systems (os.walk does not produce the same
  # file order on different file systems / operating systems)
  data_files = sorted(data_files, key=lambda file_: file_.dstpath)
  data_files = [file_ for file_ in data_files if not was_seen(file_.dstpath)]

  metadata = {'files': []}

  if options.depfile:
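    # The emitted depfile lists each output target, then ':', then every walked
    # input, one entry per line with trailing backslashes, e.g. (illustrative):
    #   out.data \
    #   out.data.js \
    #   : \
    #   assets/a.png \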
    targets = []
    if options.obj_output:
      targets.append(options.obj_output)
    if options.jsoutput:
      targets.append(data_target)
      targets.append(options.jsoutput)
    with open(options.depfile, 'w') as f:
      for target in targets:
        if target:
          f.write(escape_for_makefile(target))
          f.write(' \\\n')
      f.write(': \\\n')
      for dependency in walked:
        f.write(escape_for_makefile(dependency))
        f.write(' \\\n')

  if options.obj_output:
    if not options.has_embedded:
      diagnostics.error('--obj-output is only applicable when embedding files')
    generate_object_file(data_files)
    if not options.has_preloaded:
      return 0

  ret = generate_js(data_target, data_files, metadata)

  if options.force or len(data_files):
    if options.jsoutput is None:
      print(ret)
    else:
      # Overwrite the old jsoutput file (if exists) only when its content
      # differs from the current generated one, otherwise leave the file
      # untouched preserving its old timestamp
      if os.path.isfile(options.jsoutput):
        old = utils.read_file(options.jsoutput)
        if old != ret:
          utils.write_file(options.jsoutput, ret)
      else:
        utils.write_file(options.jsoutput, ret)
      if options.separate_metadata:
        utils.write_file(options.jsoutput + '.metadata', json.dumps(metadata, separators=(',', ':')))

  return 0


def escape_for_makefile(fpath):
  # Escapes for CMake's "pathname" grammar as described here:
  # https://cmake.org/cmake/help/latest/command/add_custom_command.html#grammar-token-depfile-pathname
  # Which is congruent with how Ninja and GNU Make expect characters escaped.
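  # For example (illustrative): 'my file#1$x.dat' becomes 'my\ file\#1$$x.dat'.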
  fpath = utils.normalize_path(fpath)
  return fpath.replace('$', '$$').replace('#', '\\#').replace(' ', '\\ ')


def generate_js(data_target, data_files, metadata):
  # emcc will add this to the output itself, so it is only needed for
  # standalone calls
  if options.from_emcc:
    ret = ''
  else:
    if options.export_es6:
      ret = 'export default async function loadDataFile(Module) {\n'
    else:
      ret = '''
    var Module = typeof %(EXPORT_NAME)s != 'undefined' ? %(EXPORT_NAME)s : {};\n''' % {"EXPORT_NAME": options.export_name}

  ret += '''
    Module['expectedDataFileDownloads'] ??= 0;
    Module['expectedDataFileDownloads']++;'''

  if not options.export_es6:
    ret += '''
    (() => {'''

  ret += '''
    // Do not attempt to redownload the virtual filesystem data when in a pthread or a Wasm Worker context.
    var isPthread = typeof ENVIRONMENT_IS_PTHREAD != 'undefined' && ENVIRONMENT_IS_PTHREAD;
    var isWasmWorker = typeof ENVIRONMENT_IS_WASM_WORKER != 'undefined' && ENVIRONMENT_IS_WASM_WORKER;
    if (isPthread || isWasmWorker) return;\n'''

  if options.support_node:
    ret += " var isNode = typeof process === 'object' && typeof process.versions === 'object' && typeof process.versions.node === 'string';\n"

  if options.support_node and options.export_es6:
    ret += '''if (isNode) {
      const { createRequire } = await import('module');
      /** @suppress{duplicate} */
      var require = createRequire(import.meta.url);
    }\n'''

  if options.export_es6:
    ret += 'return new Promise((loadDataResolve, loadDataReject) => {\n'
  ret += ' async function loadPackage(metadata) {\n'

  code = '''
    function assert(check, msg) {
      if (!check) throw new Error(msg);
    }\n'''

  # Set up folders
  partial_dirs = []
  for file_ in data_files:
    dirname = os.path.dirname(file_.dstpath)
    dirname = dirname.lstrip('/')  # absolute paths start with '/', remove that
    if dirname != '':
      parts = dirname.split('/')
      for i in range(len(parts)):
        partial = '/'.join(parts[:i + 1])
        if partial not in partial_dirs:
          code += ('''Module['FS_createPath'](%s, %s, true, true);\n'''
                   % (json.dumps('/' + '/'.join(parts[:i])), json.dumps(parts[i])))
          partial_dirs.append(partial)

  if options.has_preloaded:
    # Bundle all datafiles into one archive. Avoids doing lots of simultaneous
    # XHRs which has overhead.
    start = 0
    with open(data_target, 'wb') as data:
      for file_ in data_files:
        file_.data_start = start
        curr = utils.read_binary(file_.srcpath)
        file_.data_end = start + len(curr)
        start += len(curr)
        data.write(curr)

    if start > 256 * 1024 * 1024:
      diagnostics.warn('file packager is creating an asset bundle of %d MB. '
                       'this is very large, and browsers might have trouble loading it. '
                       'see https://hacks.mozilla.org/2015/02/synchronous-execution-and-filesystem-access-in-emscripten/'
                       % (start / (1024 * 1024)))

  create_preloaded = '''
      try {
        // canOwn this data in the filesystem, it is a slice into the heap that will never change
        await Module['FS_preloadFile'](name, null, data, true, true, false, true);
        Module['removeRunDependency'](`fp ${name}`);
      } catch (e) {
        err(`Preloading file ${name} failed`);
      }\n'''
  create_data = '''// canOwn this data in the filesystem, it is a slice into the heap that will never change
      Module['FS_createDataFile'](name, null, data, true, true, true);
      Module['removeRunDependency'](`fp ${name}`);'''

  finish_handler = create_preloaded if options.use_preload_plugins else create_data

  if not options.lz4:
    # Data requests - for getting a block of data out of the big archive - have
    # a similar API to XHRs
    code += '''
      for (var file of metadata['files']) {
        var name = file['filename']
        Module['addRunDependency'](`fp ${name}`);
      }\n'''

  if options.has_embedded and not options.obj_output:
    diagnostics.warn('--obj-output is recommended when using --embed. This outputs an object file for linking directly into your application, which is more efficient than JS encoding')

  catch_handler = ''
  if options.export_es6:
    catch_handler += '''
      .catch((error) => {
        loadDataReject(error);
      })'''

  for counter, file_ in enumerate(data_files):
    filename = file_.dstpath
    dirname = os.path.dirname(filename)
    basename = os.path.basename(filename)
    if file_.mode == 'embed':
      if not options.obj_output:
        # Embed (only needed when not generating object file output)
        data = base64_encode(utils.read_binary(file_.srcpath))
        code += " var fileData%d = '%s';\n" % (counter, data)
        # canOwn this data in the filesystem (i.e. there is no need to create a copy in the FS layer).
        code += (" Module['FS_createDataFile']('%s', '%s', atob(fileData%d), true, true, true);\n"
                 % (dirname, basename, counter))
    elif file_.mode == 'preload':
      # Preload
      metadata['files'].append({
        'filename': file_.dstpath,
        'start': file_.data_start,
        'end': file_.data_end,
      })
    else:
      assert 0

  if options.has_preloaded:
    if not options.lz4:
      # Get the big archive and split it up
      use_data = '''// Reuse the bytearray from the XHR as the source for file reads.
          for (var file of metadata['files']) {
            var name = file['filename'];
            var data = byteArray.subarray(file['start'], file['end']);
            %s
          }
          Module['removeRunDependency']('datafile_%s');''' % (finish_handler,
                                                              js_manipulation.escape_for_js_string(data_target))
    else:
      # LZ4FS usage
      temp = data_target + '.orig'
      shutil.move(data_target, temp)
      meta = shared.run_js_tool(utils.path_from_root('tools/lz4-compress.mjs'),
                                [temp, data_target], stdout=PIPE)
      os.unlink(temp)
      use_data = '''var compressedData = %s;
          compressedData['data'] = byteArray;
          assert(typeof Module['LZ4'] === 'object', 'LZ4 not present - was your app built with -sLZ4?');
          Module['LZ4'].loadPackage({ 'metadata': metadata, 'compressedData': compressedData }, %s);
          Module['removeRunDependency']('datafile_%s');''' % (meta, "true" if options.use_preload_plugins else "false", js_manipulation.escape_for_js_string(data_target))

    if options.export_es6:
      use_data += '\nloadDataResolve();'

    package_name = data_target
    remote_package_size = os.path.getsize(package_name)
    remote_package_name = os.path.basename(package_name)
    ret += '''
    var PACKAGE_PATH = '';
    if (typeof window === 'object') {
      PACKAGE_PATH = window['encodeURIComponent'](window.location.pathname.substring(0, window.location.pathname.lastIndexOf('/')) + '/');
    } else if (typeof process === 'undefined' && typeof location !== 'undefined') {
      // web worker
      PACKAGE_PATH = encodeURIComponent(location.pathname.substring(0, location.pathname.lastIndexOf('/')) + '/');
    }
    var PACKAGE_NAME = '%s';
    var REMOTE_PACKAGE_BASE = '%s';
    var REMOTE_PACKAGE_NAME = Module['locateFile']?.(REMOTE_PACKAGE_BASE, '') ?? REMOTE_PACKAGE_BASE;\n''' % (js_manipulation.escape_for_js_string(data_target), js_manipulation.escape_for_js_string(remote_package_name))
    metadata['remote_package_size'] = remote_package_size
    ret += " var REMOTE_PACKAGE_SIZE = metadata['remote_package_size'];\n"

    if options.use_preload_cache:
      # Set the id to a hash of the preloaded data, so that caches survive over multiple builds
      # if the data has not changed.
      data = utils.read_binary(data_target)
      package_uuid = 'sha256-' + hashlib.sha256(data).hexdigest()
      metadata['package_uuid'] = str(package_uuid)

      code += r'''
      var PACKAGE_UUID = metadata['package_uuid'];
      var IDB_RO = "readonly";
      var IDB_RW = "readwrite";
      var DB_NAME = "''' + options.indexeddb_name + '''";
      var DB_VERSION = 1;
      var METADATA_STORE_NAME = 'METADATA';
      var PACKAGE_STORE_NAME = 'PACKAGES';

      async function openDatabase() {
        if (typeof indexedDB == 'undefined') {
          throw new Error('using IndexedDB to cache data can only be done on a web page or in a web worker');
        }
        return new Promise((resolve, reject) => {
          var openRequest = indexedDB.open(DB_NAME, DB_VERSION);
          openRequest.onupgradeneeded = (event) => {
            var db = /** @type {IDBDatabase} */ (event.target.result);

            if (db.objectStoreNames.contains(PACKAGE_STORE_NAME)) {
              db.deleteObjectStore(PACKAGE_STORE_NAME);
            }
            var packages = db.createObjectStore(PACKAGE_STORE_NAME);

            if (db.objectStoreNames.contains(METADATA_STORE_NAME)) {
              db.deleteObjectStore(METADATA_STORE_NAME);
            }
            var metadata = db.createObjectStore(METADATA_STORE_NAME);
          };
          openRequest.onsuccess = (event) => {
            var db = /** @type {IDBDatabase} */ (event.target.result);
            resolve(db);
          };
          openRequest.onerror = reject;
        });
      }

      // This is needed as chromium has a limit on per-entry files in IndexedDB
      // https://cs.chromium.org/chromium/src/content/renderer/indexed_db/webidbdatabase_impl.cc?type=cs&sq=package:chromium&g=0&l=177
      // https://cs.chromium.org/chromium/src/out/Debug/gen/third_party/blink/public/mojom/indexeddb/indexeddb.mojom.h?type=cs&sq=package:chromium&g=0&l=60
      // We set the chunk size to 64MB to stay well below the limit
      var CHUNK_SIZE = 64 * 1024 * 1024;

      async function cacheRemotePackage(db, packageName, packageData, packageMeta) {
        var transactionPackages = db.transaction([PACKAGE_STORE_NAME], IDB_RW);
        var packages = transactionPackages.objectStore(PACKAGE_STORE_NAME);
        var chunkSliceStart = 0;
        var nextChunkSliceStart = 0;
        var chunkCount = Math.ceil(packageData.byteLength / CHUNK_SIZE);
        var finishedChunks = 0;

        return new Promise((resolve, reject) => {
          for (var chunkId = 0; chunkId < chunkCount; chunkId++) {
            nextChunkSliceStart += CHUNK_SIZE;
            var putPackageRequest = packages.put(
              packageData.slice(chunkSliceStart, nextChunkSliceStart),
              `package/${packageName}/${chunkId}`
            );
            chunkSliceStart = nextChunkSliceStart;
            putPackageRequest.onsuccess = (event) => {
              finishedChunks++;
              if (finishedChunks == chunkCount) {
                var transaction_metadata = db.transaction(
                  [METADATA_STORE_NAME],
                  IDB_RW
                );
                var metadata = transaction_metadata.objectStore(METADATA_STORE_NAME);
                var putMetadataRequest = metadata.put(
                  {
                    'uuid': packageMeta.uuid,
                    'chunkCount': chunkCount
                  },
                  `metadata/${packageName}`
                );
                putMetadataRequest.onsuccess = (event) => resolve(packageData);
                putMetadataRequest.onerror = reject;
              }
            };
            putPackageRequest.onerror = reject;
          }
        });
      }

      /*
       * Check if there's a cached package, and if so whether it's the latest available.
       * Resolves to the cached metadata, or `null` if it is missing or out-of-date.
       */
      async function checkCachedPackage(db, packageName) {
        var transaction = db.transaction([METADATA_STORE_NAME], IDB_RO);
        var metadata = transaction.objectStore(METADATA_STORE_NAME);
        var getRequest = metadata.get(`metadata/${packageName}`);
        return new Promise((resolve, reject) => {
          getRequest.onsuccess = (event) => {
            var result = event.target.result;
            if (result && PACKAGE_UUID === result['uuid']) {
              resolve(result);
            } else {
              resolve(null);
            }
          }
          getRequest.onerror = reject;
        });
      }

      async function fetchCachedPackage(db, packageName, metadata) {
        var transaction = db.transaction([PACKAGE_STORE_NAME], IDB_RO);
        var packages = transaction.objectStore(PACKAGE_STORE_NAME);

        var chunksDone = 0;
        var totalSize = 0;
        var chunkCount = metadata['chunkCount'];
        var chunks = new Array(chunkCount);

        return new Promise((resolve, reject) => {
          for (var chunkId = 0; chunkId < chunkCount; chunkId++) {
            var getRequest = packages.get(`package/${packageName}/${chunkId}`);
            getRequest.onsuccess = (event) => {
              if (!event.target.result) {
                reject(`CachedPackageNotFound for: ${packageName}`);
                return;
              }
              // If there's only 1 chunk, there's nothing to concatenate it with so we can just return it now
              if (chunkCount == 1) {
                resolve(event.target.result);
              } else {
                chunksDone++;
                totalSize += event.target.result.byteLength;
                chunks.push(event.target.result);
                if (chunksDone == chunkCount) {
                  if (chunksDone == 1) {
                    resolve(event.target.result);
                  } else {
                    var tempTyped = new Uint8Array(totalSize);
                    var byteOffset = 0;
                    for (var chunkId in chunks) {
                      var buffer = chunks[chunkId];
                      tempTyped.set(new Uint8Array(buffer), byteOffset);
                      byteOffset += buffer.byteLength;
                      buffer = undefined;
                    }
                    chunks = undefined;
                    resolve(tempTyped.buffer);
                    tempTyped = undefined;
                  }
                }
              }
            };
            getRequest.onerror = reject;
          }
        });
      }\n'''

  # add Node.js support code, if necessary
  node_support_code = ''
  if options.support_node:
    node_support_code = '''
      if (isNode) {
        var fsPromises = require('fs/promises');
        var contents = await fsPromises.readFile(packageName);
        return contents.buffer;
      }'''.strip()

  ret += '''
  async function fetchRemotePackage(packageName, packageSize) {
    %(node_support_code)s
    Module['dataFileDownloads'] ??= {};
    try {
      var response = await fetch(packageName);
    } catch (e) {
      throw new Error(`Network Error: ${packageName}`, {e});
    }
    if (!response.ok) {
      throw new Error(`${response.status}: ${response.url}`);
    }

    const chunks = [];
    const headers = response.headers;
    const total = Number(headers.get('Content-Length') ?? packageSize);
    let loaded = 0;

    Module['setStatus']?.('Downloading data...');
    const reader = response.body.getReader();

    while (1) {
      var {done, value} = await reader.read();
      if (done) break;
      chunks.push(value);
      loaded += value.length;
      Module['dataFileDownloads'][packageName] = {loaded, total};

      let totalLoaded = 0;
      let totalSize = 0;

      for (const download of Object.values(Module['dataFileDownloads'])) {
        totalLoaded += download.loaded;
        totalSize += download.total;
      }

      Module['setStatus']?.(`Downloading data... (${totalLoaded}/${totalSize})`);
    }

    const packageData = new Uint8Array(chunks.map((c) => c.length).reduce((a, b) => a + b, 0));
    let offset = 0;
    for (const chunk of chunks) {
      packageData.set(chunk, offset);
      offset += chunk.length;
    }
    return packageData.buffer;
  }\n''' % {'node_support_code': node_support_code}

  code += '''
  async function processPackageData(arrayBuffer) {
    assert(arrayBuffer, 'Loading data file failed.');
    assert(arrayBuffer.constructor.name === ArrayBuffer.name, 'bad input to processPackageData');
    var byteArray = new Uint8Array(arrayBuffer);
    var curr;
    %s
  }
  Module['addRunDependency']('datafile_%s');\n''' % (use_data, js_manipulation.escape_for_js_string(data_target))
  # use basename because from the browser's point of view,
  # we need to find the datafile in the same dir as the html file

  code += '''
  Module['preloadResults'] ??= {};\n'''

  if options.use_preload_cache:
    code += '''
    async function preloadFallback(error) {
      console.error(error);
      console.error('falling back to default preload behavior');
      processPackageData(await fetchRemotePackage(REMOTE_PACKAGE_NAME, REMOTE_PACKAGE_SIZE));
    }

    try {
      var db = await openDatabase();
      var pkgMetadata = await checkCachedPackage(db, PACKAGE_PATH + PACKAGE_NAME);
      var useCached = !!pkgMetadata;
      Module['preloadResults'][PACKAGE_NAME] = {fromCache: useCached};
      if (useCached) {
        processPackageData(await fetchCachedPackage(db, PACKAGE_PATH + PACKAGE_NAME, pkgMetadata));
      } else {
        var packageData = await fetchRemotePackage(REMOTE_PACKAGE_NAME, REMOTE_PACKAGE_SIZE);
        try {
          processPackageData(await cacheRemotePackage(db, PACKAGE_PATH + PACKAGE_NAME, packageData, {uuid:PACKAGE_UUID}))
        } catch (error) {
          console.error(error);
          processPackageData(packageData);
        }
      }
    } catch(e) {
      await preloadFallback(e)%s;
    }

    Module['setStatus']?.('Downloading...');\n''' % catch_handler
  else:
    # Not using the preload cache, so we might as well start the fetch ASAP,
    # potentially before JS parsing of the main codebase if it's after us.
    # The only tricky bit is that the fetch is async, and so is the point at
    # which runWithFS gets called, so we handle both orderings.
    ret += '''
    var fetchPromise;
    var fetched = Module['getPreloadedPackage']?.(REMOTE_PACKAGE_NAME, REMOTE_PACKAGE_SIZE);

    if (!fetched) {
      // Note that we don't use await here because we want to execute
      // the rest of this function immediately.
      fetchPromise = fetchRemotePackage(REMOTE_PACKAGE_NAME, REMOTE_PACKAGE_SIZE)%s;
    }\n''' % catch_handler

    code += '''
    Module['preloadResults'][PACKAGE_NAME] = {fromCache: false};
    if (!fetched) {
      fetched = await fetchPromise;
    }
    processPackageData(fetched);\n'''

  ret += '''
  async function runWithFS(Module) {\n'''
  ret += code
  ret += '''
  }
  if (Module['calledRun']) {
    runWithFS(Module)%s;
  } else {
    (Module['preRun'] ??= []).push(runWithFS); // FS is not initialized yet, wait for it
  }\n''' % catch_handler

  if options.separate_metadata:
    node_support_code = ''
    if options.support_node:
      node_support_code = '''
        if (isNode) {
          var fsPromises = require('fs/promises');
          var contents = await fsPromises.readFile(metadataUrl, 'utf8');
          return loadPackage(JSON.parse(contents));
        }'''.strip()

    ret += '''
    Module['removeRunDependency']('%(metadata_file)s');
  }

  async function runMetaWithFS() {
    Module['addRunDependency']('%(metadata_file)s');
    var metadataUrl = Module['locateFile']?.('%(metadata_file)s', '') ?? '%(metadata_file)s';
    %(node_support_code)s
    var response = await fetch(metadataUrl);
    if (!response.ok) {
      throw new Error(`${response.status}: ${response.url}`);
    }
    var json = await response.json();
    return loadPackage(json);
  }

  if (Module['calledRun']) {
    runMetaWithFS();
  } else {
    (Module['preRun'] ??= []).push(runMetaWithFS);
  }\n''' % {'node_support_code': node_support_code, 'metadata_file': os.path.basename(options.jsoutput + '.metadata')}
  else:
    ret += '''
  }
  loadPackage(%s);\n''' % json.dumps(metadata)

  if options.export_es6:
    ret += '''
    });
  }
  // END the loadDataFile function
  '''
  else:
    ret += '''
    })();\n'''

  return ret


if __name__ == '__main__':
  sys.exit(main())