Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
emscripten-core
GitHub Repository: emscripten-core/emscripten
Path: blob/main/tools/acorn-optimizer.mjs
4128 views
1
#!/usr/bin/env node
2
3
import * as acorn from 'acorn';
4
import * as terser from '../third_party/terser/terser.js';
5
import * as fs from 'node:fs';
6
import assert from 'node:assert';
7
import {parseArgs} from 'node:util';
8
9
// Utilities
10
11
function read(x) {
12
return fs.readFileSync(x, 'utf-8');
13
}
14
15
function assertAt(condition, node, message = '') {
16
if (!condition) {
17
if (!process.env.EMCC_DEBUG_SAVE) {
18
message += ' (use EMCC_DEBUG_SAVE=1 to preserve temporary inputs)';
19
}
20
let err = new Error(message);
21
err['loc'] = acorn.getLineInfo(input, node.start);
22
throw err;
23
}
24
}
25
26
// Visits and walks
27
// (We don't use acorn-walk because it ignores x in 'x = y'.)
28
29
function visitChildren(node, c) {
30
// emptyOut() and temporary ignoring may mark nodes as empty,
31
// while they have properties with children we should ignore.
32
if (node.type === 'EmptyStatement') {
33
return;
34
}
35
function maybeChild(child) {
36
if (typeof child?.type === 'string') {
37
c(child);
38
return true;
39
}
40
return false;
41
}
42
for (const child of Object.values(node)) {
43
// Check for a child.
44
if (!maybeChild(child)) {
45
// Check for an array of children.
46
if (Array.isArray(child)) {
47
child.forEach(maybeChild);
48
}
49
}
50
}
51
}
52
53
// Simple post-order walk, calling properties on an object by node type,
54
// if the type exists.
55
function simpleWalk(node, cs) {
56
visitChildren(node, (child) => simpleWalk(child, cs));
57
if (node.type in cs) {
58
cs[node.type](node);
59
}
60
}
61
62
// Full post-order walk, calling a single function for all types. If |pre| is
63
// provided, it is called in pre-order (before children). If |pre| returns
64
// `false`, the node and its children will be skipped.
65
function fullWalk(node, c, pre) {
66
if (pre?.(node) !== false) {
67
visitChildren(node, (child) => fullWalk(child, c, pre));
68
c(node);
69
}
70
}
71
72
// Recursive post-order walk, calling properties on an object by node type,
73
// if the type exists, and if so leaving recursion to that function.
74
function recursiveWalk(node, cs) {
75
(function c(node) {
76
if (!(node.type in cs)) {
77
visitChildren(node, (child) => recursiveWalk(child, cs));
78
} else {
79
cs[node.type](node, c);
80
}
81
})(node);
82
}
83
84
// AST Utilities
85
86
function emptyOut(node) {
87
node.type = 'EmptyStatement';
88
}
89
90
function setLiteralValue(item, value) {
91
item.value = value;
92
item.raw = null;
93
}
94
95
function isLiteralString(node) {
96
return node.type === 'Literal' && typeof node.value === 'string';
97
}
98
99
function dump(node) {
100
console.log(JSON.stringify(node, null, ' '));
101
}
102
103
// Traverse a pattern node (identifier, object/array pattern, etc) invoking onExpr on any nested expressions and onBoundIdent on any bound identifiers.
104
function walkPattern(node, onExpr, onBoundIdent) {
105
recursiveWalk(node, {
106
AssignmentPattern(node, c) {
107
c(node.left);
108
onExpr(node.right);
109
},
110
Property(node, c) {
111
if (node.computed) {
112
onExpr(node.key);
113
}
114
c(node.value);
115
},
116
Identifier({name}) {
117
onBoundIdent(name);
118
},
119
});
120
}
121
122
function hasSideEffects(node) {
123
// Conservative analysis.
124
let has = false;
125
fullWalk(
126
node,
127
(node) => {
128
switch (node.type) {
129
case 'ExpressionStatement':
130
if (node.directive) {
131
has = true;
132
}
133
break;
134
// TODO: go through all the ESTree spec
135
case 'Literal':
136
case 'Identifier':
137
case 'UnaryExpression':
138
case 'BinaryExpression':
139
case 'LogicalExpression':
140
case 'UpdateOperator':
141
case 'ConditionalExpression':
142
case 'VariableDeclaration':
143
case 'VariableDeclarator':
144
case 'ObjectExpression':
145
case 'Property':
146
case 'SpreadElement':
147
case 'BlockStatement':
148
case 'ArrayExpression':
149
case 'EmptyStatement': {
150
break; // safe
151
}
152
case 'MemberExpression': {
153
// safe if on Math (or other familiar objects, TODO)
154
if (node.object.type !== 'Identifier' || node.object.name !== 'Math') {
155
// console.error('because member on ' + node.object.name);
156
has = true;
157
}
158
break;
159
}
160
case 'NewExpression': {
161
// default to unsafe, but can be safe on some familiar objects
162
if (node.callee.type === 'Identifier') {
163
const name = node.callee.name;
164
if (
165
name === 'TextDecoder' ||
166
name === 'ArrayBuffer' ||
167
name === 'Int8Array' ||
168
name === 'Uint8Array' ||
169
name === 'Int16Array' ||
170
name === 'Uint16Array' ||
171
name === 'Int32Array' ||
172
name === 'Uint32Array' ||
173
name === 'Float32Array' ||
174
name === 'Float64Array'
175
) {
176
// no side effects, but the arguments might (we walk them in
177
// full walk as well)
178
break;
179
}
180
}
181
// not one of the safe cases
182
has = true;
183
break;
184
}
185
default: {
186
has = true;
187
}
188
}
189
},
190
(node) =>
191
// Ignore inner scopes.
192
!['FunctionDeclaration', 'FunctionExpression', 'ArrowFunctionExpression'].includes(node.type),
193
);
194
return has;
195
}
196
197
// Passes
198
199
// Removes obviously-unused code. Similar to closure compiler in its rules -
200
// export e.g. by Module['..'] = theThing; , or use it somewhere, otherwise
201
// it goes away.
202
//
203
// Note that this is somewhat conservative, since the ESTree AST does not
204
// have a simple separation between definitions and uses, e.g.
205
// Identifier is used both for the x in function foo(x) {
206
// and for y = x + 1 . That means we need to consider new ES6+ constructs
207
// as they appear (like ArrowFunctionExpression). Instead, we do a conservative
208
// analysis here.
209
210
function JSDCE(ast, aggressive) {
211
function iteration() {
212
let removed = 0;
213
const scopes = [{}]; // begin with empty toplevel scope
214
function ensureData(scope, name) {
215
if (Object.prototype.hasOwnProperty.call(scope, name)) return scope[name];
216
scope[name] = {
217
def: 0,
218
use: 0,
219
param: 0, // true for function params, which cannot be eliminated
220
};
221
return scope[name];
222
}
223
function cleanUp(ast, names) {
224
recursiveWalk(ast, {
225
ForStatement(node, c) {
226
visitChildren(node, c);
227
// If we had `for (var x = ...; ...)` and we removed `x`, we need to change to `for (; ...)`.
228
if (node.init?.type === 'EmptyStatement') {
229
node.init = null;
230
}
231
},
232
ForInStatement(node, c) {
233
// We can't remove the var in a for-in, as that would result in an invalid syntax. Skip the LHS.
234
c(node.right);
235
c(node.body);
236
},
237
ForOfStatement(node, c) {
238
// We can't remove the var in a for-of, as that would result in an invalid syntax. Skip the LHS.
239
c(node.right);
240
c(node.body);
241
},
242
VariableDeclaration(node, _c) {
243
let removedHere = 0;
244
node.declarations = node.declarations.filter((node) => {
245
assert(node.type === 'VariableDeclarator');
246
let keep = node.init && hasSideEffects(node.init);
247
walkPattern(
248
node.id,
249
(value) => {
250
keep ||= hasSideEffects(value);
251
},
252
(boundName) => {
253
keep ||= !names.has(boundName);
254
},
255
);
256
if (!keep) removedHere = 1;
257
return keep;
258
});
259
removed += removedHere;
260
if (node.declarations.length === 0) {
261
emptyOut(node);
262
}
263
},
264
ExpressionStatement(node, _c) {
265
if (aggressive && !hasSideEffects(node)) {
266
emptyOut(node);
267
removed++;
268
}
269
},
270
FunctionDeclaration(node, _c) {
271
if (names.has(node.id.name)) {
272
removed++;
273
emptyOut(node);
274
return;
275
}
276
// do not recurse into other scopes
277
},
278
// do not recurse into other scopes
279
FunctionExpression() {},
280
ArrowFunctionExpression() {},
281
});
282
}
283
284
function handleFunction(node, c, defun) {
285
// defun names matter - function names (the y in var x = function y() {..}) are just for stack traces.
286
if (defun) {
287
ensureData(scopes[scopes.length - 1], node.id.name).def = 1;
288
}
289
const scope = {};
290
scopes.push(scope);
291
for (const param of node.params) {
292
walkPattern(param, c, (name) => {
293
ensureData(scope, name).def = 1;
294
scope[name].param = 1;
295
});
296
}
297
c(node.body);
298
// we can ignore self-references, i.e., references to ourselves inside
299
// ourselves, for named defined (defun) functions
300
const ownName = defun ? node.id.name : '';
301
const names = new Set();
302
for (const name in scopes.pop()) {
303
if (name === ownName) continue;
304
const data = scope[name];
305
if (data.use && !data.def) {
306
// this is used from a higher scope, propagate the use down
307
ensureData(scopes[scopes.length - 1], name).use = 1;
308
continue;
309
}
310
if (data.def && !data.use && !data.param) {
311
// this is eliminateable!
312
names.add(name);
313
}
314
}
315
cleanUp(node.body, names);
316
}
317
318
recursiveWalk(ast, {
319
VariableDeclarator(node, c) {
320
walkPattern(node.id, c, (name) => {
321
ensureData(scopes[scopes.length - 1], name).def = 1;
322
});
323
if (node.init) c(node.init);
324
},
325
ObjectExpression(node, c) {
326
// ignore the property identifiers
327
node.properties.forEach((node) => {
328
if (node.value) {
329
c(node.value);
330
} else if (node.argument) {
331
c(node.argument);
332
}
333
});
334
},
335
MemberExpression(node, c) {
336
c(node.object);
337
// Ignore a property identifier (a.X), but notice a[X] (computed props).
338
if (node.computed) {
339
c(node.property);
340
}
341
},
342
FunctionDeclaration(node, c) {
343
handleFunction(node, c, true /* defun */);
344
},
345
FunctionExpression(node, c) {
346
handleFunction(node, c);
347
},
348
ArrowFunctionExpression(node, c) {
349
handleFunction(node, c);
350
},
351
Identifier(node, _c) {
352
const name = node.name;
353
ensureData(scopes[scopes.length - 1], name).use = 1;
354
},
355
ExportDefaultDeclaration(node, c) {
356
const name = node.declaration.id.name;
357
ensureData(scopes[scopes.length - 1], name).use = 1;
358
c(node.declaration);
359
},
360
ExportNamedDeclaration(node, c) {
361
if (node.declaration) {
362
if (node.declaration.type == 'FunctionDeclaration') {
363
const name = node.declaration.id.name;
364
ensureData(scopes[scopes.length - 1], name).use = 1;
365
} else {
366
assert(node.declaration.type == 'VariableDeclaration');
367
for (const decl of node.declaration.declarations) {
368
const name = decl.id.name;
369
ensureData(scopes[scopes.length - 1], name).use = 1;
370
}
371
}
372
c(node.declaration);
373
} else {
374
for (const specifier of node.specifiers) {
375
const name = specifier.local.name;
376
ensureData(scopes[scopes.length - 1], name).use = 1;
377
}
378
}
379
},
380
});
381
382
// toplevel
383
const scope = scopes.pop();
384
assert(scopes.length === 0);
385
386
const names = new Set();
387
for (const [name, data] of Object.entries(scope)) {
388
if (data.def && !data.use) {
389
assert(!data.param); // can't be
390
// this is eliminateable!
391
names.add(name);
392
}
393
}
394
cleanUp(ast, names);
395
return removed;
396
}
397
while (iteration() && aggressive) {} // eslint-disable-line no-empty
398
}
399
400
// Aggressive JSDCE - multiple iterations
401
function AJSDCE(ast) {
402
JSDCE(ast, /* aggressive= */ true);
403
}
404
405
function isWasmImportsAssign(node) {
406
// var wasmImports = ..
407
// or
408
// wasmImports = ..
409
if (
410
node.type === 'AssignmentExpression' &&
411
node.left.name == 'wasmImports' &&
412
node.right.type == 'ObjectExpression'
413
) {
414
return true;
415
}
416
return (
417
node.type === 'VariableDeclaration' &&
418
node.declarations.length === 1 &&
419
node.declarations[0].id.name === 'wasmImports' &&
420
node.declarations[0].init &&
421
node.declarations[0].init.type === 'ObjectExpression'
422
);
423
}
424
425
function getWasmImportsValue(node) {
426
if (node.declarations) {
427
return node.declarations[0].init;
428
} else {
429
return node.right;
430
}
431
}
432
433
function isExportUse(node) {
434
// Match usages of symbols on the `wasmExports` object. e.g:
435
// wasmExports['X']
436
return (
437
node.type === 'MemberExpression' &&
438
node.object.type === 'Identifier' &&
439
isLiteralString(node.property) &&
440
node.object.name === 'wasmExports'
441
);
442
}
443
444
function getExportOrModuleUseName(node) {
445
return node.property.value;
446
}
447
448
function isModuleUse(node) {
449
return (
450
node.type === 'MemberExpression' && // Module['X']
451
node.object.type === 'Identifier' &&
452
node.object.name === 'Module' &&
453
isLiteralString(node.property)
454
);
455
}
456
457
// Apply import/export name changes (after minifying them)
458
function applyImportAndExportNameChanges(ast) {
459
const mapping = extraInfo.mapping;
460
fullWalk(ast, (node) => {
461
if (isWasmImportsAssign(node)) {
462
const assignedObject = getWasmImportsValue(node);
463
assignedObject.properties.forEach((item) => {
464
if (mapping[item.key.name]) {
465
item.key.name = mapping[item.key.name];
466
}
467
});
468
} else if (node.type === 'AssignmentExpression') {
469
const value = node.right;
470
if (isExportUse(value)) {
471
const name = value.property.value;
472
if (mapping[name]) {
473
setLiteralValue(value.property, mapping[name]);
474
}
475
}
476
} else if (node.type === 'CallExpression' && isExportUse(node.callee)) {
477
// wasmExports["___wasm_call_ctors"](); -> wasmExports["M"]();
478
const callee = node.callee;
479
const name = callee.property.value;
480
if (mapping[name]) {
481
setLiteralValue(callee.property, mapping[name]);
482
}
483
} else if (isExportUse(node)) {
484
const prop = node.property;
485
const name = prop.value;
486
if (mapping[name]) {
487
setLiteralValue(prop, mapping[name]);
488
}
489
}
490
});
491
}
492
493
// A static dyncall is dynCall('vii', ..), which is actually static even
494
// though we call dynCall() - we see the string signature statically.
495
function isStaticDynCall(node) {
496
return (
497
node.type === 'CallExpression' &&
498
node.callee.type === 'Identifier' &&
499
node.callee.name === 'dynCall' &&
500
isLiteralString(node.arguments[0])
501
);
502
}
503
504
function getStaticDynCallName(node) {
505
return 'dynCall_' + node.arguments[0].value;
506
}
507
508
// a dynamic dyncall is one in which all we know is *some* dynCall may
509
// be called, but not who. This can be either
510
// dynCall(*not a string*, ..)
511
// or, to be conservative,
512
// "dynCall_"
513
// as that prefix means we may be constructing a dynamic dyncall name
514
// (dynCall and embind's requireFunction do this internally).
515
function isDynamicDynCall(node) {
516
return (
517
(node.type === 'CallExpression' &&
518
node.callee.type === 'Identifier' &&
519
node.callee.name === 'dynCall' &&
520
!isLiteralString(node.arguments[0])) ||
521
(isLiteralString(node) && node.value === 'dynCall_')
522
);
523
}
524
525
//
526
// Emit the DCE graph, to help optimize the combined JS+wasm.
527
// This finds where JS depends on wasm, and where wasm depends
528
// on JS, and prints that out.
529
//
530
// The analysis here is simplified, and not completely general. It
531
// is enough to optimize the common case of JS library and runtime
532
// functions involved in loops with wasm, but not more complicated
533
// things like JS objects and sub-functions. Specifically we
534
// analyze as follows:
535
//
536
// * We consider (1) the toplevel scope, and (2) the scopes of toplevel defined
537
// functions (defun, not function; i.e., function X() {} where
538
// X can be called later, and not y = function Z() {} where Z is
539
// just a name for stack traces). We also consider the wasm, which
540
// we can see things going to and arriving from.
541
// * Anything used in a defun creates a link in the DCE graph, either
542
// to another defun, or the wasm.
543
// * Anything used in the toplevel scope is rooted, as it is code
544
// we assume will execute. The exceptions are
545
// * when we receive something from wasm; those are "free" and
546
// do not cause rooting. (They will become roots if they are
547
// exported, the metadce logic will handle that.)
548
// * when we send something to wasm; sending a defun causes a
549
// link in the DCE graph.
550
// * Anything not in the toplevel or not in a toplevel defun is
551
// considering rooted. We don't optimize those cases.
552
//
553
// Special handling:
554
//
555
// * dynCall('vii', ..) are dynamic dynCalls, but we analyze them
556
// statically, to preserve the dynCall_vii etc. method they depend on.
557
// Truly dynamic dynCalls (not to a string constant) will not work,
558
// and require the user to export them.
559
// * Truly dynamic dynCalls are assumed to reach any dynCall_*.
560
//
561
// XXX this modifies the input AST. if you want to keep using it,
562
// that should be fixed. Currently the main use case here does
563
// not require that. TODO FIXME
564
//
565
function emitDCEGraph(ast) {
566
// First pass: find the wasm imports and exports, and the toplevel
567
// defuns, and save them on the side, removing them from the AST,
568
// which makes the second pass simpler.
569
//
570
// The imports that wasm receives look like this:
571
//
572
// var wasmImports = { "abort": abort, "assert": assert, [..] };
573
//
574
// The exports are trickier, as they have a different form whether or not
575
// async compilation is enabled. It can be either:
576
//
577
// var _malloc = Module['_malloc'] = wasmExports['_malloc'];
578
//
579
// or
580
//
581
// var _malloc = wasmExports['_malloc'];
582
//
583
// or
584
//
585
// var _malloc = Module['_malloc'] = (x) => wasmExports['_malloc'](x);
586
//
587
// or, in the minimal runtime, it looks like
588
//
589
// function assignWasmExports(wasmExports)
590
// ..
591
// _malloc = wasmExports["malloc"];
592
// ..
593
// });
594
const imports = [];
595
const defuns = [];
596
const dynCallNames = [];
597
const nameToGraphName = {};
598
const modulePropertyToGraphName = {};
599
const exportNameToGraphName = {}; // identical to wasmExports['..'] nameToGraphName
600
let foundWasmImportsAssign = false;
601
let foundMinimalRuntimeExports = false;
602
603
function saveAsmExport(name, asmName) {
604
// the asmName is what the wasm provides directly; the outside JS
605
// name may be slightly different (extra "_" in wasm backend)
606
const graphName = getGraphName(name, 'export');
607
nameToGraphName[name] = graphName;
608
modulePropertyToGraphName[name] = graphName;
609
exportNameToGraphName[asmName] = graphName;
610
if (/^dynCall_/.test(name)) {
611
dynCallNames.push(graphName);
612
}
613
}
614
615
// We track defined functions very carefully, so that we can remove them and
616
// the things they call, but other function scopes (like arrow functions and
617
// object methods) are trickier to track (object methods require knowing what
618
// object a function name is called on), so we do not track those. We consider
619
// all content inside them as top-level, which means it is used.
620
var specialScopes = 0;
621
622
fullWalk(
623
ast,
624
(node) => {
625
if (isWasmImportsAssign(node)) {
626
const assignedObject = getWasmImportsValue(node);
627
assignedObject.properties.forEach((item) => {
628
let value = item.value;
629
if (value.type === 'Literal' || value.type === 'FunctionExpression') {
630
return; // if it's a numeric or function literal, nothing to do here
631
}
632
if (value.type === 'LogicalExpression') {
633
// We may have something like wasmMemory || Module.wasmMemory in pthreads code;
634
// use the left hand identifier.
635
value = value.left;
636
}
637
assertAt(value.type === 'Identifier', value);
638
const nativeName = item.key.type == 'Literal' ? item.key.value : item.key.name;
639
assert(nativeName);
640
imports.push([value.name, nativeName]);
641
});
642
foundWasmImportsAssign = true;
643
emptyOut(node); // ignore this in the second pass; this does not root
644
} else if (node.type === 'AssignmentExpression') {
645
const target = node.left;
646
// Ignore assignment to the wasmExports object (as happens in
647
// applySignatureConversions).
648
if (isExportUse(target)) {
649
emptyOut(node);
650
}
651
} else if (node.type === 'VariableDeclaration') {
652
if (node.declarations.length === 1) {
653
const item = node.declarations[0];
654
const name = item.id.name;
655
const value = item.init;
656
if (value && isExportUse(value)) {
657
const asmName = getExportOrModuleUseName(value);
658
// this is:
659
// var _x = wasmExports['x'];
660
saveAsmExport(name, asmName);
661
emptyOut(node);
662
} else if (value && value.type === 'AssignmentExpression') {
663
const assigned = value.left;
664
if (isModuleUse(assigned) && getExportOrModuleUseName(assigned) === name) {
665
// this is
666
// var x = Module['x'] = ?
667
// which looks like a wasm export being received. confirm with the asm use
668
let found = 0;
669
let asmName;
670
fullWalk(value.right, (node) => {
671
if (isExportUse(node)) {
672
found++;
673
asmName = getExportOrModuleUseName(node);
674
}
675
});
676
// in the wasm backend, the asm name may have one fewer "_" prefixed
677
if (found === 1) {
678
// this is indeed an export
679
// the asmName is what the wasm provides directly; the outside JS
680
// name may be slightly different (extra "_" in wasm backend)
681
saveAsmExport(name, asmName);
682
emptyOut(node); // ignore this in the second pass; this does not root
683
return;
684
}
685
if (value.right.type === 'Literal') {
686
// this is
687
// var x = Module['x'] = 1234;
688
// this form occurs when global addresses are exported from the
689
// module. It doesn't constitute a usage.
690
assertAt(typeof value.right.value === 'number', value.right);
691
emptyOut(node);
692
}
693
}
694
}
695
}
696
// A variable declaration that has no initial values can be ignored in
697
// the second pass, these are just declarations, not roots - an actual
698
// use must be found in order to root.
699
if (!node.declarations.reduce((hasInit, decl) => hasInit || !!decl.init, false)) {
700
emptyOut(node);
701
}
702
} else if (node.type === 'FunctionDeclaration') {
703
const name = node.id.name;
704
// Check if this is the minimal runtime exports function, which looks like
705
// function assignWasmExports(wasmExports)
706
if (
707
name == 'assignWasmExports' &&
708
node.params.length === 1 &&
709
node.params[0].type === 'Identifier' &&
710
node.params[0].name === 'wasmExports'
711
) {
712
// This looks very much like what we are looking for.
713
const body = node.body.body;
714
assert(!foundMinimalRuntimeExports);
715
foundMinimalRuntimeExports = true;
716
for (let i = 0; i < body.length; i++) {
717
const item = body[i];
718
if (
719
item.type === 'ExpressionStatement' &&
720
item.expression.type === 'AssignmentExpression' &&
721
item.expression.operator === '=' &&
722
item.expression.left.type === 'Identifier' &&
723
item.expression.right.type === 'MemberExpression' &&
724
item.expression.right.object.type === 'Identifier' &&
725
item.expression.right.object.name === 'wasmExports' &&
726
item.expression.right.property.type === 'Literal'
727
) {
728
const name = item.expression.left.name;
729
const asmName = item.expression.right.property.value;
730
saveAsmExport(name, asmName);
731
emptyOut(item); // ignore all this in the second pass; this does not root
732
}
733
}
734
} else if (!specialScopes) {
735
defuns.push(node);
736
nameToGraphName[name] = getGraphName(name, 'defun');
737
emptyOut(node); // ignore this in the second pass; we scan defuns separately
738
}
739
} else if (node.type === 'ArrowFunctionExpression') {
740
assert(specialScopes > 0);
741
specialScopes--;
742
} else if (node.type === 'Property' && node.method) {
743
assert(specialScopes > 0);
744
specialScopes--;
745
}
746
},
747
(node) => {
748
// Pre-walking logic. We note special scopes (see above).
749
if (node.type === 'ArrowFunctionExpression' || (node.type === 'Property' && node.method)) {
750
specialScopes++;
751
}
752
},
753
);
754
// Scoping must balance out.
755
assert(specialScopes === 0);
756
// We must have found the info we need.
757
assert(
758
foundWasmImportsAssign,
759
'could not find the assignment to "wasmImports". perhaps --pre-js or --post-js code moved it out of the global scope? (things like that should be done after emcc runs, as they do not need to be run through the optimizer which is the special thing about --pre-js/--post-js code)',
760
);
761
// Read exports that were declared in extraInfo
762
if (extraInfo) {
763
for (const exp of extraInfo.exports) {
764
saveAsmExport(exp[0], exp[1]);
765
}
766
}
767
768
// Second pass: everything used in the toplevel scope is rooted;
769
// things used in defun scopes create links
770
function getGraphName(name, what) {
771
return 'emcc$' + what + '$' + name;
772
}
773
const infos = {}; // the graph name of the item => info for it
774
for (const [jsName, nativeName] of imports) {
775
const name = getGraphName(jsName, 'import');
776
const info = (infos[name] = {
777
name: name,
778
import: ['env', nativeName],
779
reaches: new Set(),
780
});
781
if (nameToGraphName.hasOwnProperty(jsName)) {
782
info.reaches.add(nameToGraphName[jsName]);
783
} // otherwise, it's a number, ignore
784
}
785
for (const [e, _] of Object.entries(exportNameToGraphName)) {
786
const name = exportNameToGraphName[e];
787
infos[name] = {
788
name: name,
789
export: e,
790
reaches: new Set(),
791
};
792
}
793
// a function that handles a node we visit, in either a defun or
794
// the toplevel scope (in which case the second param is not provided)
795
function visitNode(node, defunInfo) {
796
// TODO: scope awareness here. for now we just assume all uses are
797
// from the top scope, which might create more uses than needed
798
let reached;
799
if (node.type === 'Identifier') {
800
const name = node.name;
801
if (nameToGraphName.hasOwnProperty(name)) {
802
reached = nameToGraphName[name];
803
}
804
} else if (isModuleUse(node)) {
805
const name = getExportOrModuleUseName(node);
806
if (modulePropertyToGraphName.hasOwnProperty(name)) {
807
reached = modulePropertyToGraphName[name];
808
}
809
} else if (isStaticDynCall(node)) {
810
reached = getGraphName(getStaticDynCallName(node), 'export');
811
} else if (isDynamicDynCall(node)) {
812
// this can reach *all* dynCall_* targets, we can't narrow it down
813
reached = dynCallNames;
814
} else if (isExportUse(node)) {
815
// any remaining asm uses are always rooted in any case
816
const name = getExportOrModuleUseName(node);
817
if (exportNameToGraphName.hasOwnProperty(name)) {
818
infos[exportNameToGraphName[name]].root = true;
819
}
820
return;
821
}
822
if (reached) {
823
function addReach(reached) {
824
if (defunInfo) {
825
defunInfo.reaches.add(reached); // defun reaches it
826
} else {
827
if (infos[reached]) {
828
infos[reached].root = true; // in global scope, root it
829
} else {
830
// An info might not exist for the identifier if it is missing, for
831
// example, we might call Module.dynCall_vi in library code, but it
832
// won't exist in a standalone (non-JS) build anyhow. We can ignore
833
// it in that case as the JS won't be used, but warn to be safe.
834
trace('metadce: missing declaration for ' + reached);
835
}
836
}
837
}
838
if (typeof reached === 'string') {
839
addReach(reached);
840
} else {
841
reached.forEach(addReach);
842
}
843
}
844
}
845
defuns.forEach((defun) => {
846
const name = getGraphName(defun.id.name, 'defun');
847
const info = (infos[name] = {
848
name: name,
849
reaches: new Set(),
850
});
851
fullWalk(defun.body, (node) => visitNode(node, info));
852
});
853
fullWalk(ast, (node) => visitNode(node, null));
854
// Final work: print out the graph
855
// sort for determinism
856
const graph = Object.entries(infos)
857
.sort(([name1], [name2]) => (name1 > name2 ? 1 : -1))
858
.map(([_name, info]) => ({
859
...info,
860
reaches: Array.from(info.reaches).sort(),
861
}));
862
dump(graph);
863
}
864
865
// Apply graph removals from running wasm-metadce. This only removes imports and
866
// exports from JS side, effectively disentangling the wasm and JS sides that
867
// way (and we leave further DCE on the JS and wasm sides to their respective
868
// optimizers, closure compiler and binaryen).
869
function applyDCEGraphRemovals(ast) {
870
const unusedExports = new Set(extraInfo.unusedExports);
871
const unusedImports = new Set(extraInfo.unusedImports);
872
const foundUnusedImports = new Set();
873
const foundUnusedExports = new Set();
874
trace('unusedExports:', unusedExports);
875
trace('unusedImports:', unusedImports);
876
877
fullWalk(ast, (node) => {
878
if (isWasmImportsAssign(node)) {
879
const assignedObject = getWasmImportsValue(node);
880
assignedObject.properties = assignedObject.properties.filter((item) => {
881
const name = item.key.name;
882
const value = item.value;
883
if (unusedImports.has(name)) {
884
foundUnusedImports.add(name);
885
return hasSideEffects(value);
886
}
887
return true;
888
});
889
} else if (node.type === 'ExpressionStatement') {
890
let expr = node.expression;
891
// Inside the assignWasmExports function we have
892
//
893
// _x = wasmExports['x']
894
//
895
// or:
896
//
897
// Module['_x'] = _x = wasmExports['x']
898
//
899
if (expr.type == 'AssignmentExpression' && expr.right.type == 'AssignmentExpression') {
900
expr = expr.right;
901
}
902
if (expr.operator === '=' && expr.left.type === 'Identifier' && isExportUse(expr.right)) {
903
const export_name = getExportOrModuleUseName(expr.right);
904
if (unusedExports.has(export_name)) {
905
emptyOut(node);
906
foundUnusedExports.add(export_name);
907
}
908
}
909
}
910
});
911
912
for (const i of unusedImports) {
913
assert(foundUnusedImports.has(i), 'unused import not found: ' + i);
914
}
915
for (const e of unusedExports) {
916
assert(foundUnusedExports.has(e), 'unused export not found: ' + e);
917
}
918
}
919
920
function createLiteral(value) {
921
return {
922
type: 'Literal',
923
value: value,
924
raw: '' + value,
925
};
926
}
927
928
function makeCallExpression(node, name, args) {
929
Object.assign(node, {
930
type: 'CallExpression',
931
callee: {
932
type: 'Identifier',
933
name: name,
934
},
935
arguments: args,
936
});
937
}
938
939
function isEmscriptenHEAP(name) {
940
switch (name) {
941
case 'HEAP8':
942
case 'HEAPU8':
943
case 'HEAP16':
944
case 'HEAPU16':
945
case 'HEAP32':
946
case 'HEAPU32':
947
case 'HEAP64':
948
case 'HEAPU64':
949
case 'HEAPF32':
950
case 'HEAPF64': {
951
return true;
952
}
953
default: {
954
return false;
955
}
956
}
957
}
958
959
// Replaces each HEAP access with function call that uses DataView to enforce
960
// LE byte order for HEAP buffer
961
function littleEndianHeap(ast) {
962
recursiveWalk(ast, {
963
FunctionDeclaration(node, c) {
964
// do not recurse into LE_HEAP_STORE, LE_HEAP_LOAD functions
965
if (
966
!(
967
node.id.type === 'Identifier' &&
968
(node.id.name.startsWith('LE_HEAP') || node.id.name.startsWith('LE_ATOMICS_'))
969
)
970
) {
971
c(node.body);
972
}
973
},
974
VariableDeclarator(node, c) {
975
if (!(node.id.type === 'Identifier' && node.id.name.startsWith('LE_ATOMICS_'))) {
976
c(node.id);
977
if (node.init) c(node.init);
978
}
979
},
980
AssignmentExpression(node, c) {
981
const target = node.left;
982
const value = node.right;
983
c(value);
984
if (!isHEAPAccess(target)) {
985
// not accessing the HEAP
986
c(target);
987
} else {
988
// replace the heap access with LE_HEAP_STORE
989
const name = target.object.name;
990
const idx = target.property;
991
switch (name) {
992
case 'HEAP8':
993
case 'HEAPU8': {
994
// no action required - storing only 1 byte
995
break;
996
}
997
case 'HEAP16': {
998
// change "name[idx] = value" to "LE_HEAP_STORE_I16(idx*2, value)"
999
makeCallExpression(node, 'LE_HEAP_STORE_I16', [multiply(idx, 2), value]);
1000
break;
1001
}
1002
case 'HEAPU16': {
1003
// change "name[idx] = value" to "LE_HEAP_STORE_U16(idx*2, value)"
1004
makeCallExpression(node, 'LE_HEAP_STORE_U16', [multiply(idx, 2), value]);
1005
break;
1006
}
1007
case 'HEAP32': {
1008
// change "name[idx] = value" to "LE_HEAP_STORE_I32(idx*4, value)"
1009
makeCallExpression(node, 'LE_HEAP_STORE_I32', [multiply(idx, 4), value]);
1010
break;
1011
}
1012
case 'HEAPU32': {
1013
// change "name[idx] = value" to "LE_HEAP_STORE_U32(idx*4, value)"
1014
makeCallExpression(node, 'LE_HEAP_STORE_U32', [multiply(idx, 4), value]);
1015
break;
1016
}
1017
case 'HEAP64': {
1018
// change "name[idx] = value" to "LE_HEAP_STORE_I64(idx*8, value)"
1019
makeCallExpression(node, 'LE_HEAP_STORE_I64', [multiply(idx, 8), value]);
1020
break;
1021
}
1022
case 'HEAPU64': {
1023
// change "name[idx] = value" to "LE_HEAP_STORE_U64(idx*8, value)"
1024
makeCallExpression(node, 'LE_HEAP_STORE_U64', [multiply(idx, 8), value]);
1025
break;
1026
}
1027
case 'HEAPF32': {
1028
// change "name[idx] = value" to "LE_HEAP_STORE_F32(idx*4, value)"
1029
makeCallExpression(node, 'LE_HEAP_STORE_F32', [multiply(idx, 4), value]);
1030
break;
1031
}
1032
case 'HEAPF64': {
1033
// change "name[idx] = value" to "LE_HEAP_STORE_F64(idx*8, value)"
1034
makeCallExpression(node, 'LE_HEAP_STORE_F64', [multiply(idx, 8), value]);
1035
break;
1036
}
1037
}
1038
}
1039
},
1040
CallExpression(node, c) {
1041
if (node.arguments) {
1042
for (var a of node.arguments) c(a);
1043
}
1044
if (
1045
// Atomics.X(args) -> LE_ATOMICS_X(args)
1046
node.callee.type === 'MemberExpression' &&
1047
node.callee.object.type === 'Identifier' &&
1048
node.callee.object.name === 'Atomics' &&
1049
!node.callee.computed
1050
) {
1051
makeCallExpression(
1052
node,
1053
'LE_ATOMICS_' + node.callee.property.name.toUpperCase(),
1054
node.arguments,
1055
);
1056
} else {
1057
c(node.callee);
1058
}
1059
},
1060
MemberExpression(node, c) {
1061
c(node.property);
1062
if (!isHEAPAccess(node)) {
1063
// not accessing the HEAP
1064
c(node.object);
1065
} else {
1066
// replace the heap access with LE_HEAP_LOAD
1067
const idx = node.property;
1068
switch (node.object.name) {
1069
case 'HEAP8':
1070
case 'HEAPU8': {
1071
// no action required - loading only 1 byte
1072
break;
1073
}
1074
case 'HEAP16': {
1075
// change "name[idx]" to "LE_HEAP_LOAD_I16(idx*2)"
1076
makeCallExpression(node, 'LE_HEAP_LOAD_I16', [multiply(idx, 2)]);
1077
break;
1078
}
1079
case 'HEAPU16': {
1080
// change "name[idx]" to "LE_HEAP_LOAD_U16(idx*2)"
1081
makeCallExpression(node, 'LE_HEAP_LOAD_U16', [multiply(idx, 2)]);
1082
break;
1083
}
1084
case 'HEAP32': {
1085
// change "name[idx]" to "LE_HEAP_LOAD_I32(idx*4)"
1086
makeCallExpression(node, 'LE_HEAP_LOAD_I32', [multiply(idx, 4)]);
1087
break;
1088
}
1089
case 'HEAPU32': {
1090
// change "name[idx]" to "LE_HEAP_LOAD_U32(idx*4)"
1091
makeCallExpression(node, 'LE_HEAP_LOAD_U32', [multiply(idx, 4)]);
1092
break;
1093
}
1094
case 'HEAP64': {
1095
// change "name[idx]" to "LE_HEAP_LOAD_I64(idx*8)"
1096
makeCallExpression(node, 'LE_HEAP_LOAD_I64', [multiply(idx, 8)]);
1097
break;
1098
}
1099
case 'HEAPU64': {
1100
// change "name[idx]" to "LE_HEAP_LOAD_U64(idx*8)"
1101
makeCallExpression(node, 'LE_HEAP_LOAD_U64', [multiply(idx, 8)]);
1102
break;
1103
}
1104
case 'HEAPF32': {
1105
// change "name[idx]" to "LE_HEAP_LOAD_F32(idx*4)"
1106
makeCallExpression(node, 'LE_HEAP_LOAD_F32', [multiply(idx, 4)]);
1107
break;
1108
}
1109
case 'HEAPF64': {
1110
// change "name[idx]" to "LE_HEAP_LOAD_F64(idx*8)"
1111
makeCallExpression(node, 'LE_HEAP_LOAD_F64', [multiply(idx, 8)]);
1112
break;
1113
}
1114
}
1115
}
1116
},
1117
});
1118
}
1119
1120
// Instrument heap accesses to call growMemViews helper function, which allows
1121
// pthreads + memory growth to work (we check if the memory was grown on another thread
1122
// in each access), see #8365.
1123
function growableHeap(ast) {
1124
recursiveWalk(ast, {
1125
ExportNamedDeclaration() {
1126
// Do not recurse export statements since we don't want to rewrite, for example, `export { HEAP32 }`
1127
},
1128
FunctionDeclaration(node, c) {
1129
// Do not recurse into the helper function itself.
1130
if (
1131
!(
1132
node.id.type === 'Identifier' &&
1133
(node.id.name === 'growMemViews' || node.id.name === 'LE_HEAP_UPDATE')
1134
)
1135
) {
1136
c(node.body);
1137
}
1138
},
1139
AssignmentExpression(node) {
1140
if (node.left.type !== 'Identifier') {
1141
// Don't transform `HEAPxx =` assignments.
1142
growableHeap(node.left);
1143
}
1144
growableHeap(node.right);
1145
},
1146
VariableDeclarator(node) {
1147
// Don't transform the var declarations for HEAP8 etc
1148
// but do transform anything that sets a var to
1149
// something from HEAP8 etc
1150
if (node.init) {
1151
growableHeap(node.init);
1152
}
1153
},
1154
Identifier(node) {
1155
if (isEmscriptenHEAP(node.name)) {
1156
// Transform `HEAPxx` into `(growMemViews(), HEAPxx)`.
1157
// Important: don't just do `growMemViews(HEAPxx)` because `growMemViews` reassigns `HEAPxx`
1158
// and we want to get an updated value after that reassignment.
1159
Object.assign(node, {
1160
type: 'SequenceExpression',
1161
expressions: [
1162
{
1163
type: 'CallExpression',
1164
callee: {
1165
type: 'Identifier',
1166
name: 'growMemViews',
1167
},
1168
arguments: [],
1169
},
1170
{...node},
1171
],
1172
});
1173
}
1174
},
1175
});
1176
}
1177
1178
// Make all JS pointers unsigned. We do this by modifying things like
1179
// HEAP32[X >> 2] to HEAP32[X >>> 2]. We also need to handle the case of
1180
// HEAP32[X] and make that HEAP32[X >>> 0], things like subarray(), etc.
1181
function unsignPointers(ast) {
1182
// Aside from the standard emscripten HEAP*s, also identify just "HEAP"/"heap"
1183
// as representing a heap. This can be used in JS library code in order
1184
// to get this pass to fix it up.
1185
function isHeap(name) {
1186
return isEmscriptenHEAP(name) || name === 'heap' || name === 'HEAP';
1187
}
1188
1189
function unsign(node) {
1190
// The pointer is often a >> shift, which we can just turn into >>>
1191
if (node.type === 'BinaryExpression') {
1192
if (node.operator === '>>') {
1193
node.operator = '>>>';
1194
return node;
1195
}
1196
}
1197
// If nothing else worked out, add a new shift.
1198
return {
1199
type: 'BinaryExpression',
1200
left: node,
1201
operator: '>>>',
1202
right: {
1203
type: 'Literal',
1204
value: 0,
1205
},
1206
};
1207
}
1208
1209
fullWalk(ast, (node) => {
1210
if (node.type === 'MemberExpression') {
1211
// Check if this is HEAP*[?]
1212
if (node.object.type === 'Identifier' && isHeap(node.object.name) && node.computed) {
1213
node.property = unsign(node.property);
1214
}
1215
} else if (node.type === 'CallExpression') {
1216
if (
1217
node.callee.type === 'MemberExpression' &&
1218
node.callee.object.type === 'Identifier' &&
1219
isHeap(node.callee.object.name) &&
1220
!node.callee.computed
1221
) {
1222
// This is a call on HEAP*.?. Specific things we need to fix up are
1223
// subarray, set, and copyWithin. TODO more?
1224
if (node.callee.property.name === 'set') {
1225
if (node.arguments.length >= 2) {
1226
node.arguments[1] = unsign(node.arguments[1]);
1227
}
1228
} else if (node.callee.property.name === 'subarray') {
1229
if (node.arguments.length >= 1) {
1230
node.arguments[0] = unsign(node.arguments[0]);
1231
if (node.arguments.length >= 2) {
1232
node.arguments[1] = unsign(node.arguments[1]);
1233
}
1234
}
1235
} else if (node.callee.property.name === 'copyWithin') {
1236
node.arguments[0] = unsign(node.arguments[0]);
1237
node.arguments[1] = unsign(node.arguments[1]);
1238
if (node.arguments.length >= 3) {
1239
node.arguments[2] = unsign(node.arguments[2]);
1240
}
1241
}
1242
}
1243
}
1244
});
1245
}
1246
1247
function isHEAPAccess(node) {
1248
return (
1249
node.type === 'MemberExpression' &&
1250
node.object.type === 'Identifier' &&
1251
node.computed && // notice a[X] but not a.X
1252
isEmscriptenHEAP(node.object.name)
1253
);
1254
}
1255
1256
// Replace direct HEAP* loads/stores with calls into C, in which ASan checks
1257
// are applied. That lets ASan cover JS too.
1258
function asanify(ast) {
1259
recursiveWalk(ast, {
1260
FunctionDeclaration(node, c) {
1261
if (
1262
node.id.type === 'Identifier' &&
1263
(node.id.name.startsWith('_asan_js_') || node.id.name === 'establishStackSpace')
1264
) {
1265
// do not recurse into this js impl function, which we use during
1266
// startup before the wasm is ready
1267
} else {
1268
c(node.body);
1269
}
1270
},
1271
AssignmentExpression(node, c) {
1272
const target = node.left;
1273
const value = node.right;
1274
c(value);
1275
if (isHEAPAccess(target)) {
1276
// Instrument a store.
1277
makeCallExpression(node, '_asan_js_store', [target.object, target.property, value]);
1278
} else {
1279
c(target);
1280
}
1281
},
1282
MemberExpression(node, c) {
1283
c(node.property);
1284
if (!isHEAPAccess(node)) {
1285
c(node.object);
1286
} else {
1287
// Instrument a load.
1288
makeCallExpression(node, '_asan_js_load', [node.object, node.property]);
1289
}
1290
},
1291
});
1292
}
1293
1294
function multiply(value, by) {
1295
return {
1296
type: 'BinaryExpression',
1297
left: value,
1298
operator: '*',
1299
right: createLiteral(by),
1300
};
1301
}
1302
1303
// Replace direct heap access with SAFE_HEAP* calls.
1304
function safeHeap(ast) {
1305
recursiveWalk(ast, {
1306
FunctionDeclaration(node, c) {
1307
if (node.id.type === 'Identifier' && node.id.name.startsWith('SAFE_HEAP')) {
1308
// do not recurse into this js impl function, which we use during
1309
// startup before the wasm is ready
1310
} else {
1311
c(node.body);
1312
}
1313
},
1314
AssignmentExpression(node, c) {
1315
const target = node.left;
1316
const value = node.right;
1317
c(value);
1318
if (isHEAPAccess(target)) {
1319
// Instrument a store.
1320
makeCallExpression(node, 'SAFE_HEAP_STORE', [target.object, target.property, value]);
1321
} else {
1322
c(target);
1323
}
1324
},
1325
MemberExpression(node, c) {
1326
c(node.property);
1327
if (!isHEAPAccess(node)) {
1328
c(node.object);
1329
} else {
1330
// Instrument a load.
1331
makeCallExpression(node, 'SAFE_HEAP_LOAD', [node.object, node.property]);
1332
}
1333
},
1334
});
1335
}
1336
1337
// Name minification
1338
1339
const RESERVED = new Set([
1340
'do',
1341
'if',
1342
'in',
1343
'for',
1344
'new',
1345
'try',
1346
'var',
1347
'env',
1348
'let',
1349
'case',
1350
'else',
1351
'enum',
1352
'void',
1353
'this',
1354
'void',
1355
'with',
1356
]);
1357
const VALID_MIN_INITS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$';
1358
const VALID_MIN_LATERS = VALID_MIN_INITS + '0123456789';
1359
1360
const minifiedNames = [];
1361
const minifiedState = [0];
1362
1363
// Make sure the nth index in minifiedNames exists. Done 100% deterministically.
1364
function ensureMinifiedNames(n) {
1365
while (minifiedNames.length < n + 1) {
1366
// generate the current name
1367
let name = VALID_MIN_INITS[minifiedState[0]];
1368
for (let i = 1; i < minifiedState.length; i++) {
1369
name += VALID_MIN_LATERS[minifiedState[i]];
1370
}
1371
if (!RESERVED.has(name)) minifiedNames.push(name);
1372
// increment the state
1373
let i = 0;
1374
while (true) {
1375
minifiedState[i]++;
1376
if (minifiedState[i] < (i === 0 ? VALID_MIN_INITS : VALID_MIN_LATERS).length) break;
1377
// overflow
1378
minifiedState[i] = 0;
1379
i++;
1380
// will become 0 after increment in next loop head
1381
if (i === minifiedState.length) minifiedState.push(-1);
1382
}
1383
}
1384
}
1385
1386
function minifyLocals(ast) {
1387
// We are given a mapping of global names to their minified forms.
1388
assert(extraInfo?.globals);
1389
1390
for (const fun of ast.body) {
1391
if (fun.type !== 'FunctionDeclaration') {
1392
continue;
1393
}
1394
// Find the list of local names, including params.
1395
const localNames = new Set();
1396
for (const param of fun.params) {
1397
localNames.add(param.name);
1398
}
1399
simpleWalk(fun, {
1400
VariableDeclaration(node, _c) {
1401
for (const dec of node.declarations) {
1402
localNames.add(dec.id.name);
1403
}
1404
},
1405
});
1406
1407
function isLocalName(name) {
1408
return localNames.has(name);
1409
}
1410
1411
// Names old to new names.
1412
const newNames = new Map();
1413
1414
// The names in use, that must not be collided with.
1415
const usedNames = new Set();
1416
1417
// Put the function name aside. We don't want to traverse it as it is not
1418
// in the scope of itself.
1419
const funId = fun.id;
1420
fun.id = null;
1421
1422
// Find all the globals that we need to minify using pre-assigned names.
1423
// Don't actually minify them yet as that might interfere with local
1424
// variable names; just mark them as used, and what their new name will be.
1425
simpleWalk(fun, {
1426
Identifier(node, _c) {
1427
const name = node.name;
1428
if (!isLocalName(name)) {
1429
const minified = extraInfo.globals[name];
1430
if (minified) {
1431
newNames.set(name, minified);
1432
usedNames.add(minified);
1433
}
1434
}
1435
},
1436
CallExpression(node, _c) {
1437
// We should never call a local name, as in asm.js-style code our
1438
// locals are just numbers, not functions; functions are all declared
1439
// in the outer scope. If a local is called, that is a bug.
1440
if (node.callee.type === 'Identifier') {
1441
assertAt(!isLocalName(node.callee.name), node.callee, 'cannot call a local');
1442
}
1443
},
1444
});
1445
1446
// The first time we encounter a local name, we assign it a/ minified name
1447
// that's not currently in use. Allocating on demand means they're processed
1448
// in a predictable order, which is very handy for testing/debugging
1449
// purposes.
1450
let nextMinifiedName = 0;
1451
1452
function getNextMinifiedName() {
1453
while (true) {
1454
ensureMinifiedNames(nextMinifiedName);
1455
const minified = minifiedNames[nextMinifiedName++];
1456
// TODO: we can probably remove !isLocalName here
1457
if (!usedNames.has(minified) && !isLocalName(minified)) {
1458
return minified;
1459
}
1460
}
1461
}
1462
1463
// Traverse and minify all names. First the function parameters.
1464
for (const param of fun.params) {
1465
const minified = getNextMinifiedName();
1466
newNames.set(param.name, minified);
1467
param.name = minified;
1468
}
1469
1470
// Label minification is done in a separate namespace.
1471
const labelNames = new Map();
1472
let nextMinifiedLabel = 0;
1473
function getNextMinifiedLabel() {
1474
ensureMinifiedNames(nextMinifiedLabel);
1475
return minifiedNames[nextMinifiedLabel++];
1476
}
1477
1478
// Finally, the function body.
1479
recursiveWalk(fun, {
1480
Identifier(node) {
1481
const name = node.name;
1482
if (newNames.has(name)) {
1483
node.name = newNames.get(name);
1484
} else if (isLocalName(name)) {
1485
const minified = getNextMinifiedName();
1486
newNames.set(name, minified);
1487
node.name = minified;
1488
}
1489
},
1490
LabeledStatement(node, c) {
1491
if (!labelNames.has(node.label.name)) {
1492
labelNames.set(node.label.name, getNextMinifiedLabel());
1493
}
1494
node.label.name = labelNames.get(node.label.name);
1495
c(node.body);
1496
},
1497
BreakStatement(node, _c) {
1498
if (node.label) {
1499
node.label.name = labelNames.get(node.label.name);
1500
}
1501
},
1502
ContinueStatement(node, _c) {
1503
if (node.label) {
1504
node.label.name = labelNames.get(node.label.name);
1505
}
1506
},
1507
});
1508
1509
// Finally, the function name, after restoring it.
1510
fun.id = funId;
1511
assert(extraInfo.globals.hasOwnProperty(fun.id.name));
1512
fun.id.name = extraInfo.globals[fun.id.name];
1513
}
1514
}
1515
1516
function minifyGlobals(ast) {
1517
// The input is in form
1518
//
1519
// function instantiate(wasmImports, wasmMemory, wasmTable) {
1520
// var helper..
1521
// function asmFunc(global, env, buffer) {
1522
// var memory = env.memory;
1523
// var HEAP8 = new global.Int8Array(buffer);
1524
//
1525
// We want to minify the interior of instantiate, basically everything but
1526
// the name instantiate itself, which is used externally to call it.
1527
//
1528
// This is *not* a complete minification algorithm. It does not have a full
1529
// understanding of nested scopes. Instead it assumes the code is fairly
1530
// simple - as wasm2js output is - and looks at all the minifiable names as
1531
// a whole. A possible bug here is something like
1532
//
1533
// function instantiate(wasmImports, wasmMemory, wasmTable) {
1534
// var x = foo;
1535
// function asmFunc(global, env, buffer) {
1536
// var foo = 10;
1537
//
1538
// Here foo is declared in an inner scope, and the outer use of foo looks
1539
// to the global scope. The analysis here only thinks something is from the
1540
// global scope if it is not in any var or function declaration. In practice,
1541
// the globals used from wasm2js output are things like Int8Array that we
1542
// don't declare as locals, but we should probably have a fully scope-aware
1543
// analysis here. FIXME
1544
1545
// We must run on a singleton instantiate() function as described above.
1546
assert(
1547
ast.type === 'Program' &&
1548
ast.body.length === 1 &&
1549
ast.body[0].type === 'FunctionDeclaration' &&
1550
ast.body[0].id.name === 'instantiate',
1551
);
1552
const fun = ast.body[0];
1553
1554
// Swap the function's name away so that we can then minify everything else.
1555
const funId = fun.id;
1556
fun.id = null;
1557
1558
// Find all the declarations.
1559
const declared = new Set();
1560
1561
// Some identifiers must be left as they are and not minified.
1562
const ignore = new Set();
1563
1564
simpleWalk(fun, {
1565
FunctionDeclaration(node) {
1566
if (node.id) {
1567
declared.add(node.id.name);
1568
}
1569
for (const param of node.params) {
1570
declared.add(param.name);
1571
}
1572
},
1573
FunctionExpression(node) {
1574
for (const param of node.params) {
1575
declared.add(param.name);
1576
}
1577
},
1578
VariableDeclaration(node) {
1579
for (const decl of node.declarations) {
1580
declared.add(decl.id.name);
1581
}
1582
},
1583
MemberExpression(node) {
1584
// In x.a we must not minify a. However, for x[a] we must.
1585
if (!node.computed) {
1586
ignore.add(node.property);
1587
}
1588
},
1589
});
1590
1591
// TODO: find names to avoid, that are not declared (should not happen in
1592
// wasm2js output)
1593
1594
// Minify the names.
1595
let nextMinifiedName = 0;
1596
1597
function getNewMinifiedName() {
1598
ensureMinifiedNames(nextMinifiedName);
1599
return minifiedNames[nextMinifiedName++];
1600
}
1601
1602
const minified = new Map();
1603
1604
function minify(name) {
1605
if (!minified.has(name)) {
1606
minified.set(name, getNewMinifiedName());
1607
}
1608
assert(minified.get(name));
1609
return minified.get(name);
1610
}
1611
1612
// Start with the declared things in the lowest indices. Things like HEAP8
1613
// can have very high use counts.
1614
for (const name of declared) {
1615
minify(name);
1616
}
1617
1618
// Minify all globals in function chunks, i.e. not seen here, but will be in
1619
// the minifyLocals work on functions.
1620
for (const name of extraInfo.globals) {
1621
declared.add(name);
1622
minify(name);
1623
}
1624
1625
// Replace the names with their minified versions.
1626
simpleWalk(fun, {
1627
Identifier(node) {
1628
if (declared.has(node.name) && !ignore.has(node)) {
1629
node.name = minify(node.name);
1630
}
1631
},
1632
});
1633
1634
// Restore the name
1635
fun.id = funId;
1636
1637
// Emit the metadata
1638
const json = {};
1639
for (const x of minified.entries()) json[x[0]] = x[1];
1640
1641
suffix = '// EXTRA_INFO:' + JSON.stringify(json);
1642
}
1643
1644
// Utilities
1645
1646
function reattachComments(ast, commentsMap) {
1647
const symbols = [];
1648
1649
// Collect all code symbols
1650
ast.walk(
1651
new terser.TreeWalker((node) => {
1652
if (node.start?.pos) {
1653
symbols.push(node);
1654
}
1655
}),
1656
);
1657
1658
// Sort them by ascending line number
1659
symbols.sort((a, b) => a.start.pos - b.start.pos);
1660
1661
// Walk through all comments in ascending line number, and match each
1662
// comment to the appropriate code block.
1663
let j = 0;
1664
for (const [pos, comments] of Object.entries(commentsMap)) {
1665
while (j < symbols.length && symbols[j].start.pos < pos) {
1666
++j;
1667
}
1668
if (j >= symbols.length) {
1669
trace('dropping comments: no symbol comes after them');
1670
break;
1671
}
1672
if (symbols[j].start.pos != pos) {
1673
// This comment must have been associated with a node that still
1674
// exists in the AST, otherwise to drop it.
1675
trace('dropping comments: not linked to any remaining AST node');
1676
continue;
1677
}
1678
symbols[j].start.comments_before ??= [];
1679
for (const comment of comments) {
1680
trace('reattaching comment');
1681
symbols[j].start.comments_before.push(
1682
new terser.AST_Token(
1683
comment.type == 'Line' ? 'comment1' : 'comment2',
1684
comment.value,
1685
undefined,
1686
undefined,
1687
false,
1688
undefined,
1689
undefined,
1690
'0',
1691
),
1692
);
1693
}
1694
}
1695
}
1696
1697
// Main
1698
1699
let suffix = '';
1700
1701
const {
1702
values: {
1703
'closure-friendly': closureFriendly,
1704
'export-es6': exportES6,
1705
verbose,
1706
'no-print': noPrint,
1707
'minify-whitespace': minifyWhitespace,
1708
outfile,
1709
},
1710
positionals: [infile, ...passes],
1711
} = parseArgs({
1712
options: {
1713
'closure-friendly': {type: 'boolean'},
1714
'export-es6': {type: 'boolean'},
1715
verbose: {type: 'boolean'},
1716
'no-print': {type: 'boolean'},
1717
'minify-whitespace': {type: 'boolean'},
1718
outfile: {type: 'string', short: 'o'},
1719
},
1720
allowPositionals: true,
1721
});
1722
1723
function trace(...args) {
1724
if (verbose) {
1725
console.warn(...args);
1726
}
1727
}
1728
1729
// If enabled, output retains parentheses and comments so that the
1730
// output can further be passed out to Closure.
1731
1732
const input = read(infile);
1733
const extraInfoStart = input.lastIndexOf('// EXTRA_INFO:');
1734
let extraInfo = null;
1735
if (extraInfoStart > 0) {
1736
extraInfo = JSON.parse(input.slice(extraInfoStart + 14));
1737
}
1738
// Collect all JS code comments to this map so that we can retain them in the
1739
// outputted code if --closureFriendly was requested.
1740
const sourceComments = {};
1741
const params = {
1742
ecmaVersion: 'latest',
1743
sourceType: exportES6 ? 'module' : 'script',
1744
allowAwaitOutsideFunction: true,
1745
};
1746
if (closureFriendly) {
1747
const currentComments = [];
1748
Object.assign(params, {
1749
preserveParens: true,
1750
onToken(token) {
1751
// Associate comments with the start position of the next token.
1752
sourceComments[token.start] = currentComments.slice();
1753
currentComments.length = 0;
1754
},
1755
onComment: currentComments,
1756
});
1757
}
1758
1759
const registry = {
1760
JSDCE,
1761
AJSDCE,
1762
applyImportAndExportNameChanges,
1763
emitDCEGraph,
1764
applyDCEGraphRemovals,
1765
dump,
1766
littleEndianHeap,
1767
growableHeap,
1768
unsignPointers,
1769
minifyLocals,
1770
asanify,
1771
safeHeap,
1772
minifyGlobals,
1773
};
1774
1775
let ast;
1776
try {
1777
ast = acorn.parse(input, params);
1778
for (let pass of passes) {
1779
const resolvedPass = registry[pass];
1780
assert(resolvedPass, `unknown optimizer pass: ${pass}`);
1781
resolvedPass(ast);
1782
}
1783
} catch (err) {
1784
if (err.loc) {
1785
err.message +=
1786
'\n' +
1787
`${input.split(acorn.lineBreak)[err.loc.line - 1]}\n` +
1788
`${' '.repeat(err.loc.column)}^ ${infile}:${err.loc.line}:${err.loc.column + 1}`;
1789
}
1790
throw err;
1791
}
1792
1793
if (!noPrint) {
1794
const terserAst = terser.AST_Node.from_mozilla_ast(ast);
1795
1796
if (closureFriendly) {
1797
reattachComments(terserAst, sourceComments);
1798
}
1799
1800
let output = terserAst.print_to_string({
1801
beautify: !minifyWhitespace,
1802
indent_level: minifyWhitespace ? 0 : 2,
1803
keep_quoted_props: closureFriendly, // for closure
1804
wrap_func_args: false, // don't add extra braces
1805
comments: true, // for closure as well
1806
shorthand: true, // Use object literal shorthand notation
1807
});
1808
1809
output += '\n';
1810
if (suffix) {
1811
output += suffix + '\n';
1812
}
1813
1814
if (outfile) {
1815
fs.writeFileSync(outfile, output);
1816
} else {
1817
// Simply using `fs.writeFileSync` on `process.stdout` has issues with
1818
// large amount of data. It can cause:
1819
// Error: EAGAIN: resource temporarily unavailable, write
1820
process.stdout.write(output);
1821
}
1822
}
1823
1824