Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
emscripten-core
GitHub Repository: emscripten-core/emscripten
Path: blob/main/tools/acorn-optimizer.mjs
6170 views
1
#!/usr/bin/env node
2
3
import * as acorn from 'acorn';
4
import * as terser from '../third_party/terser/terser.js';
5
import * as fs from 'node:fs';
6
import assert from 'node:assert';
7
import {parseArgs} from 'node:util';
8
9
// Utilities
10
11
// Synchronously loads the file at path |x| and returns its contents decoded
// as UTF-8 text.
function read(x) {
  const contents = fs.readFileSync(x, 'utf-8');
  return contents;
}
14
15
// Throws an Error when |condition| is falsy. The error carries a `loc`
// property computed by acorn from |node|'s start offset in the current input,
// and, unless EMCC_DEBUG_SAVE is set in the environment, the message gets a
// hint about how to preserve the temporary inputs.
function assertAt(condition, node, message = '') {
  if (condition) {
    return;
  }
  const text = process.env.EMCC_DEBUG_SAVE
    ? message
    : message + ' (use EMCC_DEBUG_SAVE=1 to preserve temporary inputs)';
  const failure = new Error(text);
  failure['loc'] = acorn.getLineInfo(input, node.start);
  throw failure;
}
25
26
// Visits and walks
27
// (We don't use acorn-walk because it ignores x in 'x = y'.)
28
29
// Calls |c| on every direct child of |node|. A child is any property value
// that has a string `type`, or any such element inside an array-valued
// property.
function visitChildren(node, c) {
  // emptyOut() and temporary ignoring may mark nodes as empty while they
  // still carry properties with children; those children must be ignored.
  if (node.type === 'EmptyStatement') {
    return;
  }
  const looksLikeNode = (value) => typeof value?.type === 'string';
  for (const value of Object.values(node)) {
    if (looksLikeNode(value)) {
      // A single child node.
      c(value);
    } else if (Array.isArray(value)) {
      // A list of children; skip anything in it that is not a node.
      value.forEach((element) => {
        if (looksLikeNode(element)) {
          c(element);
        }
      });
    }
  }
}
52
53
// Simple post-order walk: children are walked first, then, if |cs| has a
// property named after the node's type, that property is called with the node.
function simpleWalk(node, cs) {
  visitChildren(node, (child) => {
    simpleWalk(child, cs);
  });
  const {type} = node;
  if (!(type in cs)) {
    return;
  }
  cs[type](node);
}
61
62
// Full post-order walk that calls the single function |c| for every node,
// regardless of type. When |pre| is given it is called in pre-order (before
// the children); if it returns exactly `false`, the node and its children are
// skipped entirely.
function fullWalk(node, c, pre) {
  const verdict = pre?.(node);
  if (verdict === false) {
    return;
  }
  visitChildren(node, (child) => fullWalk(child, c, pre));
  c(node);
}
71
72
// Recursive walk that dispatches on node type: if |cs| has a property named
// after the node's type, it is called with the node and a continuation, and
// any further recursion is left to that function. Otherwise the walk simply
// descends into the node's children.
function recursiveWalk(node, cs) {
  const visit = (current) => {
    if (current.type in cs) {
      cs[current.type](current, visit);
    } else {
      visitChildren(current, (child) => recursiveWalk(child, cs));
    }
  };
  visit(node);
}
83
84
// AST Utilities
85
86
// Turns |node| into an empty statement in place by overwriting its type. The
// node's other properties are left as they are.
function emptyOut(node) {
  Object.assign(node, {type: 'EmptyStatement'});
}
89
90
// Stores |value| as the value of the literal node |item| and resets its `raw`
// text to null.
function setLiteralValue(item, value) {
  Object.assign(item, {value, raw: null});
}
94
95
// Returns true if |node| is a Literal node holding a string value.
//
// A missing node (undefined/null) is simply "not a string literal". Callers
// in this file pass `node.arguments[0]`, which is undefined for a call with
// no arguments, so this must not throw in that case.
function isLiteralString(node) {
  return node?.type === 'Literal' && typeof node.value === 'string';
}
98
99
// Prints |node| to stdout as JSON, indented by one space per nesting level.
function dump(node) {
  const text = JSON.stringify(node, null, ' ');
  console.log(text);
}
102
103
// Traverses a pattern node (identifier, object/array pattern, etc.), calling
// |onExpr| on every nested expression (default values and computed property
// keys) and |onBoundIdent| with the name of every identifier the pattern binds.
function walkPattern(node, onExpr, onBoundIdent) {
  const handlers = {
    AssignmentPattern: (pattern, recurse) => {
      // `target = defaultValue`: the left side binds, the right side is code.
      recurse(pattern.left);
      onExpr(pattern.right);
    },
    Property: (property, recurse) => {
      // Only a computed key is an expression; a plain key is just a label.
      if (property.computed) {
        onExpr(property.key);
      }
      recurse(property.value);
    },
    Identifier: (identifier) => {
      onBoundIdent(identifier.name);
    },
  };
  recursiveWalk(node, handlers);
}
121
122
// Conservatively reports whether evaluating |node| might have side effects.
//
// Every node reachable from |node| is inspected, except that the walk does
// not enter nested function scopes: defining a function runs none of its
// body. Any node type that is not explicitly known to be safe is assumed to
// have side effects.
function hasSideEffects(node) {
  // Constructors for which `new X(...)` is considered side-effect free. The
  // arguments are still inspected, as the walk visits them too.
  const safeConstructors = new Set([
    'TextDecoder',
    'ArrayBuffer',
    'Int8Array',
    'Uint8Array',
    'Int16Array',
    'Uint16Array',
    'Int32Array',
    'Uint32Array',
    'Float32Array',
    'Float64Array',
  ]);
  // Node types that open an inner function scope.
  const innerScopeTypes = new Set([
    'FunctionDeclaration',
    'FunctionExpression',
    'ArrowFunctionExpression',
  ]);

  let has = false;
  fullWalk(
    node,
    (node) => {
      switch (node.type) {
        case 'ExpressionStatement':
          // A directive such as 'use strict' changes how code is interpreted.
          if (node.directive) {
            has = true;
          }
          break;
        case 'UnaryExpression':
          // `delete` mutates the object it is applied to, even when that
          // object is otherwise considered safe to read (e.g. Math).
          if (node.operator === 'delete') {
            has = true;
          }
          break;
        // TODO: go through all the ESTree spec
        // Note: update expressions (x++) are deliberately absent here; their
        // ESTree node type is 'UpdateExpression' and they do have effects.
        case 'Literal':
        case 'Identifier':
        case 'BinaryExpression':
        case 'LogicalExpression':
        case 'ConditionalExpression':
        case 'VariableDeclaration':
        case 'VariableDeclarator':
        case 'ObjectExpression':
        case 'Property':
        case 'SpreadElement':
        case 'BlockStatement':
        case 'ArrayExpression':
        case 'EmptyStatement':
          break; // safe
        case 'MemberExpression':
          // safe if on Math (or other familiar objects, TODO)
          if (node.object.type !== 'Identifier' || node.object.name !== 'Math') {
            has = true;
          }
          break;
        case 'NewExpression':
          // default to unsafe, but can be safe on some familiar objects
          if (node.callee.type !== 'Identifier' || !safeConstructors.has(node.callee.name)) {
            has = true;
          }
          break;
        default:
          has = true;
      }
    },
    // Ignore inner scopes.
    (node) => !innerScopeTypes.has(node.type),
  );
  return has;
}
196
197
// Passes
198
199
// Removes obviously-unused code. Similar to closure compiler in its rules -
// export e.g. by Module['..'] = theThing; , or use it somewhere, otherwise
// it goes away.
//
// Note that this is somewhat conservative, since the ESTree AST does not
// have a simple separation between definitions and uses, e.g.
// Identifier is used both for the x in function foo(x) {
// and for y = x + 1 . That means we need to consider new ES6+ constructs
// as they appear (like ArrowFunctionExpression). Instead, we do a conservative
// analysis here.
//
// |ast| is modified in place. When |aggressive| is truthy, side-effect-free
// expression statements are removed as well, and the analysis is repeated
// until an iteration removes nothing.
function JSDCE(ast, aggressive) {
  // Runs one analyze-and-remove round; returns how many things were removed.
  function iteration() {
    let removed = 0;
    const scopes = [{}]; // begin with empty toplevel scope

    // Returns the def/use record for |name| in |scope|, creating it if needed.
    function ensureData(scope, name) {
      if (Object.prototype.hasOwnProperty.call(scope, name)) return scope[name];
      scope[name] = {
        def: 0,
        use: 0,
        param: 0, // true for function params, which cannot be eliminated
      };
      return scope[name];
    }

    // Records a use of |name| in the innermost scope currently being walked.
    function markUsed(name) {
      ensureData(scopes[scopes.length - 1], name).use = 1;
    }

    // Removes the definitions of |names| from the single scope rooted at |ast|.
    function cleanUp(ast, names) {
      recursiveWalk(ast, {
        ForStatement(node, c) {
          visitChildren(node, c);
          // If we had `for (var x = ...; ...)` and we removed `x`, we need to change to `for (; ...)`.
          if (node.init?.type === 'EmptyStatement') {
            node.init = null;
          }
        },
        ForInStatement(node, c) {
          // We can't remove the var in a for-in, as that would result in an invalid syntax. Skip the LHS.
          c(node.right);
          c(node.body);
        },
        ForOfStatement(node, c) {
          // We can't remove the var in a for-of, as that would result in an invalid syntax. Skip the LHS.
          c(node.right);
          c(node.body);
        },
        VariableDeclaration(node, _c) {
          let removedHere = 0;
          node.declarations = node.declarations.filter((declarator) => {
            assert(declarator.type === 'VariableDeclarator');
            // Keep the declarator if its initializer, or any expression nested
            // in its pattern, has side effects, or if any name it binds is
            // still needed.
            let keep = declarator.init && hasSideEffects(declarator.init);
            walkPattern(
              declarator.id,
              (value) => {
                keep ||= hasSideEffects(value);
              },
              (boundName) => {
                keep ||= !names.has(boundName);
              },
            );
            if (!keep) removedHere = 1;
            return keep;
          });
          removed += removedHere;
          if (node.declarations.length === 0) {
            emptyOut(node);
          }
        },
        ExpressionStatement(node, _c) {
          if (aggressive && !hasSideEffects(node)) {
            emptyOut(node);
            removed++;
          }
        },
        FunctionDeclaration(node, _c) {
          // An anonymous `export default function () {}` has no id, and so
          // can never be one of the eliminatable names.
          if (node.id && names.has(node.id.name)) {
            removed++;
            emptyOut(node);
            return;
          }
          // do not recurse into other scopes
        },
        // do not recurse into other scopes
        FunctionExpression() {},
        ArrowFunctionExpression() {},
      });
    }

    function handleFunction(node, c, defun) {
      // defun names matter - function names (the y in var x = function y() {..}) are just for stack traces.
      // An anonymous default-exported function declaration defines no name.
      const definedName = defun && node.id ? node.id.name : '';
      if (definedName) {
        ensureData(scopes[scopes.length - 1], definedName).def = 1;
      }
      const scope = {};
      scopes.push(scope);
      for (const param of node.params) {
        walkPattern(param, c, (name) => {
          ensureData(scope, name).def = 1;
          scope[name].param = 1;
        });
      }
      c(node.body);
      // we can ignore self-references, i.e., references to ourselves inside
      // ourselves, for named defined (defun) functions
      const ownName = definedName;
      const names = new Set();
      for (const name in scopes.pop()) {
        if (name === ownName) continue;
        const data = scope[name];
        if (data.use && !data.def) {
          // this is used from a higher scope, propagate the use down
          ensureData(scopes[scopes.length - 1], name).use = 1;
          continue;
        }
        if (data.def && !data.use && !data.param) {
          // this is eliminatable!
          names.add(name);
        }
      }
      cleanUp(node.body, names);
    }

    recursiveWalk(ast, {
      VariableDeclarator(node, c) {
        walkPattern(node.id, c, (name) => {
          ensureData(scopes[scopes.length - 1], name).def = 1;
        });
        if (node.init) c(node.init);
      },
      ObjectExpression(node, c) {
        // ignore the property identifiers
        node.properties.forEach((property) => {
          if (property.value) {
            c(property.value);
          } else if (property.argument) {
            c(property.argument);
          }
        });
      },
      MemberExpression(node, c) {
        c(node.object);
        // Ignore a property identifier (a.X), but notice a[X] (computed props).
        if (node.computed) {
          c(node.property);
        }
      },
      FunctionDeclaration(node, c) {
        handleFunction(node, c, true /* defun */);
      },
      FunctionExpression(node, c) {
        handleFunction(node, c);
      },
      ArrowFunctionExpression(node, c) {
        handleFunction(node, c);
      },
      Identifier(node, _c) {
        markUsed(node.name);
      },
      ExportDefaultDeclaration(node, c) {
        // The default export may be a named function/class declaration, an
        // anonymous one (id is null), or an arbitrary expression such as
        // `export default Module;` (no id at all). Only a named declaration
        // needs to be marked explicitly; identifiers inside expressions are
        // picked up by walking the declaration.
        const name = node.declaration.id?.name;
        if (name) {
          markUsed(name);
        }
        c(node.declaration);
      },
      ExportNamedDeclaration(node, c) {
        const declaration = node.declaration;
        if (declaration) {
          if (declaration.type === 'VariableDeclaration') {
            // Every name bound by the declaration is exported, including the
            // names inside destructuring patterns. Expressions nested in the
            // patterns are visited by the walk of the declaration below.
            for (const decl of declaration.declarations) {
              walkPattern(decl.id, () => {}, markUsed);
            }
          } else {
            assert(
              declaration.type === 'FunctionDeclaration' || declaration.type === 'ClassDeclaration',
            );
            markUsed(declaration.id.name);
          }
          c(declaration);
        } else {
          for (const specifier of node.specifiers) {
            markUsed(specifier.local.name);
          }
        }
      },
    });

    // toplevel
    const scope = scopes.pop();
    assert(scopes.length === 0);

    const names = new Set();
    for (const [name, data] of Object.entries(scope)) {
      if (data.def && !data.use) {
        assert(!data.param); // can't be
        // this is eliminatable!
        names.add(name);
      }
    }
    cleanUp(ast, names);
    return removed;
  }
  while (iteration() && aggressive) {} // eslint-disable-line no-empty
}
399
400
// Aggressive JSDCE: runs JSDCE on |ast| with its aggressive flag turned on,
// which makes it iterate multiple times.
function AJSDCE(ast) {
  const aggressive = true;
  JSDCE(ast, aggressive);
}
404
405
// Matches the statement that gives `wasmImports` its object-literal value, in
// either of two forms:
//   var wasmImports = {..}
// or
//   wasmImports = {..}
// The result is truthy on a match and falsy otherwise.
function isWasmImportsAssign(node) {
  const isPlainAssignment =
    node.type === 'AssignmentExpression' &&
    node.left.name == 'wasmImports' &&
    node.right.type == 'ObjectExpression';
  if (isPlainAssignment) {
    return true;
  }
  if (node.type !== 'VariableDeclaration' || node.declarations.length !== 1) {
    return false;
  }
  const declarator = node.declarations[0];
  return (
    declarator.id.name === 'wasmImports' &&
    declarator.init &&
    declarator.init.type === 'ObjectExpression'
  );
}
424
425
// Returns the value given to `wasmImports` by |node|: the initializer of the
// first declarator when |node| has declarations, otherwise the right-hand
// side of the assignment.
function getWasmImportsValue(node) {
  return node.declarations ? node.declarations[0].init : node.right;
}
432
433
// Matches usages of symbols on the `wasmExports` object that are spelled with
// a string literal, e.g.:
//   wasmExports['X']
function isExportUse(node) {
  if (node.type !== 'MemberExpression') {
    return false;
  }
  const {object, property} = node;
  return object.type === 'Identifier' && isLiteralString(property) && object.name === 'wasmExports';
}
443
444
// Returns the value of the property literal of a member expression such as
// wasmExports['X'] or Module['X'], i.e. 'X'.
function getExportOrModuleUseName(node) {
  const {property} = node;
  return property.value;
}
447
448
// Matches usages of properties on the `Module` object that are spelled with a
// string literal, e.g.:
//   Module['X']
function isModuleUse(node) {
  if (node.type !== 'MemberExpression') {
    return false;
  }
  const {object, property} = node;
  return object.type === 'Identifier' && object.name === 'Module' && isLiteralString(property);
}
456
457
// Apply import/export name changes (after minifying them).
//
// `extraInfo.mapping` maps original names to new names. Two places in |ast|
// are rewritten in place:
//   * the keys of the object assigned to `wasmImports`, whether written as
//     identifiers or as quoted strings;
//   * the property string of every `wasmExports['X']` expression.
//
// Each `wasmExports['X']` node is renamed exactly once, when the walk reaches
// that node itself. The walk is post-order, so also renaming it from its
// parent (an assignment or a call) would apply the mapping a second time to
// the already-renamed string, which gives the wrong result whenever a new
// name is also a key of the mapping.
function applyImportAndExportNameChanges(ast) {
  const mapping = extraInfo.mapping;
  // Looks |name| up among the mapping's own keys only, so that names such as
  // 'constructor' do not pick up inherited Object.prototype properties.
  const renamed = (name) => (Object.hasOwn(mapping, name) ? mapping[name] : undefined);

  fullWalk(ast, (node) => {
    if (isWasmImportsAssign(node)) {
      for (const item of getWasmImportsValue(node).properties) {
        const key = item.key;
        const quoted = key.type === 'Literal';
        const newName = renamed(quoted ? key.value : key.name);
        if (!newName) {
          continue;
        }
        if (quoted) {
          setLiteralValue(key, newName);
        } else {
          key.name = newName;
        }
      }
    } else if (isExportUse(node)) {
      // e.g. wasmExports["___wasm_call_ctors"] -> wasmExports["M"]
      const newName = renamed(node.property.value);
      if (newName) {
        setLiteralValue(node.property, newName);
      }
    }
  });
}
492
493
// A static dyncall is dynCall('vii', ..), which is actually static even
// though we call dynCall() - we see the string signature statically.
function isStaticDynCall(node) {
  if (node.type !== 'CallExpression') {
    return false;
  }
  const {callee} = node;
  return callee.type === 'Identifier' && callee.name === 'dynCall' && isLiteralString(node.arguments[0]);
}
503
504
// Returns the name of the dynCall_* function that a static dyncall depends
// on, e.g. 'dynCall_vii' for dynCall('vii', ..).
function getStaticDynCallName(node) {
  const [signature] = node.arguments;
  return 'dynCall_' + signature.value;
}
507
508
// A dynamic dyncall is one in which all we know is that *some* dynCall may
// be called, but not which. This can be either
//   dynCall(*not a string*, ..)
// or, to be conservative, the string literal
//   "dynCall_"
// as that prefix means we may be constructing a dynamic dyncall name
// (dynCall and embind's requireFunction do this internally).
function isDynamicDynCall(node) {
  if (node.type === 'CallExpression') {
    const {callee} = node;
    return callee.type === 'Identifier' && callee.name === 'dynCall' && !isLiteralString(node.arguments[0]);
  }
  return isLiteralString(node) && node.value === 'dynCall_';
}
524
525
//
526
// Emit the DCE graph, to help optimize the combined JS+wasm.
527
// This finds where JS depends on wasm, and where wasm depends
528
// on JS, and prints that out.
529
//
530
// The analysis here is simplified, and not completely general. It
531
// is enough to optimize the common case of JS library and runtime
532
// functions involved in loops with wasm, but not more complicated
533
// things like JS objects and sub-functions. Specifically we
534
// analyze as follows:
535
//
536
// * We consider (1) the toplevel scope, and (2) the scopes of toplevel defined
537
// functions (defun, not function; i.e., function X() {} where
538
// X can be called later, and not y = function Z() {} where Z is
539
// just a name for stack traces). We also consider the wasm, which
540
// we can see things going to and arriving from.
541
// * Anything used in a defun creates a link in the DCE graph, either
542
// to another defun, or the wasm.
543
// * Anything used in the toplevel scope is rooted, as it is code
544
// we assume will execute. The exceptions are
545
// * when we receive something from wasm; those are "free" and
546
// do not cause rooting. (They will become roots if they are
547
// exported, the metadce logic will handle that.)
548
// * when we send something to wasm; sending a defun causes a
549
// link in the DCE graph.
550
// * Anything not in the toplevel or not in a toplevel defun is
551
// considered rooted. We don't optimize those cases.
552
//
553
// Special handling:
554
//
555
// * dynCall('vii', ..) are dynamic dynCalls, but we analyze them
556
// statically, to preserve the dynCall_vii etc. method they depend on.
557
// Truly dynamic dynCalls (not to a string constant) will not work,
558
// and require the user to export them.
559
// * Truly dynamic dynCalls are assumed to reach any dynCall_*.
560
//
561
// XXX this modifies the input AST. if you want to keep using it,
562
// that should be fixed. Currently the main use case here does
563
// not require that. TODO FIXME
564
//
565
function emitDCEGraph(ast) {
  // Walks |ast| twice and prints the resulting DCE graph with dump(). Note
  // that |ast| is modified in place: nodes handled by the first pass are
  // emptied out so that the second pass ignores them.
  //
  // The name dictionaries below are keyed by names taken from the program
  // being analyzed, so they are always queried with Object.hasOwn() rather
  // than by calling a method on the dictionary itself; a program symbol named
  // `hasOwnProperty` would otherwise shadow that method.
  //
  // First pass: find the wasm imports and exports, and the toplevel
  // defuns, and save them on the side, removing them from the AST,
  // which makes the second pass simpler.
  //
  // The imports that wasm receives look like this:
  //
  //  var wasmImports = { "abort": abort, "assert": assert, [..] };
  //
  // The exports are trickier, as they have a different form whether or not
  // async compilation is enabled. It can be either:
  //
  //  var _malloc = Module['_malloc'] = wasmExports['_malloc'];
  //
  // or
  //
  //  var _malloc = wasmExports['_malloc'];
  //
  // or
  //
  //  var _malloc = Module['_malloc'] = (x) => wasmExports['_malloc'](x);
  //
  // or, in the minimal runtime, it looks like
  //
  //  function assignWasmExports(wasmExports)
  //   ..
  //   _malloc = wasmExports["malloc"];
  //   ..
  //  });
  const imports = [];
  const defuns = [];
  const dynCallNames = [];
  const nameToGraphName = {};
  const modulePropertyToGraphName = {};
  const exportNameToGraphName = {}; // identical to wasmExports['..'] nameToGraphName
  let foundWasmImportsAssign = false;
  let foundMinimalRuntimeExports = false;

  // Builds the graph node name for |name|; |what| is the kind of node
  // ('import', 'export' or 'defun').
  function getGraphName(name, what) {
    return 'emcc$' + what + '$' + name;
  }

  function saveAsmExport(name, asmName) {
    // the asmName is what the wasm provides directly; the outside JS
    // name may be slightly different (extra "_" in wasm backend)
    const graphName = getGraphName(name, 'export');
    nameToGraphName[name] = graphName;
    modulePropertyToGraphName[name] = graphName;
    exportNameToGraphName[asmName] = graphName;
    if (/^dynCall_/.test(name)) {
      dynCallNames.push(graphName);
    }
  }

  // We track defined functions very carefully, so that we can remove them and
  // the things they call, but other function scopes (like arrow functions and
  // object methods) are trickier to track (object methods require knowing what
  // object a function name is called on), so we do not track those. We consider
  // all content inside them as top-level, which means it is used.
  let specialScopes = 0;

  fullWalk(
    ast,
    (node) => {
      if (isWasmImportsAssign(node)) {
        const assignedObject = getWasmImportsValue(node);
        assignedObject.properties.forEach((item) => {
          let value = item.value;
          if (value.type === 'Literal' || value.type === 'FunctionExpression') {
            return; // if it's a numeric or function literal, nothing to do here
          }
          if (value.type === 'LogicalExpression') {
            // We may have something like wasmMemory || Module.wasmMemory in pthreads code;
            // use the left hand identifier.
            value = value.left;
          }
          assertAt(value.type === 'Identifier', value);
          const nativeName = item.key.type == 'Literal' ? item.key.value : item.key.name;
          assert(nativeName);
          imports.push([value.name, nativeName]);
        });
        foundWasmImportsAssign = true;
        emptyOut(node); // ignore this in the second pass; this does not root
      } else if (node.type === 'AssignmentExpression') {
        const target = node.left;
        // Ignore assignment to the wasmExports object (as happens in
        // applySignatureConversions).
        if (isExportUse(target)) {
          emptyOut(node);
        }
      } else if (node.type === 'VariableDeclaration') {
        if (node.declarations.length === 1) {
          const item = node.declarations[0];
          const name = item.id.name;
          const value = item.init;
          if (value && isExportUse(value)) {
            const asmName = getExportOrModuleUseName(value);
            // this is:
            //  var _x = wasmExports['x'];
            saveAsmExport(name, asmName);
            emptyOut(node);
          } else if (value && value.type === 'AssignmentExpression') {
            const assigned = value.left;
            if (isModuleUse(assigned) && getExportOrModuleUseName(assigned) === name) {
              // this is
              //  var x = Module['x'] = ?
              // which looks like a wasm export being received. confirm with the asm use
              let found = 0;
              let asmName;
              fullWalk(value.right, (inner) => {
                if (isExportUse(inner)) {
                  found++;
                  asmName = getExportOrModuleUseName(inner);
                }
              });
              // in the wasm backend, the asm name may have one fewer "_" prefixed
              if (found === 1) {
                // this is indeed an export
                // the asmName is what the wasm provides directly; the outside JS
                // name may be slightly different (extra "_" in wasm backend)
                saveAsmExport(name, asmName);
                emptyOut(node); // ignore this in the second pass; this does not root
                return;
              }
              if (value.right.type === 'Literal') {
                // this is
                //  var x = Module['x'] = 1234;
                // this form occurs when global addresses are exported from the
                // module. It doesn't constitute a usage.
                assertAt(typeof value.right.value === 'number', value.right);
                emptyOut(node);
              }
            }
          }
        }
        // A variable declaration that has no initial values can be ignored in
        // the second pass, these are just declarations, not roots - an actual
        // use must be found in order to root.
        if (!node.declarations.some((decl) => decl.init)) {
          emptyOut(node);
        }
      } else if (node.type === 'FunctionDeclaration') {
        const name = node.id.name;
        // Check if this is the minimal runtime exports function, which looks like
        //   function assignWasmExports(wasmExports)
        if (
          name == 'assignWasmExports' &&
          node.params.length === 1 &&
          node.params[0].type === 'Identifier' &&
          node.params[0].name === 'wasmExports'
        ) {
          // This looks very much like what we are looking for.
          const body = node.body.body;
          assert(!foundMinimalRuntimeExports);
          foundMinimalRuntimeExports = true;
          for (const item of body) {
            if (
              item.type === 'ExpressionStatement' &&
              item.expression.type === 'AssignmentExpression' &&
              item.expression.operator === '=' &&
              item.expression.left.type === 'Identifier' &&
              item.expression.right.type === 'MemberExpression' &&
              item.expression.right.object.type === 'Identifier' &&
              item.expression.right.object.name === 'wasmExports' &&
              item.expression.right.property.type === 'Literal'
            ) {
              const exportedName = item.expression.left.name;
              const asmName = item.expression.right.property.value;
              saveAsmExport(exportedName, asmName);
              emptyOut(item); // ignore all this in the second pass; this does not root
            }
          }
        } else if (!specialScopes) {
          defuns.push(node);
          nameToGraphName[name] = getGraphName(name, 'defun');
          emptyOut(node); // ignore this in the second pass; we scan defuns separately
        }
      } else if (node.type === 'ArrowFunctionExpression') {
        assert(specialScopes > 0);
        specialScopes--;
      } else if (node.type === 'Property' && node.method) {
        assert(specialScopes > 0);
        specialScopes--;
      }
    },
    (node) => {
      // Pre-walking logic. We note special scopes (see above).
      if (node.type === 'ArrowFunctionExpression' || (node.type === 'Property' && node.method)) {
        specialScopes++;
      }
    },
  );
  // Scoping must balance out.
  assert(specialScopes === 0);
  // We must have found the info we need.
  assert(
    foundWasmImportsAssign,
    'could not find the assignment to "wasmImports". perhaps --pre-js or --post-js code moved it out of the global scope? (things like that should be done after emcc runs, as they do not need to be run through the optimizer which is the special thing about --pre-js/--post-js code)',
  );
  // Read exports that were declared in extraInfo
  if (extraInfo) {
    for (const exp of extraInfo.exports) {
      saveAsmExport(exp[0], exp[1]);
    }
  }

  // Second pass: everything used in the toplevel scope is rooted;
  // things used in defun scopes create links
  const infos = {}; // the graph name of the item => info for it
  for (const [jsName, nativeName] of imports) {
    const name = getGraphName(jsName, 'import');
    const info = (infos[name] = {
      name: name,
      import: ['env', nativeName],
      reaches: new Set(),
    });
    if (Object.hasOwn(nameToGraphName, jsName)) {
      info.reaches.add(nameToGraphName[jsName]);
    } // otherwise, it's a number, ignore
  }
  for (const [exportName, graphName] of Object.entries(exportNameToGraphName)) {
    infos[graphName] = {
      name: graphName,
      export: exportName,
      reaches: new Set(),
    };
  }
  // a function that handles a node we visit, in either a defun or
  // the toplevel scope (in which case the second param is not provided)
  function visitNode(node, defunInfo) {
    // TODO: scope awareness here. for now we just assume all uses are
    //       from the top scope, which might create more uses than needed
    let reached;
    if (node.type === 'Identifier') {
      const name = node.name;
      if (Object.hasOwn(nameToGraphName, name)) {
        reached = nameToGraphName[name];
      }
    } else if (isModuleUse(node)) {
      const name = getExportOrModuleUseName(node);
      if (Object.hasOwn(modulePropertyToGraphName, name)) {
        reached = modulePropertyToGraphName[name];
      }
    } else if (isStaticDynCall(node)) {
      reached = getGraphName(getStaticDynCallName(node), 'export');
    } else if (isDynamicDynCall(node)) {
      // this can reach *all* dynCall_* targets, we can't narrow it down
      reached = dynCallNames;
    } else if (isExportUse(node)) {
      // any remaining asm uses are always rooted in any case
      const name = getExportOrModuleUseName(node);
      if (Object.hasOwn(exportNameToGraphName, name)) {
        infos[exportNameToGraphName[name]].root = true;
      }
      return;
    }
    if (!reached) {
      return;
    }
    const addReach = (target) => {
      if (defunInfo) {
        defunInfo.reaches.add(target); // defun reaches it
      } else if (infos[target]) {
        infos[target].root = true; // in global scope, root it
      } else {
        // An info might not exist for the identifier if it is missing, for
        // example, we might call Module.dynCall_vi in library code, but it
        // won't exist in a standalone (non-JS) build anyhow. We can ignore
        // it in that case as the JS won't be used, but warn to be safe.
        trace('metadce: missing declaration for ' + target);
      }
    };
    if (typeof reached === 'string') {
      addReach(reached);
    } else {
      // A dynamic dyncall reaches a whole list of targets.
      reached.forEach((target) => addReach(target));
    }
  }
  defuns.forEach((defun) => {
    const name = getGraphName(defun.id.name, 'defun');
    const info = (infos[name] = {
      name: name,
      reaches: new Set(),
    });
    fullWalk(defun.body, (node) => visitNode(node, info));
  });
  fullWalk(ast, (node) => visitNode(node, null));
  // Final work: print out the graph
  // sort for determinism
  const graph = Object.entries(infos)
    .sort(([name1], [name2]) => (name1 > name2 ? 1 : -1))
    .map(([_name, info]) => ({
      ...info,
      reaches: Array.from(info.reaches).sort(),
    }));
  dump(graph);
}
864
865
// Apply graph removals from running wasm-metadce. This only removes imports and
// exports from JS side, effectively disentangling the wasm and JS sides that
// way (and we leave further DCE on the JS and wasm sides to their respective
// optimizers, closure compiler and binaryen).
//
// The names to remove are read from `extraInfo.unusedImports` and
// `extraInfo.unusedExports`; |ast| is modified in place. An assertion fails if
// any listed name is not found in the AST.
function applyDCEGraphRemovals(ast) {
  const unusedExports = new Set(extraInfo.unusedExports);
  const unusedImports = new Set(extraInfo.unusedImports);
  const foundUnusedImports = new Set();
  const foundUnusedExports = new Set();
  trace('unusedExports:', unusedExports);
  trace('unusedImports:', unusedImports);

  fullWalk(ast, (node) => {
    if (isWasmImportsAssign(node)) {
      const assignedObject = getWasmImportsValue(node);
      assignedObject.properties = assignedObject.properties.filter((item) => {
        // The key may be written as an identifier (abort: ..) or as a quoted
        // string ("abort": ..); emitDCEGraph accepts both forms, so both must
        // be recognized here as well.
        const name = item.key.type === 'Literal' ? item.key.value : item.key.name;
        if (!unusedImports.has(name)) {
          return true;
        }
        foundUnusedImports.add(name);
        // An unused import is still kept if evaluating its value has effects.
        return hasSideEffects(item.value);
      });
    } else if (node.type === 'ExpressionStatement') {
      let expr = node.expression;
      // Inside the assignWasmExports function we have
      //
      //   _x = wasmExports['x']
      //
      // or:
      //
      //   Module['_x'] = _x = wasmExports['x']
      //
      if (expr.type == 'AssignmentExpression' && expr.right.type == 'AssignmentExpression') {
        expr = expr.right;
      }
      if (expr.operator === '=' && expr.left.type === 'Identifier' && isExportUse(expr.right)) {
        const exportName = getExportOrModuleUseName(expr.right);
        if (unusedExports.has(exportName)) {
          emptyOut(node);
          foundUnusedExports.add(exportName);
        }
      }
    }
  });

  for (const i of unusedImports) {
    assert(foundUnusedImports.has(i), 'unused import not found: ' + i);
  }
  for (const e of unusedExports) {
    assert(foundUnusedExports.has(e), 'unused export not found: ' + e);
  }
}
919
920
// Creates a new Literal node holding |value|, with the value's string form as
// its raw text.
function createLiteral(value) {
  const raw = '' + value;
  return {type: 'Literal', value, raw};
}
927
928
// Creates a new Identifier node with the given |name|.
function makeIdentifier(name) {
  return {type: 'Identifier', name};
}
934
935
// Rewrites |node| in place into a call of the function called |name| with the
// argument nodes |args|. Properties of |node| other than `type`, `callee` and
// `arguments` are left as they are.
function makeCallExpression(node, name, args) {
  const callee = makeIdentifier(name);
  node.type = 'CallExpression';
  node.callee = callee;
  node.arguments = args;
}
942
943
// Returns true if |name| is exactly one of the HEAP* view names listed below.
function isEmscriptenHEAP(name) {
  const heapViewNames = [
    'HEAP8',
    'HEAPU8',
    'HEAP16',
    'HEAPU16',
    'HEAP32',
    'HEAPU32',
    'HEAP64',
    'HEAPU64',
    'HEAPF32',
    'HEAPF64',
  ];
  return heapViewNames.includes(name);
}
962
963
// For each multi-byte HEAP view: the element width in bytes, and the names of
// the helper functions that replace loads from and stores to that view.
const littleEndianHelper = {
  HEAP16: {
    width: 2,
    load: 'LE_HEAP_LOAD_I16',
    store: 'LE_HEAP_STORE_I16',
  },
  HEAPU16: {
    width: 2,
    load: 'LE_HEAP_LOAD_U16',
    store: 'LE_HEAP_STORE_U16',
  },
  HEAP32: {
    width: 4,
    load: 'LE_HEAP_LOAD_I32',
    store: 'LE_HEAP_STORE_I32',
  },
  HEAPU32: {
    width: 4,
    load: 'LE_HEAP_LOAD_U32',
    store: 'LE_HEAP_STORE_U32',
  },
  HEAP64: {
    width: 8,
    load: 'LE_HEAP_LOAD_I64',
    store: 'LE_HEAP_STORE_I64',
  },
  HEAPU64: {
    width: 8,
    load: 'LE_HEAP_LOAD_U64',
    store: 'LE_HEAP_STORE_U64',
  },
  HEAPF32: {
    width: 4,
    load: 'LE_HEAP_LOAD_F32',
    store: 'LE_HEAP_STORE_F32',
  },
  HEAPF64: {
    width: 8,
    load: 'LE_HEAP_LOAD_F64',
    store: 'LE_HEAP_STORE_F64',
  },
};
973
974
// Replaces each HEAP access with a call to a helper function that uses
// DataView to enforce little-endian byte order on the HEAP buffer:
//
//   nameXX[idx]                           -> LE_HEAP_LOAD_XX(idx*XX)
//   nameXX[idx] = value                   -> LE_HEAP_STORE_XX(idx*XX, value)
//   (growMemViews(),nameXX)[idx]          -> LE_HEAP_LOAD_XX((growMemViews(),idx*XX))
//   (growMemViews(),nameXX)[idx] = value  -> LE_HEAP_STORE_XX((growMemViews(),idx*XX), value)
//   Atomics.x(args)                       -> LE_ATOMICS_X(args)
//
// Heaps that have no entry in littleEndianHelper keep their plain indexed
// access. The AST is modified in place.
function littleEndianHeap(ast) {
  recursiveWalk(ast, {
    FunctionDeclaration(node, c) {
      // do not recurse into LE_HEAP_STORE, LE_HEAP_LOAD functions
      const isHelper =
        node.id.type === 'Identifier' &&
        (node.id.name.startsWith('LE_HEAP') || node.id.name.startsWith('LE_ATOMICS_'));
      if (!isHelper) {
        c(node.body);
      }
    },
    VariableDeclarator(node, c) {
      // do not touch the definitions of the LE_ATOMICS_* helpers
      if (node.id.type === 'Identifier' && node.id.name.startsWith('LE_ATOMICS_')) {
        return;
      }
      c(node.id);
      if (node.init) c(node.init);
    },
    AssignmentExpression(node, c) {
      const target = node.left;
      const value = node.right;
      c(value);
      const heap = isHEAPAccess(target);
      const growHeap = isGrowHEAPAccess(target);
      if (!heap && !growHeap) {
        // not accessing the HEAP
        c(target);
        return;
      }
      const idx = target.property;
      // The index is an arbitrary expression that may itself read from the
      // heap (e.g. `HEAP32[HEAP32[p >> 2] >> 2] = v`), so it must be walked
      // too; otherwise those nested loads would be left untransformed.
      c(idx);
      const helper = littleEndianHelper[heap || growHeap];
      if (!helper) {
        // no helper for this heap: leave the store as it is
        return;
      }
      // NOTE(review): node.operator is not consulted, so a compound store
      // such as `HEAP32[i] += v` is rewritten exactly like a plain `=` store.
      const offset = multiply(idx, helper.width);
      if (heap) {
        // "nameXX[idx] = value" -> "LE_HEAP_STORE_XX(idx*XX, value)"
        makeCallExpression(node, helper.store, [offset, value]);
      } else {
        // "(growMemViews(),nameXX)[idx] = value" -> "LE_HEAP_STORE_XX((growMemViews(),idx*XX), value)"
        makeCallExpression(node, helper.store, [
          makeSequence(makeCallGrowMemViews(), offset),
          value,
        ]);
      }
    },
    CallExpression(node, c) {
      if (node.arguments) {
        for (const arg of node.arguments) c(arg);
      }
      const callee = node.callee;
      if (
        // Atomics.X(args) -> LE_ATOMICS_X(args)
        callee.type === 'MemberExpression' &&
        callee.object.type === 'Identifier' &&
        callee.object.name === 'Atomics' &&
        !callee.computed
      ) {
        makeCallExpression(node, 'LE_ATOMICS_' + callee.property.name.toUpperCase(), node.arguments);
      } else {
        c(callee);
      }
    },
    MemberExpression(node, c) {
      c(node.property);
      const heap = isHEAPAccess(node);
      const growHeap = isGrowHEAPAccess(node);
      if (!heap && !growHeap) {
        // not accessing the HEAP
        c(node.object);
        return;
      }
      const helper = littleEndianHelper[heap || growHeap];
      if (!helper) {
        // no helper for this heap: leave the load as it is
        return;
      }
      const offset = multiply(node.property, helper.width);
      if (heap) {
        // "nameXX[idx]" -> "LE_HEAP_LOAD_XX(idx*XX)"
        makeCallExpression(node, helper.load, [offset]);
      } else {
        // "(growMemViews(),nameXX)[idx]" -> "LE_HEAP_LOAD_XX((growMemViews(),idx*XX))"
        makeCallExpression(node, helper.load, [makeSequence(makeCallGrowMemViews(), offset)]);
      }
    },
  });
}
1072
1073
// Instrument heap accesses to call the growMemViews helper function, which
// allows pthreads + memory growth to work (we check if the memory was grown on
// another thread in each access), see #8365.
function growableHeap(ast) {
  // The functions whose bodies must be left alone.
  const isHelperId = (id) =>
    id.type === 'Identifier' && (id.name === 'growMemViews' || id.name === 'LE_HEAP_UPDATE');

  recursiveWalk(ast, {
    ExportNamedDeclaration() {
      // Do not recurse export statements since we don't want to rewrite, for example, `export { HEAP32 }`
    },
    FunctionDeclaration(node, c) {
      // Do not recurse into the helper function itself.
      if (!isHelperId(node.id)) {
        c(node.body);
      }
    },
    AssignmentExpression(node, c) {
      const {left, right} = node;
      // Don't transform `HEAPxx =` assignments: a bare identifier on the left
      // is skipped, anything else is walked.
      if (left.type !== 'Identifier') {
        c(left);
      }
      c(right);
    },
    VariableDeclarator(node, c) {
      // Don't transform the var declarations for HEAP8 etc
      // but do transform anything that sets a var to
      // something from HEAP8 etc
      if (node.init) {
        c(node.init);
      }
    },
    Identifier(node) {
      if (!isEmscriptenHEAP(node.name)) {
        return;
      }
      // Transform `HEAPxx` into `(growMemViews(), HEAPxx)`.
      // Important: don't just do `growMemViews(HEAPxx)` because `growMemViews` reassigns `HEAPxx`
      // and we want to get an updated value after that reassignment.
      const original = {...node};
      Object.assign(node, makeSequence(makeCallGrowMemViews(), original));
    },
  });
}
1117
1118
// Builds a fresh AST node for the call `growMemViews()`.
function makeCallGrowMemViews() {
  const callee = {type: 'Identifier', name: 'growMemViews'};
  return {type: 'CallExpression', callee, arguments: []};
}
1128
1129
// Builds the AST for a parenthesized comma expression `(a, b, ...)` out of the
// given expression nodes.
function makeSequence(...expressions) {
  const sequence = {type: 'SequenceExpression', expressions};
  return {type: 'ParenthesizedExpression', expression: sequence};
}
1138
1139
// Make all JS pointers unsigned. We do this by modifying things like
// HEAP32[X >> 2] to HEAP32[X >>> 2]. We also need to handle the case of
// HEAP32[X] and make that HEAP32[X >>> 0], things like subarray(), etc.
// The AST is modified in place.
function unsignPointers(ast) {
  // Aside from the standard emscripten HEAP*s, also identify just "HEAP"/"heap"
  // as representing a heap. This can be used in JS library code in order
  // to get this pass to fix it up.
  function isHeap(name) {
    return isEmscriptenHEAP(name) || name === 'heap' || name === 'HEAP';
  }

  // Returns an unsigned version of the pointer expression |node|: either
  // |node| itself (modified in place) or a new node wrapping it.
  function unsign(node) {
    // The pointer is often a >> shift, which we can just turn into >>>
    if (node.type === 'BinaryExpression' && node.operator === '>>') {
      node.operator = '>>>';
      return node;
    }
    // If nothing else worked out, add a new shift.
    return {
      type: 'BinaryExpression',
      left: node,
      operator: '>>>',
      right: {
        type: 'Literal',
        value: 0,
      },
    };
  }

  // Unsigns the arguments of |call| at positions |first|..|last| (inclusive),
  // skipping positions the call does not actually supply.
  function unsignArguments(call, first, last) {
    const end = Math.min(last, call.arguments.length - 1);
    for (let i = first; i <= end; i++) {
      call.arguments[i] = unsign(call.arguments[i]);
    }
  }

  fullWalk(ast, (node) => {
    if (node.type === 'MemberExpression') {
      // Check if this is HEAP*[?]
      if (node.object.type === 'Identifier' && isHeap(node.object.name) && node.computed) {
        node.property = unsign(node.property);
      }
      return;
    }
    if (node.type !== 'CallExpression') {
      return;
    }
    const callee = node.callee;
    if (
      callee.type !== 'MemberExpression' ||
      callee.object.type !== 'Identifier' ||
      !isHeap(callee.object.name) ||
      callee.computed
    ) {
      return;
    }
    // This is a call on HEAP*.?. Specific things we need to fix up are
    // subarray, set, and copyWithin. TODO more?
    switch (callee.property.name) {
      case 'set':
        // Only the second argument is unsigned here.
        unsignArguments(node, 1, 1);
        break;
      case 'subarray':
        unsignArguments(node, 0, 1);
        break;
      case 'copyWithin':
        // Up to three arguments. Missing ones are skipped rather than passed
        // to unsign(), which would throw on `undefined`.
        unsignArguments(node, 0, 2);
        break;
    }
  });
}
1207
1208
// If |node| is a computed access directly on an emscripten heap, i.e.
// `HEAPxx[...]`, returns the heap's name; otherwise returns a falsy value.
function isHEAPAccess(node) {
  const {type, object, computed} = node;
  if (type !== 'MemberExpression' || object.type !== 'Identifier') {
    return false;
  }
  // notice a[X] but not a.X
  return computed && isEmscriptenHEAP(object.name) && object.name;
}
1217
1218
// If |node| is a computed access of the form `(growMemViews(), HEAPxx)[...]`
// (with or without an explicit ParenthesizedExpression wrapper), returns the
// heap's name; otherwise returns false.
function isGrowHEAPAccess(node) {
  // notice a[X] but not a.X
  if (node.type !== 'MemberExpression' || !node.computed) {
    return false;
  }
  let obj = node.object;
  if (obj.type === 'ParenthesizedExpression') {
    obj = obj.expression;
  }
  if (obj.type !== 'SequenceExpression' || obj.expressions.length !== 2) {
    return false;
  }
  const [growCall, heap] = obj.expressions;
  const callsGrowMemViews =
    growCall.type === 'CallExpression' &&
    growCall.callee.type === 'Identifier' &&
    growCall.callee.name === 'growMemViews';
  if (!callsGrowMemViews || heap.type !== 'Identifier') {
    return false;
  }
  return isEmscriptenHEAP(heap.name) && heap.name;
}
1237
1238
// Rewrites the index of the heap access |node| in place, so that
// `HEAP[idx]` becomes `HEAP[_asan_js_check_index(HEAP, idx, action)]`, where
// |action| is emitted as an identifier.
function asanifyTransform(node, action) {
  // Copy the operands first, as node.property itself is about to be
  // overwritten with the call.
  const heap = {...node.object};
  const index = {...node.property};
  makeCallExpression(node.property, '_asan_js_check_index', [heap, index, makeIdentifier(action)]);
}
1241
// Add ASan check to direct HEAP* loads/stores.
// That lets ASan cover JS too.
// The AST is modified in place.
function asanify(ast) {
  recursiveWalk(ast, {
    FunctionDeclaration(node, c) {
      // skip establishStackSpace, because it sets up variables used by ASan itself
      const skip = node.id.type === 'Identifier' && node.id.name === 'establishStackSpace';
      if (!skip) {
        c(node.body);
      }
    },
    AssignmentExpression(node, c) {
      const target = node.left;
      const value = node.right;
      c(value);
      if (!isHEAPAccess(target)) {
        c(target);
        return;
      }
      // The index expression may itself contain heap loads (e.g.
      // `HEAP32[HEAP32[p >> 2] >> 2] = v`); walk it so that those loads are
      // instrumented as well.
      c(target.property);
      // Instrument a store.
      asanifyTransform(target, '___asan_storeN');
    },
    MemberExpression(node, c) {
      c(node.property);
      if (isHEAPAccess(node)) {
        // Instrument a load.
        asanifyTransform(node, '___asan_loadN');
      } else {
        c(node.object);
      }
    },
  });
}
1274
1275
// Builds the AST for `value * by`, where |value| is an existing node and |by|
// is turned into a literal node.
function multiply(value, by) {
  const factor = createLiteral(by);
  return {type: 'BinaryExpression', left: value, operator: '*', right: factor};
}
1283
1284
// Rewrites the index of the heap access |node| in place, so that
// `HEAP[idx]` becomes `HEAP[SAFE_HEAP_INDEX(HEAP, idx, action)]`, where
// |action| is emitted as a literal.
function safeHeapTransform(node, action) {
  // Copy the operands first, as node.property itself is about to be
  // overwritten with the call.
  const heap = {...node.object};
  const index = {...node.property};
  makeCallExpression(node.property, 'SAFE_HEAP_INDEX', [heap, index, createLiteral(action)]);
}
1287
// Add SAFE_HEAP_INDEX check to heap access
// The AST is modified in place.
function safeHeap(ast) {
  recursiveWalk(ast, {
    AssignmentExpression(node, c) {
      const target = node.left;
      const value = node.right;
      c(value);
      if (!isHEAPAccess(target)) {
        c(target);
        return;
      }
      // The index expression may itself contain heap loads (e.g.
      // `HEAP32[HEAP32[p >> 2] >> 2] = v`); walk it so that those loads are
      // checked as well.
      c(target.property);
      // Instrument a store.
      safeHeapTransform(target, 'storing');
    },
    MemberExpression(node, c) {
      c(node.property);
      if (isHEAPAccess(node)) {
        // Instrument a load.
        safeHeapTransform(node, 'loading');
      } else {
        c(node.object);
      }
    },
  });
}
1312
1313
// Name minification
1314
1315
// Candidate names that ensureMinifiedNames() must never hand out.
// NOTE(review): no word longer than four characters is listed, and
// four-character literals such as `null` and `true` are absent too; confirm
// that the name generator can never get that far in practice.
const RESERVED = new Set([
  'do',
  'if',
  'in',
  'for',
  'new',
  'try',
  'var',
  'env',
  'let',
  'case',
  'else',
  'enum',
  'void',
  'this',
  'with',
]);
1333
// Characters allowed as the first character of a minified name...
const VALID_MIN_INITS = 'abcdefghijklmnopqrstuvwxyz' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + '_$';
// ...and as any later character (the same set, plus the digits).
const VALID_MIN_LATERS = `${VALID_MIN_INITS}0123456789`;
1335
1336
// All minified names generated so far, in generation order.
const minifiedNames = [];
// The generator's position: one index per character of the next candidate
// name. Entry 0 indexes VALID_MIN_INITS, later entries index
// VALID_MIN_LATERS.
const minifiedState = [0];

// Make sure the nth index in minifiedNames exists. Done 100% deterministically.
function ensureMinifiedNames(n) {
  const alphabetAt = (pos) => (pos === 0 ? VALID_MIN_INITS : VALID_MIN_LATERS);

  while (minifiedNames.length < n + 1) {
    // generate the current name
    let candidate = '';
    minifiedState.forEach((charIndex, pos) => {
      candidate += alphabetAt(pos)[charIndex];
    });
    if (!RESERVED.has(candidate)) {
      minifiedNames.push(candidate);
    }

    // increment the state, carrying into the next position on overflow
    for (let pos = 0; ; pos++) {
      if (pos === minifiedState.length) {
        // grow the name by one character; becomes 0 after the increment below
        minifiedState.push(-1);
      }
      minifiedState[pos]++;
      if (minifiedState[pos] < alphabetAt(pos).length) {
        break;
      }
      // overflow
      minifiedState[pos] = 0;
    }
  }
}
1361
1362
// Minifies the parameters, local variables and labels of each top-level
// function declaration in |ast|, and renames the globals those functions refer
// to (as well as the functions themselves) using the mapping in
// `extraInfo.globals`. The AST is modified in place.
function minifyLocals(ast) {
  // We are given a mapping of global names to their minified forms.
  assert(extraInfo?.globals);

  const topLevelFunctions = ast.body.filter((node) => node.type === 'FunctionDeclaration');

  for (const fun of topLevelFunctions) {
    // Find the list of local names, including params.
    const localNames = new Set(fun.params.map((param) => param.name));
    simpleWalk(fun, {
      VariableDeclaration(node) {
        node.declarations.forEach((dec) => localNames.add(dec.id.name));
      },
    });

    const isLocalName = (name) => localNames.has(name);

    // Names old to new names.
    const newNames = new Map();

    // The names in use, that must not be collided with.
    const usedNames = new Set();

    // Put the function name aside. We don't want to traverse it as it is not
    // in the scope of itself.
    const funId = fun.id;
    fun.id = null;

    // Find all the globals that we need to minify using pre-assigned names.
    // Don't actually minify them yet as that might interfere with local
    // variable names; just mark them as used, and what their new name will be.
    simpleWalk(fun, {
      Identifier(node) {
        if (isLocalName(node.name)) {
          return;
        }
        const minified = extraInfo.globals[node.name];
        if (minified) {
          newNames.set(node.name, minified);
          usedNames.add(minified);
        }
      },
      CallExpression(node) {
        // We should never call a local name, as in asm.js-style code our
        // locals are just numbers, not functions; functions are all declared
        // in the outer scope. If a local is called, that is a bug.
        const callee = node.callee;
        if (callee.type === 'Identifier') {
          assertAt(!isLocalName(callee.name), callee, 'cannot call a local');
        }
      },
    });

    // The first time we encounter a local name, we assign it a minified name
    // that's not currently in use. Allocating on demand means they're processed
    // in a predictable order, which is very handy for testing/debugging
    // purposes.
    let nextMinifiedName = 0;

    const getNextMinifiedName = () => {
      let candidate;
      do {
        ensureMinifiedNames(nextMinifiedName);
        candidate = minifiedNames[nextMinifiedName++];
        // TODO: we can probably remove the isLocalName check here
      } while (usedNames.has(candidate) || isLocalName(candidate));
      return candidate;
    };

    // Traverse and minify all names. First the function parameters.
    for (const param of fun.params) {
      const minified = getNextMinifiedName();
      newNames.set(param.name, minified);
      param.name = minified;
    }

    // Label minification is done in a separate namespace.
    const labelNames = new Map();
    let nextMinifiedLabel = 0;
    const getNextMinifiedLabel = () => {
      ensureMinifiedNames(nextMinifiedLabel);
      return minifiedNames[nextMinifiedLabel++];
    };

    // Renames the label a `break`/`continue` statement refers to, if any.
    const renameJumpLabel = (node) => {
      if (node.label) {
        node.label.name = labelNames.get(node.label.name);
      }
    };

    // Finally, the function body.
    recursiveWalk(fun, {
      Identifier(node) {
        const name = node.name;
        if (newNames.has(name)) {
          node.name = newNames.get(name);
          return;
        }
        if (isLocalName(name)) {
          const minified = getNextMinifiedName();
          newNames.set(name, minified);
          node.name = minified;
        }
      },
      LabeledStatement(node, c) {
        const label = node.label;
        if (!labelNames.has(label.name)) {
          labelNames.set(label.name, getNextMinifiedLabel());
        }
        label.name = labelNames.get(label.name);
        c(node.body);
      },
      BreakStatement(node) {
        renameJumpLabel(node);
      },
      ContinueStatement(node) {
        renameJumpLabel(node);
      },
    });

    // Finally, the function name, after restoring it.
    fun.id = funId;
    assert(extraInfo.globals.hasOwnProperty(fun.id.name));
    fun.id.name = extraInfo.globals[fun.id.name];
  }
}
1491
1492
// Minifies the names declared inside a wasm2js `instantiate` function, and
// records the old -> new name mapping in |suffix| as an `// EXTRA_INFO:`
// comment. The AST is modified in place.
function minifyGlobals(ast) {
  // The input is in form
  //
  //   function instantiate(wasmImports, wasmMemory, wasmTable) {
  //      var helper..
  //      function asmFunc(global, env, buffer) {
  //        var memory = env.memory;
  //        var HEAP8 = new global.Int8Array(buffer);
  //
  // We want to minify the interior of instantiate, basically everything but
  // the name instantiate itself, which is used externally to call it.
  //
  // This is *not* a complete minification algorithm. It does not have a full
  // understanding of nested scopes. Instead it assumes the code is fairly
  // simple - as wasm2js output is - and looks at all the minifiable names as
  // a whole. A possible bug here is something like
  //
  //   function instantiate(wasmImports, wasmMemory, wasmTable) {
  //      var x = foo;
  //      function asmFunc(global, env, buffer) {
  //        var foo = 10;
  //
  // Here foo is declared in an inner scope, and the outer use of foo looks
  // to the global scope. The analysis here only thinks something is from the
  // global scope if it is not in any var or function declaration. In practice,
  // the globals used from wasm2js output are things like Int8Array that we
  // don't declare as locals, but we should probably have a fully scope-aware
  // analysis here. FIXME

  // We must run on a singleton instantiate() function as described above.
  assert(
    ast.type === 'Program' &&
      ast.body.length === 1 &&
      ast.body[0].type === 'FunctionDeclaration' &&
      ast.body[0].id.name === 'instantiate',
  );
  const [fun] = ast.body;

  // Swap the function's name away so that we can then minify everything else.
  const funId = fun.id;
  fun.id = null;

  // Find all the declarations.
  const declared = new Set();

  // Some identifiers must be left as they are and not minified.
  const ignore = new Set();

  const declareParams = (node) => {
    for (const param of node.params) {
      declared.add(param.name);
    }
  };

  simpleWalk(fun, {
    FunctionDeclaration(node) {
      if (node.id) {
        declared.add(node.id.name);
      }
      declareParams(node);
    },
    FunctionExpression(node) {
      declareParams(node);
    },
    VariableDeclaration(node) {
      node.declarations.forEach((decl) => declared.add(decl.id.name));
    },
    MemberExpression(node) {
      // In x.a we must not minify a. However, for x[a] we must.
      if (!node.computed) {
        ignore.add(node.property);
      }
    },
  });

  // TODO: find names to avoid, that are not declared (should not happen in
  // wasm2js output)

  // Minify the names.
  let nextMinifiedName = 0;

  const minified = new Map();

  // Returns the minified form of |name|, allocating the next unused one the
  // first time |name| is seen.
  function minify(name) {
    if (!minified.has(name)) {
      ensureMinifiedNames(nextMinifiedName);
      minified.set(name, minifiedNames[nextMinifiedName++]);
    }
    const short = minified.get(name);
    assert(short);
    return short;
  }

  // Start with the declared things in the lowest indices. Things like HEAP8
  // can have very high use counts.
  for (const name of declared) {
    minify(name);
  }

  // Minify all globals in function chunks, i.e. not seen here, but will be in
  // the minifyLocals work on functions.
  for (const name of extraInfo.globals) {
    declared.add(name);
    minify(name);
  }

  // Replace the names with their minified versions.
  simpleWalk(fun, {
    Identifier(node) {
      if (!declared.has(node.name) || ignore.has(node)) {
        return;
      }
      node.name = minify(node.name);
    },
  });

  // Restore the name
  fun.id = funId;

  // Emit the metadata
  const json = {};
  for (const [original, short] of minified) {
    json[original] = short;
  }

  suffix = '// EXTRA_INFO:' + JSON.stringify(json);
}
1619
1620
// Utilities
1621
1622
// Re-attaches source comments to a terser AST. |commentsMap| maps the start
// position of a token to the list of comments that preceded it. Each list is
// attached (as `comments_before`) to the first AST node that starts at exactly
// that position, and dropped if there is no such node.
function reattachComments(ast, commentsMap) {
  const symbols = [];

  // Collect all code symbols
  const collector = new terser.TreeWalker((node) => {
    if (node.start?.pos) {
      symbols.push(node);
    }
  });
  ast.walk(collector);

  // Sort them by ascending start position
  symbols.sort((a, b) => a.start.pos - b.start.pos);

  // Walk through all comments in ascending position (integer-like keys of an
  // object are enumerated in ascending numeric order), and match each comment
  // to the appropriate code block.
  let cursor = 0;
  for (const [key, comments] of Object.entries(commentsMap)) {
    const pos = Number(key);
    while (cursor < symbols.length && symbols[cursor].start.pos < pos) {
      cursor++;
    }
    if (cursor >= symbols.length) {
      trace('dropping comments: no symbol comes after them');
      break;
    }
    const token = symbols[cursor].start;
    if (token.pos !== pos) {
      // This comment must have been associated with a node that still
      // exists in the AST, otherwise we drop it.
      trace('dropping comments: not linked to any remaining AST node');
      continue;
    }
    token.comments_before ??= [];
    for (const comment of comments) {
      trace('reattaching comment');
      const kind = comment.type === 'Line' ? 'comment1' : 'comment2';
      token.comments_before.push(
        new terser.AST_Token(
          kind,
          comment.value,
          undefined,
          undefined,
          false,
          undefined,
          undefined,
          '0',
        ),
      );
    }
  }
}
1672
1673
// Main
1674
1675
// Extra text printed after the generated code; passes may assign it.
let suffix = '';

// Command line: [options] <infile> [pass...]
const cliArgs = parseArgs({
  options: {
    'closure-friendly': {type: 'boolean'},
    'export-es6': {type: 'boolean'},
    verbose: {type: 'boolean'},
    'no-print': {type: 'boolean'},
    'minify-whitespace': {type: 'boolean'},
    outfile: {type: 'string', short: 'o'},
  },
  allowPositionals: true,
});

const closureFriendly = cliArgs.values['closure-friendly'];
const exportES6 = cliArgs.values['export-es6'];
const verbose = cliArgs.values.verbose;
const noPrint = cliArgs.values['no-print'];
const minifyWhitespace = cliArgs.values['minify-whitespace'];
const outfile = cliArgs.values.outfile;

// The first positional is the input file, the rest are the passes to run.
const [infile, ...passes] = cliArgs.positionals;
1698
1699
// Logs |args| via console.warn, but only when --verbose was given.
function trace(...args) {
  if (!verbose) {
    return;
  }
  console.warn(...args);
}
1704
1705
// Read the input. A previous pass may have appended metadata to it as a
// trailing `// EXTRA_INFO:<json>` comment; if so, parse it.
const input = read(infile);
const EXTRA_INFO_MARKER = '// EXTRA_INFO:';
const extraInfoStart = input.lastIndexOf(EXTRA_INFO_MARKER);
let extraInfo = null;
// lastIndexOf() reports "not found" as -1; offset 0 is a valid match.
if (extraInfoStart !== -1) {
  extraInfo = JSON.parse(input.slice(extraInfoStart + EXTRA_INFO_MARKER.length));
}
// Collect all JS code comments to this map so that we can retain them in the
// outputted code if --closure-friendly was requested.
const sourceComments = {};
const params = {
  ecmaVersion: 'latest',
  sourceType: exportES6 ? 'module' : 'script',
  allowAwaitOutsideFunction: true,
};
// If enabled, output retains parentheses and comments so that the
// output can further be passed out to Closure.
if (closureFriendly) {
  const currentComments = [];
  Object.assign(params, {
    preserveParens: true,
    onToken(token) {
      // Associate comments with the start position of the next token.
      sourceComments[token.start] = currentComments.slice();
      currentComments.length = 0;
    },
    onComment: currentComments,
  });
}

// The passes that may be requested by name on the command line.
const registry = {
  JSDCE,
  AJSDCE,
  applyImportAndExportNameChanges,
  emitDCEGraph,
  applyDCEGraphRemovals,
  dump,
  littleEndianHeap,
  growableHeap,
  unsignPointers,
  minifyLocals,
  asanify,
  safeHeap,
  minifyGlobals,
};

// Parse the input and run the requested passes over it, in order.
let ast;
try {
  ast = acorn.parse(input, params);
  for (const pass of passes) {
    const resolvedPass = registry[pass];
    assert(resolvedPass, `unknown optimizer pass: ${pass}`);
    resolvedPass(ast);
  }
} catch (err) {
  if (err.loc) {
    // Errors that carry a source location get the offending input line and a
    // caret pointing at the column appended to their message.
    err.message +=
      '\n' +
      `${input.split(acorn.lineBreak)[err.loc.line - 1]}\n` +
      `${' '.repeat(err.loc.column)}^ ${infile}:${err.loc.line}:${err.loc.column + 1}`;
  }
  throw err;
}

if (!noPrint) {
  const terserAst = terser.AST_Node.from_mozilla_ast(ast);

  if (closureFriendly) {
    reattachComments(terserAst, sourceComments);
  }

  let output = terserAst.print_to_string({
    beautify: !minifyWhitespace,
    indent_level: minifyWhitespace ? 0 : 2,
    keep_quoted_props: closureFriendly, // for closure
    wrap_func_args: false, // don't add extra braces
    comments: true, // for closure as well
    shorthand: true, // Use object literal shorthand notation
  });

  output += '\n';
  if (suffix) {
    output += suffix + '\n';
  }

  if (outfile) {
    fs.writeFileSync(outfile, output);
  } else {
    // Simply using `fs.writeFileSync` on `process.stdout` has issues with
    // large amount of data. It can cause:
    //   Error: EAGAIN: resource temporarily unavailable, write
    process.stdout.write(output);
  }
}
1799
1800