Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
emscripten-core
GitHub Repository: emscripten-core/emscripten
Path: blob/main/tools/js_optimizer.py
4128 views
1
#!/usr/bin/env python3
2
# Copyright 2012 The Emscripten Authors. All rights reserved.
3
# Emscripten is available under two separate licenses, the MIT license and the
4
# University of Illinois/NCSA Open Source License. Both these licenses can be
5
# found in the LICENSE file.
6
7
import os
8
import sys
9
import subprocess
10
import re
11
import json
12
import shutil
13
14
__scriptdir__ = os.path.dirname(os.path.abspath(__file__))
15
__rootdir__ = os.path.dirname(__scriptdir__)
16
sys.path.insert(0, __rootdir__)
17
18
from tools.toolchain_profiler import ToolchainProfiler
19
from tools.utils import path_from_root
20
from tools import building, config, shared, utils
21
22
# Shared temp-file registry used for every intermediate file created here.
temp_files = shared.get_temp_files()

# Script that is run under node to perform the actual JS transformations.
ACORN_OPTIMIZER = path_from_root('tools/acorn-optimizer.mjs')

# Chunking parameters: the target chunk count is (cores * NUM_CHUNKS_PER_CORE)
# and the resulting chunk size is clamped to [MIN_CHUNK_SIZE, MAX_CHUNK_SIZE].
NUM_CHUNKS_PER_CORE = 3
MIN_CHUNK_SIZE = int(os.getenv('EMCC_JSOPT_MIN_CHUNK_SIZE') or 512 * 1024) # configuring this is just for debugging purposes
MAX_CHUNK_SIZE = int(os.getenv('EMCC_JSOPT_MAX_CHUNK_SIZE') or 5 * 1024 * 1024)

WINDOWS = sys.platform.startswith('win')

# Any non-empty value of EMCC_DEBUG enables the diagnostic prints below.
DEBUG = os.getenv('EMCC_DEBUG')

# Patterns that capture an identifier: from a `function NAME(` definition,
# from a `["defun", "NAME",` JSON node, and from a `var|const NAME = ...;`
# statement respectively.
func_sig = re.compile(r'function ([_\w$]+)\(')
func_sig_json = re.compile(r'\["defun", ?"([_\w$]+)",')
import_sig = re.compile(r'(var|const) ([_\w$]+ *=[^;]+);')
38
39
40
def get_acorn_cmd():
  """Return the command prefix used to run the acorn optimizer under node.

  The result is the configured node command (`config.NODE_JS`), with
  `--stack-size=8192` added unless one of its arguments already contains
  `--stack-size`, followed by the path of the acorn optimizer script.
  Callers append the input file and the passes to the returned list.

  Returns:
    A new list; the shared `config.NODE_JS` list is left untouched.
  """
  # Work on a copy: appending to config.NODE_JS directly would permanently
  # alter the shared configuration seen by every other user of that list.
  node = list(config.NODE_JS)
  if not any('--stack-size' in arg for arg in node):
    # Use an 8Mb stack (rather than the ~1Mb default) when running the
    # js optimizer since larger inputs can cause terser to use a lot of stack.
    node.append('--stack-size=8192')
  return node + [ACORN_OPTIMIZER]
47
48
49
def split_funcs(js):
  """Split JS text into a list of `(identifier, text)` function entries.

  The text is cut at every occurrence of the literal `'function '`; anything
  before the first occurrence is discarded. A piece is kept only when
  `func_sig` finds a `function NAME(` signature in it, and NAME becomes the
  entry's identifier.
  """
  # Splitting on the keyword (not on newlines) works even if there are no
  # newlines, which is important for deterministic builds: which functions
  # land in each chunk may differ, so they are split up, combined again later
  # and sorted deterministically.
  keyword = 'function '
  named = []
  for tail in js.split(keyword)[1:]:
    text = keyword + tail
    match = func_sig.search(text)
    if match is None:
      continue
    name = match.group(1)
    assert name
    named.append((name, text))
  return named
64
65
66
class Minifier:
  """Minification support.

  We calculate minification of globals here, then pass that into the parallel
  acorn-optimizer.mjs runners which perform minification of locals.

  Attributes (all plain instance attributes):
    js: the JS functions text whose global names are collected.
    symbols_file: optional path; when set, minify_shell() writes a symbol map
      to it.
    profiling_funcs: when true, no function names are collected as globals.
    globs: the list of global names sent to the optimizer; after
      minify_shell() it holds the mapping parsed from the optimizer's output.
  """

  def __init__(self, js):
    self.js = js
    self.symbols_file = None
    self.profiling_funcs = False
    # Defined up front so that serialize() is usable before minify_shell()
    # has run (it previously raised AttributeError in that case).
    self.globs = []

  def minify_shell(self, shell, minify_whitespace):
    """Minify the global names in `shell` and return the rewritten shell code.

    Runs acorn-optimizer.mjs with the `minifyGlobals` pass. We send it the
    globals, which it parses at the proper time. JS decides how to minify all
    global names, we receive a dictionary back (stored in `self.globs`), which
    is then used by the function processors.

    Args:
      shell: the shell JS code to process.
      minify_whitespace: when true, `--minify-whitespace` is passed along.

    Returns:
      The optimizer output that precedes its `// EXTRA_INFO:` marker.
    """
    # Temporarily hide every '0.0' behind a placeholder; it is restored on
    # return.
    shell = shell.replace('0.0', '13371337') # avoid optimizer doing 0.0 => 0

    # Find all globals in the JS functions code
    if not self.profiling_funcs:
      self.globs = [m.group(1) for m in func_sig.finditer(self.js)]
      if not self.globs:
        # No plain `function NAME(` definitions; try the JSON "defun" form.
        self.globs = [m.group(1) for m in func_sig_json.finditer(self.js)]
    else:
      self.globs = []

    with temp_files.get_file('.minifyglobals.js') as temp_file:
      with open(temp_file, 'w') as f:
        f.write(shell)
        f.write('\n')
        f.write('// EXTRA_INFO:' + json.dumps(self.serialize()))

      cmd = get_acorn_cmd() + [temp_file, 'minifyGlobals']
      if minify_whitespace:
        cmd.append('--minify-whitespace')
      output = shared.run_process(cmd, stdout=subprocess.PIPE).stdout

    assert len(output) and not output.startswith('Assertion failed'), 'Error in js optimizer: ' + output
    # The optimizer echoes the code followed by its own EXTRA_INFO payload.
    code, metadata = output.split('// EXTRA_INFO:')
    self.globs = json.loads(metadata)

    if self.symbols_file:
      # One `value:key` line per entry of the mapping returned above.
      mapping = '\n'.join(f'{value}:{key}' for key, value in self.globs.items())
      utils.write_file(self.symbols_file, mapping + '\n')
      print('wrote symbol map file to', self.symbols_file, file=sys.stderr)

    return code.replace('13371337', '0.0')

  def serialize(self):
    """Return the JSON-serializable info passed along as EXTRA_INFO."""
    return {
      'globals': self.globs,
    }
120
121
122
# Comment lines that delimit regions of the input JS. Each marker includes its
# trailing newline, so a match always covers the whole marker line.

# Bounds of the asm module.
start_asm_marker = '// EMSCRIPTEN_START_ASM\n'
end_asm_marker = '// EMSCRIPTEN_END_ASM\n'

# Bounds of the function definitions that get chunked and optimized.
start_funcs_marker = '// EMSCRIPTEN_START_FUNCS\n'
end_funcs_marker = '// EMSCRIPTEN_END_FUNCS\n'
126
127
128
# Given a set of functions of form (ident, text), and a preferred chunk size,
# generates a set of chunks for parallel processing and caching.
@ToolchainProfiler.profile()
def chunkify(funcs, chunk_size):
  """Group functions, in order, into chunks of roughly `chunk_size` characters.

  Functions are added to the current chunk while the accumulated text length
  stays strictly below `chunk_size`; the function that would reach the limit
  starts a new chunk. A single function at or above the limit therefore gets
  a chunk of its own.

  Args:
    funcs: iterable of `(ident, text)` pairs.
    chunk_size: preferred upper bound on the text length of a chunk.

  Returns:
    A list of strings, one per chunk, each the concatenated text of the
    chunk's functions (the identifiers are dropped). No empty chunk is
    produced.
  """
  chunks = []
  curr = []
  total_size = 0
  for func in funcs:
    curr_size = len(func[1])
    if total_size + curr_size < chunk_size:
      curr.append(func)
      total_size += curr_size
    else:
      # Close the current chunk, but only if it holds something: when the
      # very first function already reaches chunk_size there is nothing to
      # close yet, and appending would yield an empty chunk.
      if curr:
        chunks.append(curr)
      curr = [func]
      total_size = curr_size
  if curr:
    chunks.append(curr)
  return [''.join(func[1] for func in chunk) for chunk in chunks] # remove function names
149
150
151
@ToolchainProfiler.profile_block('js_optimizer.run_on_file')
def run_on_file(filename, passes, extra_info=None):
  """Run acorn optimizer passes over the function region of a JS file.

  The input must contain the `// EMSCRIPTEN_START_FUNCS` and
  `// EMSCRIPTEN_END_FUNCS` marker lines. The text between them is split into
  functions, grouped into chunks, and every chunk is processed by its own
  acorn-optimizer process. The output is reassembled around the optimized
  functions and written to `filename + '.jo.js'`.

  Some entries of `passes` are consumed here instead of being forwarded as-is:
    - 'minifyNames': global names are minified through `Minifier`, and the
      pass is forwarded as 'minifyLocals'. The `// EMSCRIPTEN_START_ASM` and
      `// EMSCRIPTEN_END_ASM` markers are looked up in this mode.
    - 'symbolMap=<path>' and 'profilingFuncs': consumed (and removed) only
      when 'minifyNames' is present; they configure the `Minifier`.
    - 'closure' / 'cleanup': removed from the forwarded passes and applied to
      the shell code (the text outside the ASM markers) instead.

  Args:
    filename: path of the JS file to read.
    passes: a single pass or a list of passes / flags.
    extra_info: optional dict appended to every chunk as JSON after
      `// EXTRA_INFO:`. Must be empty when 'minifyNames' is used.

  Returns:
    The path of the written output file, `filename + '.jo.js'`.
  """
  with ToolchainProfiler.profile_block('js_optimizer.split_markers'):
    if not isinstance(passes, list):
      passes = [passes]

    js = utils.read_file(filename)
    if os.linesep != '\n':
      js = js.replace(os.linesep, '\n') # we assume \n in the splitting code

    # Find markers
    start_funcs = js.find(start_funcs_marker)
    end_funcs = js.rfind(end_funcs_marker)

    if start_funcs < 0 or end_funcs < start_funcs:
      shared.exit_with_error('invalid input file. Did not contain appropriate markers. (start_funcs: %s, end_funcs: %s)' % (start_funcs, end_funcs))

    minify_globals = 'minifyNames' in passes
    if minify_globals:
      passes = [p if p != 'minifyNames' else 'minifyLocals' for p in passes]
      start_asm = js.find(start_asm_marker)
      end_asm = js.rfind(end_asm_marker)
      # Either both ASM markers are present or neither is.
      assert (start_asm >= 0) == (end_asm >= 0)

    closure = 'closure' in passes
    if closure:
      passes = [p for p in passes if p != 'closure'] # we will do it manually

    cleanup = 'cleanup' in passes
    if cleanup:
      passes = [p for p in passes if p != 'cleanup'] # we will do it manually

  if not minify_globals:
    with ToolchainProfiler.profile_block('js_optimizer.no_minify_globals'):
      # pre keeps the start marker; the end marker is re-added to post below.
      pre = js[:start_funcs + len(start_funcs_marker)]
      post = js[end_funcs + len(end_funcs_marker):]
      js = js[start_funcs + len(start_funcs_marker):end_funcs]
      # can have Module[..] and inlining prevention code, push those to post
      finals = []

      def process(line):
        # Returns False (after recording the line) for lines moved to post.
        if line and (line.startswith(('Module[', 'if (globalScope)')) or line.endswith('["X"]=1;')):
          finals.append(line)
          return False
        return True

      js = '\n'.join(line for line in js.split('\n') if process(line))
      post = '\n'.join(finals) + '\n' + post
    post = end_funcs_marker + post
  else:
    with ToolchainProfiler.profile_block('js_optimizer.minify_globals'):
      # We need to split out the asm shell as well, for minification
      pre = js[:start_asm + len(start_asm_marker)]
      post = js[end_asm:]
      # The shell is everything inside the ASM markers except the functions,
      # which are stood in for by an EMSCRIPTEN_FUNCS() placeholder call.
      asm_shell = js[start_asm + len(start_asm_marker):start_funcs + len(start_funcs_marker)] + '''
EMSCRIPTEN_FUNCS();
''' + js[end_funcs + len(end_funcs_marker):end_asm + len(end_asm_marker)]
      js = js[start_funcs + len(start_funcs_marker):end_funcs]

      # we assume there is a maximum of one new name per line
      minifier = Minifier(js)

      def check_symbol_mapping(p):
        # Consumes the passes that configure the minifier; returns True for
        # passes that must still be forwarded to the optimizer.
        if p.startswith('symbolMap='):
          minifier.symbols_file = p.split('=', 1)[1]
          return False
        if p == 'profilingFuncs':
          minifier.profiling_funcs = True
          return False
        return True

      passes = [p for p in passes if check_symbol_mapping(p)]
      # Split the minified shell back apart at the placeholder.
      asm_shell_pre, asm_shell_post = minifier.minify_shell(asm_shell, '--minify-whitespace' in passes).split('EMSCRIPTEN_FUNCS();')
      asm_shell_post = asm_shell_post.replace('});', '})')
      pre += asm_shell_pre + '\n' + start_funcs_marker
      post = end_funcs_marker + asm_shell_post + post

      minify_info = minifier.serialize()

      if extra_info:
        for key, value in extra_info.items():
          assert key not in minify_info or value == minify_info[key], [key, value, minify_info[key]]
          minify_info[key] = value

  with ToolchainProfiler.profile_block('js_optimizer.split'):
    total_size = len(js)
    funcs = split_funcs(js)
    js = None

  with ToolchainProfiler.profile_block('js_optimizer.split_to_chunks'):
    # NOTE(review): an earlier comment here said chunking is avoided when
    # making source maps; no such check is visible in this function.
    intended_num_chunks = round(shared.get_num_cores() * NUM_CHUNKS_PER_CORE)
    chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks))
    chunks = chunkify(funcs, chunk_size)

    chunks = [chunk for chunk in chunks if chunk]
    if DEBUG:
      lengths = [len(c) for c in chunks]
      if not lengths:
        lengths = [0]
      print('chunkification: num funcs:', len(funcs), 'actual num chunks:', len(chunks), 'chunk size range:', max(lengths), '-', min(lengths), file=sys.stderr)
    funcs = None

    # Every chunk file carries the same trailing EXTRA_INFO payload (if any).
    serialized_extra_info = ''
    if minify_globals:
      assert not extra_info
      serialized_extra_info += '// EXTRA_INFO:' + json.dumps(minify_info)
    elif extra_info:
      serialized_extra_info += '// EXTRA_INFO:' + json.dumps(extra_info)
    with ToolchainProfiler.profile_block('js_optimizer.write_chunks'):
      def write_chunk(chunk, i):
        temp_file = temp_files.get('.jsfunc_%d.js' % i).name
        utils.write_file(temp_file, chunk + serialized_extra_info)
        return temp_file
      filenames = [write_chunk(chunk, i) for i, chunk in enumerate(chunks)]

  with ToolchainProfiler.profile_block('run_optimizer'):
    commands = [get_acorn_cmd() + [f] + passes for f in filenames]
    # From here on `filenames` refers to the optimizer outputs, not the inputs.
    filenames = shared.run_multiple_processes(commands, route_stdout_to_temp_files_suffix='js_opt.jo.js')

  with ToolchainProfiler.profile_block('split_closure_cleanup'):
    if closure or cleanup:
      # run on the shell code, everything but what we acorn-optimize
      # The asm module is replaced by this stand-in while the tool runs and
      # is spliced back in afterwards.
      cl_sep = 'wakaUnknownBefore(); var asm=wakaUnknownAfter(wakaGlobal,wakaEnv,wakaBuffer)\n'

      with temp_files.get_file('.cl.js') as cle:
        pre_1, pre_2 = pre.split(start_asm_marker)
        post_1, post_2 = post.split(end_asm_marker)
        with open(cle, 'w') as f:
          f.write(pre_1)
          f.write(cl_sep)
          f.write(post_2)
        cld = cle
        if closure:
          if DEBUG:
            print('running closure on shell code', file=sys.stderr)
          cld = building.closure_compiler(cld, pretty='--minify-whitespace' not in passes)
          temp_files.note(cld)
        elif cleanup:
          if DEBUG:
            print('running cleanup on shell code', file=sys.stderr)
          acorn_passes = ['JSDCE']
          if '--minify-whitespace' in passes:
            acorn_passes.append('--minify-whitespace')
          cld = building.acorn_optimizer(cld, acorn_passes)
          temp_files.note(cld)
        coutput = utils.read_file(cld)

      coutput = coutput.replace('wakaUnknownBefore();', start_asm_marker)
      after = 'wakaUnknownAfter'
      start = coutput.find(after)
      end = coutput.find(')', start)
      # If the closure comment to suppress useless code is present, we need to look one
      # brace past it, as the first is in there. Otherwise, the first brace is the
      # start of the function body (what we want).
      USELESS_CODE_COMMENT = '/** @suppress {uselessCode} */ '
      USELESS_CODE_COMMENT_BODY = 'uselessCode'
      brace = pre_2.find('{') + 1
      has_useless_code_comment = False
      if pre_2[brace:brace + len(USELESS_CODE_COMMENT_BODY)] == USELESS_CODE_COMMENT_BODY:
        brace = pre_2.find('{', brace) + 1
        has_useless_code_comment = True
      # Replace the stand-in call with the real asm function header + body.
      pre = coutput[:start] + '(' + (USELESS_CODE_COMMENT if has_useless_code_comment else '') + 'function(global,env,buffer) {\n' + pre_2[brace:]
      post = post_1 + end_asm_marker + coutput[end + 1:]

  filename += '.jo.js'
  temp_files.note(filename)

  with open(filename, 'w') as f:
    with ToolchainProfiler.profile_block('write_pre'):
      f.write(pre)
      pre = None

    with ToolchainProfiler.profile_block('sort_or_concat'):
      # sort functions by size, to make diffing easier and to improve aot times
      funcses = [split_funcs(utils.read_file(out_file)) for out_file in filenames]
      funcs = [item for sublist in funcses for item in sublist]
      funcses = None
      if not os.environ.get('EMCC_NO_OPT_SORT'):
        # Largest first; ties are broken by identifier to stay deterministic.
        funcs.sort(key=lambda x: (len(x[1]), x[0]), reverse=True)

      for func in funcs:
        f.write(func[1])
      funcs = None

    with ToolchainProfiler.profile_block('write_post'):
      f.write('\n')
      f.write(post)
      f.write('\n')

  return filename
349
350
351
def main():
  """Command-line entry point.

  Usage: js_optimizer.py FILE [PASS ...] [EXTRA_INFO_JSON]

  If the final argument contains a `{` it is parsed as JSON, passed on as
  `extra_info`, and removed from `sys.argv`. The optimized output of
  `run_on_file` is then copied to `FILE + '.jsopt.js'`.

  Returns:
    0, used as the process exit status.
  """
  extra_info = None
  trailing = sys.argv[-1]
  if '{' in trailing:
    extra_info = json.loads(trailing)
    sys.argv = sys.argv[:-1]
  input_file, passes = sys.argv[1], sys.argv[2:]
  optimized = run_on_file(input_file, passes, extra_info=extra_info)
  shutil.copyfile(optimized, input_file + '.jsopt.js')
  return 0


if __name__ == '__main__':
  sys.exit(main())
365
366