Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
emscripten-core
GitHub Repository: emscripten-core/emscripten
Path: blob/main/tools/wasm-sourcemap.py
4128 views
1
#!/usr/bin/env python3
2
# Copyright 2018 The Emscripten Authors. All rights reserved.
3
# Emscripten is available under two separate licenses, the MIT license and the
4
# University of Illinois/NCSA Open Source License. Both these licenses can be
5
# found in the LICENSE file.
6
7
"""Utility tool that extracts DWARF information encoded in a wasm output
8
produced by the LLVM tools, and encodes it as a wasm source map. Additionally,
9
it can collect original sources, change file prefixes, and strip debug
10
sections from a wasm file.
11
"""
12
13
import argparse
14
import json
15
import logging
16
from math import floor, log
17
import os
18
import re
19
from subprocess import Popen, PIPE
20
from pathlib import Path
21
import sys
22
23
__scriptdir__ = os.path.dirname(os.path.abspath(__file__))
24
__rootdir__ = os.path.dirname(__scriptdir__)
25
sys.path.insert(0, __rootdir__)
26
27
from tools import utils
28
from tools.system_libs import DETERMINISTIC_PREFIX
29
from tools.shared import path_from_root
30
31
EMSCRIPTEN_PREFIX = utils.normalize_path(path_from_root())
32
33
logger = logging.getLogger('wasm-sourcemap')
34
35
36
def parse_args():
  """Parse the command line (sys.argv) of wasm-sourcemap.py.

  Returns:
    The argparse.Namespace with attributes `wasm`, `output`, `prefix`,
    `sources`, `load_prefix`, `w`, `strip`, `source_map_url`, `dwarfdump`,
    `dwarfdump_output` and `basepath`.
  """
  parser = argparse.ArgumentParser(prog='wasm-sourcemap.py', description=__doc__)
  parser.add_argument('wasm', help='wasm file')
  parser.add_argument('-o', '--output', help='output source map')
  parser.add_argument('-p', '--prefix', nargs='*', help='replace source debug filename prefix for source map', default=[])
  parser.add_argument('-s', '--sources', action='store_true', help='read and embed source files from file system into source map')
  parser.add_argument('-l', '--load-prefix', nargs='*', help='replace source debug filename prefix for reading sources from file system (see also --sources)', default=[])
  parser.add_argument('-w', nargs='?', help='set output wasm file')
  parser.add_argument('-x', '--strip', action='store_true', help='removes debug and linking sections')
  parser.add_argument('-u', '--source-map-url', nargs='?', help='specifies sourceMappingURL section content')
  parser.add_argument('--dwarfdump', help="path to llvm-dwarfdump executable")
  # Hidden option: read previously captured llvm-dwarfdump output from a file.
  parser.add_argument('--dwarfdump-output', nargs='?', help=argparse.SUPPRESS)
  parser.add_argument('--basepath', help='base path for source files, which will be relative to this')
  return parser.parse_args()
50
51
52
class Prefixes:
  """Rewrites source file names according to a list of prefix rules.

  Each rule is either 'prefix=replacement' or a bare 'prefix' (replaced by the
  empty string).  Only the first matching rule is applied, and results are
  memoized per input name.
  """

  def __init__(self, args, base_path=None, preserve_deterministic_prefix=True):
    """Build the rule list.

    Args:
      args: iterable of rule strings ('prefix=replacement' or 'prefix').
      base_path: if not None, names that match no rule are made relative to
        this path.
      preserve_deterministic_prefix: when False, names that start with
        DETERMINISTIC_PREFIX are re-rooted at EMSCRIPTEN_PREFIX before the
        rules are applied.
    """
    prefixes = []
    for p in args:
      # Split on the first '=' only, so that a replacement which itself
      # contains '=' does not raise "too many values to unpack".  A rule
      # without '=' yields an empty replacement.
      prefix, _, replacement = p.partition('=')
      prefixes.append({'prefix': utils.normalize_path(prefix), 'replacement': replacement})
    self.base_path = utils.normalize_path(base_path) if base_path is not None else None
    self.preserve_deterministic_prefix = preserve_deterministic_prefix
    self.prefixes = prefixes
    self.cache = {}

  def resolve(self, name):
    """Return the rewritten form of `name` (cached after the first call)."""
    if name in self.cache:
      return self.cache[name]

    source = name
    if not self.preserve_deterministic_prefix and name.startswith(DETERMINISTIC_PREFIX):
      source = EMSCRIPTEN_PREFIX + utils.removeprefix(name, DETERMINISTIC_PREFIX)

    provided = False
    for p in self.prefixes:
      if source.startswith(p['prefix']):
        source = p['replacement'] + utils.removeprefix(source, p['prefix'])
        provided = True
        break

    # If prefixes were provided, we use that; otherwise if base_path is set, we
    # emit a relative path. For files with deterministic prefix, we never use
    # a relative path, precisely to preserve determinism, and because it would
    # still point to the wrong location, so we leave the filepath untouched to
    # let users map it to the proper location using prefix options.
    if not (source.startswith(DETERMINISTIC_PREFIX) or provided or self.base_path is None):
      try:
        source = os.path.relpath(source, self.base_path)
      except ValueError:
        # os.path.relpath could not relate the two paths; fall back to an
        # absolute path.
        source = os.path.abspath(source)
      source = utils.normalize_path(source)

    self.cache[name] = source
    return source
95
96
97
# SourceMapPrefixes contains resolvers for file names:
98
# - "sources" is for names that are output to the source map JSON
99
# - "load" is for paths that are used to load source text
100
class SourceMapPrefixes:
  """Bundles the two Prefixes resolvers used while building a source map.

  Attributes set by __init__:
    sources: resolver built from `sources` and `base_path`.
    load: resolver built from `load` with preserve_deterministic_prefix=False.
  """

  def __init__(self, sources, load, base_path):
    self.load = Prefixes(load, preserve_deterministic_prefix=False)
    self.sources = Prefixes(sources, base_path=base_path)
104
105
106
def encode_vlq(n):
  """Encode the signed integer `n` as a base64 VLQ string.

  The sign is stored in the lowest bit of the value, which is then emitted in
  5-bit groups, least significant first; every group except the last has the
  continuation bit (32) set.
  """
  alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
  if n >= 0:
    value = n << 1
  else:
    value = ((-n) << 1) + 1
  digits = []
  while value > 31:
    digits.append(alphabet[(value & 31) | 32])
    value >>= 5
  digits.append(alphabet[value])
  return "".join(digits)
114
115
116
def read_var_uint(wasm, pos):
  """Read a variable-length unsigned integer from `wasm` starting at `pos`.

  Each byte carries 7 payload bits, least significant group first; a byte
  below 128 ends the number.

  Returns:
    A (value, next_pos) tuple, where next_pos is the index just past the
    last byte consumed.
  """
  value = 0
  bit_offset = 0
  while True:
    byte = ord(wasm[pos:pos + 1])
    pos += 1
    if byte < 128:
      return value + (byte << bit_offset), pos
    value |= (byte - 128) << bit_offset
    bit_offset += 7
127
128
129
def strip_debug_sections(wasm):
  """Return a copy of `wasm` without its debug related custom sections.

  Sections with id 0 whose name is 'linking' or 'sourceMappingURL', or whose
  name starts with 'reloc..debug_' or '.debug_', are dropped; everything else
  (including the first 8 bytes) is copied through unchanged.

  Args:
    wasm: the module contents (bytes, as read from a file opened in 'rb').

  Returns:
    The stripped module, of the same type as `wasm`.
  """
  logger.debug('Strip debug sections')
  pos = 8
  kept = [wasm[:pos]]

  while pos < len(wasm):
    section_start = pos
    section_id, pos_ = read_var_uint(wasm, pos)
    section_size, section_body = read_var_uint(wasm, pos_)
    pos = section_body + section_size
    if section_id == 0:
      name_len, name_pos = read_var_uint(wasm, section_body)
      name = wasm[name_pos:name_pos + name_len]
      # `wasm` is bytes, so the slice must be decoded before it is compared
      # with the str literals below; otherwise the set test never matches and
      # bytes.startswith() raises TypeError when given a tuple of str.
      if not isinstance(name, str):
        name = bytes(name).decode('utf-8', errors='replace')
      if name in {'linking', 'sourceMappingURL'} or name.startswith(('reloc..debug_', '.debug_')):
        continue  # skip debug related sections
    kept.append(wasm[section_start:pos])

  # Join once instead of re-copying the growing result for every section;
  # wasm[:0] is an empty value of the input's own type.
  return wasm[:0].join(kept)
148
149
150
def encode_uint_var(n):
  """Encode the non-negative integer `n` as a variable-length byte string.

  The value is split into 7-bit groups, least significant first; every byte
  except the last has its high bit (128) set.
  """
  encoded = []
  while n > 127:
    encoded.append((n & 127) | 128)
    n >>= 7
  encoded.append(n)
  return bytes(encoded)
157
158
159
def append_source_mapping(wasm, url):
  """Return `wasm` with a 'sourceMappingURL' custom section appended.

  The section body is the length-prefixed section name followed by the
  length-prefixed URL.

  Args:
    wasm: the module contents as bytes.
    url: the source map URL as a str.
  """
  logger.debug('Append sourceMappingURL section')
  name_bytes = "sourceMappingURL".encode()
  # The length prefix must count encoded bytes, not characters, otherwise a
  # URL containing non-ASCII characters produces a malformed section.
  url_bytes = url.encode()
  section_content = encode_uint_var(len(name_bytes)) + name_bytes + encode_uint_var(len(url_bytes)) + url_bytes
  # Section id 0, then the body size, then the body itself.
  return wasm + encode_uint_var(0) + encode_uint_var(len(section_content)) + section_content
164
165
166
def get_code_section_offset(wasm):
  """Return the offset of the body of the section with id 10 in `wasm`.

  Sections are walked starting at byte 8; each one is an id followed by a
  size, both read with read_var_uint.  Returns None when no section with
  id 10 is found.
  """
  logger.debug('Read sections index')
  code_section_id = 10
  cursor = 8
  total = len(wasm)

  while cursor < total:
    section_id, size_pos = read_var_uint(wasm, cursor)
    section_size, body_pos = read_var_uint(wasm, size_pos)
    if section_id == code_section_id:
      return body_pos
    cursor = body_pos + section_size
  return None
176
177
178
def remove_dead_entries(entries):
  """Delete, in place, the entry blocks that belong to dead functions.

  A block is a run of entries ending at one whose 'eos' flag is set.  As a
  heuristic, a block is treated as dead when its first address is near 0,
  i.e. smaller than 1 plus the length of the LEB encoded function size.

  Args:
    entries: list of dicts with at least 'address' and 'eos' keys; modified
      in place.  Nothing is returned.
  """
  first = 0  # index of the first entry of the block being scanned
  idx = 0
  while idx < len(entries):
    last = entries[idx]
    if last['eos']:
      start_address = entries[first]['address']
      # Length of the LEB encoded function size (including size field)
      size_field_length = floor(log(last['address'] - start_address + 1, 128)) + 1
      # 1 byte is for code section entries
      if start_address < 1 + size_field_length:
        # Dead block: drop it and rescan from the same index.
        del entries[first:idx + 1]
        idx = first
      else:
        idx += 1
        first = idx
    else:
      idx += 1
198
199
200
# Given a string that has non-ASCII UTF-8 bytes 128-255 stored as octal sequences (\200 - \377), decode
201
# the sequences back to UTF-8. E.g. "C:\\\303\244 \303\266\\emsdk\\emscripten\\main" -> "C:\\ä ö\\emsdk\\emscripten\\main"
202
def decode_octal_encoded_utf8(str):
  """Decode octal escaped UTF-8 bytes (\\200 - \\377) inside `str`.

  A backslash followed by exactly three octal digits, the first being 2 or 3,
  is replaced by the byte it denotes, unless the backslash is itself escaped
  by a preceding backslash.  All other characters, including backslash pairs,
  are copied through unchanged.  The collected bytes are then decoded as
  UTF-8.

  Unlike a plain per-character copy, a trailing lone backslash, a malformed
  escape (e.g. '\\2x') and characters that are already non-ASCII are passed
  through instead of raising.

  Raises:
    UnicodeDecodeError: if the escaped bytes do not form valid UTF-8.
  """
  octal_digits = '01234567'
  out = bytearray()
  i = 0
  in_escape = False  # True right after an unpaired backslash
  while i < len(str):
    ch = str[i]
    # Slicing never raises, so a backslash at the very end is handled.
    digits = str[i + 1:i + 4]
    is_octal_byte = (
      not in_escape
      and ch == '\\'
      and len(digits) == 3
      and digits[0] in '23'
      and digits[1] in octal_digits
      and digits[2] in octal_digits
    )
    if is_octal_byte:
      out.append(int(digits, 8))
      i += 4
    else:
      # Encode rather than ord(): a character above U+007F needs more than
      # one byte in the output buffer.
      out += ch.encode('utf-8')
      in_escape = (not in_escape) and ch == '\\'
      i += 1
  return out.decode('utf-8')
220
221
222
def extract_comp_dir_map(text):
  """Map each compile unit's DW_AT_stmt_list value to its DW_AT_comp_dir.

  `text` is split at every 'DW_TAG_compile_unit' header.  Units without a
  DW_AT_stmt_list are skipped; units without a DW_AT_comp_dir map to ''.

  Returns:
    dict from the stmt_list string (e.g. '0x00000000') to the decoded
    compilation directory.
  """
  compile_units = re.split(r"0x[0-9a-f]*: DW_TAG_compile_unit", text)[1:]
  result = {}
  for unit in compile_units:
    stmt_list_found = re.search(r"DW_AT_stmt_list\s+\((0x[0-9a-f]*)\)", unit)
    if stmt_list_found is None:
      continue
    comp_dir_found = re.search(r"DW_AT_comp_dir\s+\(\"([^\"]+)\"\)", unit)
    if comp_dir_found is None:
      directory = ''
    else:
      directory = decode_octal_encoded_utf8(comp_dir_found.group(1))
    result[stmt_list_found.group(1)] = directory
  return result
233
234
235
def read_dwarf_entries(wasm, options):
  """Collect line table entries from llvm-dwarfdump output.

  The dump is read from the file `options.dwarfdump_output` if set, otherwise
  it is produced by running `options.dwarfdump` on `wasm`.  The process exits
  with status 1 if neither option is given, if the executable does not exist,
  or if it returns a non-zero exit code.

  Args:
    wasm: path of the wasm file passed to llvm-dwarfdump.
    options: parsed command line options (see parse_args).

  Returns:
    A list of dicts with keys 'address', 'line', 'column', 'file' and 'eos',
    sorted by 'address', with the blocks of dead functions removed.
  """
  if options.dwarfdump_output:
    output = Path(options.dwarfdump_output).read_bytes()
  elif options.dwarfdump:
    logger.debug('Reading DWARF information from %s', wasm)
    if not os.path.exists(options.dwarfdump):
      logger.error('llvm-dwarfdump not found: %s', options.dwarfdump)
      sys.exit(1)
    process = Popen([options.dwarfdump, '-debug-info', '-debug-line', '--recurse-depth=0', wasm], stdout=PIPE)
    # communicate() waits for the process to finish and sets returncode;
    # stderr is not piped, so the second element is always None.
    output, _ = process.communicate()
    exit_code = process.returncode
    if exit_code != 0:
      logger.error('Error during llvm-dwarfdump execution (%s)', exit_code)
      sys.exit(1)
  else:
    logger.error('Please specify either --dwarfdump or --dwarfdump-output')
    sys.exit(1)

  entries = []
  # The capture group makes re.split() interleave the stmt_list offsets with
  # the text that follows them: [debug_info, offset, chunk, offset, chunk...]
  debug_line_chunks = re.split(r"debug_line\[(0x[0-9a-f]*)\]", output.decode('utf-8'))
  map_stmt_list_to_comp_dir = extract_comp_dir_map(debug_line_chunks[0])
  for stmt_list, line_chunk in zip(debug_line_chunks[1::2], debug_line_chunks[2::2]):
    comp_dir = map_stmt_list_to_comp_dir.get(stmt_list, '')

    # include_directories[  1] = "/Users/yury/Work/junk/sqlite-playground/src"
    # file_names[  1]:
    #            name: "playground.c"
    #       dir_index: 1
    #        mod_time: 0x00000000
    #          length: 0x00000000
    #
    # Address            Line   Column File   ISA Discriminator Flags
    # ------------------ ------ ------ ------ --- ------------- -------------
    # 0x0000000000000006     22      0      1   0             0  is_stmt
    # 0x0000000000000007     23     10      1   0             0  is_stmt prologue_end
    # 0x000000000000000f     23      3      1   0             0
    # 0x0000000000000010     23      3      1   0             0  end_sequence
    # 0x0000000000000011     28      0      1   0             0  is_stmt

    include_directories = {'0': comp_dir}
    for dir_match in re.finditer(r"include_directories\[\s*(\d+)\] = \"([^\"]*)", line_chunk):
      include_directories[dir_match.group(1)] = os.path.join(comp_dir, decode_octal_encoded_utf8(dir_match.group(2)))

    files = {}
    for file_match in re.finditer(r"file_names\[\s*(\d+)\]:\s+name: \"([^\"]*)\"\s+dir_index: (\d+)", line_chunk):
      directory = include_directories[file_match.group(3)]
      file_path = os.path.join(directory, decode_octal_encoded_utf8(file_match.group(2)))
      files[file_match.group(1)] = file_path

    for row in re.finditer(r"\n0x([0-9a-f]+)\s+(\d+)\s+(\d+)\s+(\d+)(.*?end_sequence)?", line_chunk):
      entry = {
        'address': int(row.group(1), 16),
        'line': int(row.group(2)),
        'column': int(row.group(3)),
        'file': files[row.group(4)],
        'eos': row.group(5) is not None,
      }
      if not entry['eos']:
        entries.append(entry)
      else:
        # move end of function to the last END operator
        entry['address'] -= 1
        # `entries` may still be empty if the first row is an end_sequence.
        if entries and entries[-1]['address'] == entry['address']:
          # last entry has the same address, reusing
          entries[-1]['eos'] = True
        else:
          entries.append(entry)

  remove_dead_entries(entries)

  # return entries sorted by the address field
  return sorted(entries, key=lambda entry: entry['address'])
301
302
303
def build_sourcemap(entries, code_section_offset, options):
  """Build the source map dict for the given line table entries.

  Each mapping segment consists of four VLQ fields: the deltas of the
  address (entry address plus `code_section_offset`), the source index, the
  line and the column relative to the previous segment.  Segments are joined
  with ','.

  Args:
    entries: list of dicts with 'address', 'line', 'column' and 'file' keys.
    code_section_offset: value added to every entry address.
    options: parsed command line options; `basepath`, `sources`, `prefix`
      and `load_prefix` are read.

  Returns:
    A dict with the keys 'version', 'sources', 'sourcesContent', 'names' and
    'mappings'.
  """
  base_path = options.basepath
  collect_sources = options.sources
  prefixes = SourceMapPrefixes(options.prefix, options.load_prefix, base_path)

  sources = []
  sources_content = []
  mappings = []
  sources_map = {}
  last_address = 0
  last_source_id = 0
  # Line and column deltas are taken relative to line 1 / column 1.
  last_line = 1
  last_column = 1

  for entry in entries:
    line = entry['line']
    column = entry['column']
    # ignore entries with line 0
    if line == 0:
      continue
    # start at least at column 1
    if column == 0:
      column = 1

    address = entry['address'] + code_section_offset
    file_name = utils.normalize_path(entry['file'])
    source_name = prefixes.sources.resolve(file_name)

    if source_name not in sources_map:
      source_id = len(sources)
      sources_map[source_name] = source_id
      sources.append(source_name)
      if collect_sources:
        load_name = prefixes.load.resolve(file_name)
        # Read as UTF-8 explicitly rather than with the locale default, and
        # treat undecodable files like unreadable ones instead of crashing.
        try:
          with open(load_name, encoding='utf-8') as infile:
            source_content = infile.read()
        except (OSError, UnicodeDecodeError):
          print('Failed to read source: %s' % load_name)
          source_content = None
        sources_content.append(source_content)
    else:
      source_id = sources_map[source_name]

    address_delta = address - last_address
    source_id_delta = source_id - last_source_id
    line_delta = line - last_line
    column_delta = column - last_column
    mappings.append(encode_vlq(address_delta) + encode_vlq(source_id_delta) + encode_vlq(line_delta) + encode_vlq(column_delta))
    last_address = address
    last_source_id = source_id
    last_line = line
    last_column = column

  return {'version': 3,
          'sources': sources,
          'sourcesContent': sources_content,
          'names': [],
          'mappings': ','.join(mappings)}
362
363
364
def main():
  """Entry point: write the source map and, optionally, a modified wasm.

  Reads the wasm file named on the command line, writes the source map JSON
  to `--output`, then, if `-w` is given, writes the wasm (stripped with
  `--strip` and/or extended with a sourceMappingURL section with
  `--source-map-url`) to that path.

  Returns:
    0 on success, 1 if no output path was given.
  """
  options = parse_args()

  # Without this check a missing -o/--output only fails at open(None) below,
  # with a TypeError, after all the work has already been done.
  if not options.output:
    logger.error('Please specify the output source map with -o/--output')
    return 1

  wasm_input = options.wasm
  with open(wasm_input, 'rb') as infile:
    wasm = infile.read()

  entries = read_dwarf_entries(wasm_input, options)

  code_section_offset = get_code_section_offset(wasm)

  logger.debug('Saving to %s', options.output)
  source_map = build_sourcemap(entries, code_section_offset, options)
  with open(options.output, 'w', encoding='utf-8') as outfile:
    json.dump(source_map, outfile, separators=(',', ':'), ensure_ascii=False)

  if options.strip:
    wasm = strip_debug_sections(wasm)

  if options.source_map_url:
    wasm = append_source_mapping(wasm, options.source_map_url)

  if options.w:
    logger.debug('Saving wasm to %s', options.w)
    with open(options.w, 'wb') as outfile:
      outfile.write(wasm)

  logger.debug('Done')
  return 0
393
394
395
if __name__ == '__main__':
  # Debug level logging is enabled when EMCC_DEBUG is set to a non-empty value.
  log_level = logging.DEBUG if os.environ.get('EMCC_DEBUG') else logging.INFO
  logging.basicConfig(level=log_level)
  sys.exit(main())
398
399