Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
emscripten-core
GitHub Repository: emscripten-core/emscripten
Path: blob/main/tools/emsymbolizer.py
6170 views
1
#!/usr/bin/env python3
2
3
# This is a utility for looking up the symbol names and/or file+line numbers
4
# of code addresses. There are several possible sources of this information,
5
# with varying granularity (listed here in approximate preference order).
6
7
# If the wasm has DWARF info, llvm-symbolizer can show the symbol, file, and
8
# line/column number, potentially including inlining.
9
# If the wasm has separate DWARF info, do the above with the side file
# (this case is not supported yet; see the note at the end of this list).
10
# If there is a source map, we can parse it to get file and line number.
11
# If there is an emscripten symbol map, we can use that to get the symbol name
12
# If there is a name section or symbol table, llvm-symbolizer can show the
13
# symbol name.
14
# Separate DWARF is not supported yet.
15
16
import argparse
17
import json
18
import os
19
import re
20
import subprocess
21
import sys
22
from dataclasses import dataclass
23
24
__scriptdir__ = os.path.dirname(os.path.abspath(__file__))
25
__rootdir__ = os.path.dirname(__scriptdir__)
26
sys.path.insert(0, __rootdir__)
27
28
from tools import shared, utils, webassembly
29
30
# Executable used as argv[0] when symbolize_address_symbolizer builds its
# llvm-symbolizer command line; resolved through shared.llvm_tool_path.
LLVM_SYMBOLIZER = shared.llvm_tool_path('llvm-symbolizer')
31
32
33
class Error(Exception):
    """User-facing failure raised by this tool.

    Derives from Exception (not BaseException): BaseException is reserved for
    interpreter-level exits such as KeyboardInterrupt and SystemExit, and
    ordinary application errors should be catchable by `except Exception`.
    Code that catches `Error` explicitly is unaffected.
    """
35
36
37
# Class to treat location info in a uniform way across information sources.
38
@dataclass
39
class LocationInfo:
40
source: str | None = None
41
line: int = 0
42
column: int = 0
43
func: str | None = None
44
45
def print(self):
46
source = self.source if self.source else '??'
47
func = self.func if self.func else '??'
48
print(f'{func}\n{source}:{self.line}:{self.column}')
49
50
51
def get_codesec_offset(module):
    """Return the `offset` of the module's code section.

    Raises:
      Error: if `module.get_section` yields no code section.
    """
    code_section = module.get_section(webassembly.SecType.CODE)
    if code_section:
        return code_section.offset
    raise Error(f'No code section found in {module.filename}')
56
57
58
def has_debug_line_section(module):
    """Return True if the module has a '.debug_line' custom section."""
    debug_line = module.get_custom_section('.debug_line')
    return debug_line is not None
60
61
62
def has_name_section(module):
    """Return True if the module has a 'name' custom section."""
    name_section = module.get_custom_section('name')
    return name_section is not None
64
65
66
def has_linking_section(module):
    """Return True if the module has a 'linking' custom section."""
    linking_section = module.get_custom_section('linking')
    return linking_section is not None
68
69
70
def symbolize_address_symbolizer(module, address, is_dwarf):
    """Symbolize `address` by running llvm-symbolizer on the module's file.

    Args:
      module: wasm module object; `module.filename` is passed to
        llvm-symbolizer via `-e`.
      address: address to look up; passed on the command line as `str(address)`.
      is_dwarf: when true, `--adjust-vma` is set to the code section offset,
        otherwise to 0.

    Returns:
      A list of LocationInfo, one per (function, location) pair of output
      lines, with `line` and `column` stored as ints.

    Raises:
      Error: if the output is not made of complete (function, location) line
        pairs, or a location line is not of the form `source:line:column`.
    """
    vma_adjust = get_codesec_offset(module) if is_dwarf else 0
    cmd = [LLVM_SYMBOLIZER, '-e', module.filename,
           f'--adjust-vma={vma_adjust}', str(address)]
    if shared.DEBUG:
        print(f'Running {" ".join(cmd)}')
    out = utils.run_process(cmd, stdout=subprocess.PIPE).stdout.strip()
    out_lines = out.splitlines()

    # llvm-symbolizer prints two lines per location. The first line contains a
    # function name, and the second contains a source location like
    # '/abc/def.c:3:5'. An unpaired trailing line means the output is not in
    # the format this parser understands, so fail with a readable error
    # instead of an IndexError.
    if len(out_lines) % 2:
        raise Error(f'Unexpected llvm-symbolizer output: {out!r}')

    # Source location regex, e.g., /abc/def.c:3:5
    source_loc_re = re.compile(r'(.+):(\d+):(\d+)$')

    # If the function or source info is not available, it is printed as '??',
    # in which case we store None. If the line and column info is not
    # available, they are printed as 0, which we store as is.
    infos = []
    for func, loc_str in zip(out_lines[::2], out_lines[1::2]):
        m = source_loc_re.match(loc_str)
        if m is None:
            raise Error(f'Unexpected llvm-symbolizer location: {loc_str!r}')
        source = m.group(1)
        # LocationInfo declares line/column as int; convert the matched digits.
        line = int(m.group(2))
        column = int(m.group(3))
        if func == '??':
            func = None
        if source == '??':
            source = None
        infos.append(LocationInfo(source, line, column, func))
    return infos
100
101
102
def get_sourceMappingURL_section(module):
    """Return the first section named "sourceMappingURL", or None if absent."""
    matching = (sec for sec in module.sections()
                if sec.name == "sourceMappingURL")
    return next(matching, None)
107
108
109
class WasmSourceMap:
110
@dataclass
111
class Location:
112
source: str | None = None
113
line: int = 0
114
column: int = 0
115
func: str | None = None
116
117
def __init__(self):
118
self.version = None
119
self.sources = []
120
self.funcs = []
121
self.mappings = {}
122
self.offsets = []
123
124
def parse(self, filename):
125
with open(filename) as f:
126
source_map_json = json.loads(f.read())
127
if shared.DEBUG:
128
print(source_map_json)
129
130
self.version = source_map_json['version']
131
self.sources = source_map_json['sources']
132
self.funcs = source_map_json['names']
133
134
chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/='
135
vlq_map = {c: i for i, c in enumerate(chars)}
136
137
def decodeVLQ(string):
138
result = []
139
shift = 0
140
value = 0
141
for c in string:
142
try:
143
integer = vlq_map[c]
144
except ValueError as e:
145
raise Error(f'Invalid character ({c}) in VLQ') from e
146
value += (integer & 31) << shift
147
if integer & 32:
148
shift += 5
149
else:
150
negate = value & 1
151
value >>= 1
152
result.append(-value if negate else value)
153
value = shift = 0
154
return result
155
156
offset = 0
157
src = 0
158
line = 1
159
col = 1
160
func = 0
161
for segment in source_map_json['mappings'].split(','):
162
data = decodeVLQ(segment)
163
info = []
164
165
offset += data[0]
166
if len(data) >= 2:
167
src += data[1]
168
info.append(src)
169
if len(data) >= 3:
170
line += data[2]
171
info.append(line)
172
if len(data) >= 4:
173
col += data[3]
174
info.append(col)
175
if len(data) == 5:
176
func += data[4]
177
info.append(func)
178
179
self.mappings[offset] = WasmSourceMap.Location(*info)
180
self.offsets.append(offset)
181
self.offsets.sort()
182
183
def find_offset(self, offset, lower_bound=None):
184
# Find the largest mapped offset <= the search offset
185
lo = 0
186
hi = len(self.offsets)
187
188
while lo < hi:
189
mid = (lo + hi) // 2
190
if self.offsets[mid] > offset:
191
hi = mid
192
else:
193
lo = mid + 1
194
if lo == 0:
195
return None
196
# If lower bound is given, return the offset only if the offset is equal to
197
# or greater than the lower bound
198
if lower_bound:
199
if self.offsets[lo - 1] >= lower_bound:
200
return self.offsets[lo - 1]
201
else:
202
return None
203
else:
204
return self.offsets[lo - 1]
205
206
def lookup(self, offset, lower_bound=None):
207
nearest = self.find_offset(offset, lower_bound)
208
if not nearest:
209
return None
210
info = self.mappings[nearest]
211
return LocationInfo(
212
self.sources[info.source] if info.source is not None else None,
213
info.line,
214
info.column,
215
self.funcs[info.func] if info.func is not None else None,
216
)
217
218
219
def symbolize_address_sourcemap(module, address, force_file):
    """Symbolize `address` using a source map.

    The map is read from `force_file` when one is given, otherwise from the
    location returned by `module.get_sourceMappingURL()`. Returns whatever
    `WasmSourceMap.lookup` returns for `address`.
    """
    # If a sourcemap file is not forced, read it from the wasm module
    # TODO: support stripping/replacing a prefix from the URL
    map_location = force_file or module.get_sourceMappingURL()

    if shared.DEBUG:
        print(f'Source Mapping URL: {map_location}')

    source_map = WasmSourceMap()
    source_map.parse(map_location)

    if shared.DEBUG:
        code_offset = get_codesec_offset(module)
        print(source_map.mappings)
        # Print with section offsets to easily compare against dwarf
        for mapped_offset, location in source_map.mappings.items():
            print(f'{mapped_offset - code_offset:x}: {location}')

    return source_map.lookup(address)
237
238
239
def symbolize_address_symbolmap(module, address, symbol_map_file):
    """Symbolize using a symbol map file.

    The symbol map has one `index:name` entry per line. The functions yielded
    by `module.iter_functions_by_index()` are scanned in order for the first
    one whose `offset` is greater than `address`; the function before it is
    reported.

    Returns:
      A LocationInfo with only `func` set, or None (after printing a message)
      when the address is before the first function.

    Raises:
      Error: on a malformed symbol map line, when the module yields no
        functions, or when the symbol map has no entry for the function.
    """
    func_names = {}

    with open(symbol_map_file) as f:
        for line in f.read().splitlines():
            if not line.strip():
                continue  # tolerate blank lines
            index, sep, name = line.partition(':')
            # Validate with a real error rather than `assert`, which is
            # stripped when Python runs with -O.
            if not sep:
                raise Error(f'invalid symbolmap line: {line}')
            try:
                func_names[int(index)] = name
            except ValueError as e:
                raise Error(f'invalid symbolmap line: {line}') from e

    func_index = None
    last_index = None
    for i, func in module.iter_functions_by_index():
        if shared.DEBUG:
            print(f'Func {i}: {hex(func.offset)}, {func_names.get(i, "??")}')
        if func.offset > address:
            if i > 0:
                func_index = i - 1
                break
            print("Address is before the first function")
            return None
        last_index = i
    else:
        # No function starts after the address, so it is at or beyond the
        # start of the last function. Function sizes are not checked here, so
        # an address past the end of the code is also attributed to it.
        func_index = last_index

    if func_index is None:
        raise Error(f'No functions found in {module.filename}')
    if func_index not in func_names:
        raise Error(f'No symbol map entry for function index {func_index}')
    return LocationInfo(func=func_names[func_index])
266
267
268
def main(args):
    """Symbolize `args.address` in `args.wasm_file` and print the result.

    The information source is `args.source` when forced; otherwise the first
    available of: a .debug_line section, a sourceMappingURL section, a name
    section, a linking section. The symbol map source is used only when
    forced.

    Raises:
      Error: if the address is not a valid number, if the symbol map source
        is forced without `args.file`, or if no usable source is found.
    """
    with webassembly.Module(args.wasm_file) as module:
        base = 16 if args.address.lower().startswith('0x') else 10
        try:
            address = int(args.address, base)
        except ValueError as e:
            raise Error(f'Invalid address: {args.address}') from e

        if args.addrtype == 'code':
            address += get_codesec_offset(module)

        def print_loc(loc):
            # Backends return a list of locations, a single location, or None
            # when nothing was found; None is printed as an unknown location.
            if loc is None:
                LocationInfo().print()
            elif isinstance(loc, list):
                for entry in loc:
                    entry.print()
            else:
                loc.print()

        # Empty (the argparse default) when no source type was forced.
        source = args.source

        if ((has_debug_line_section(module) and not source) or
                source == 'dwarf'):
            print_loc(symbolize_address_symbolizer(module, address, is_dwarf=True))
        elif ((get_sourceMappingURL_section(module) and not source) or
              source == 'sourcemap'):
            print_loc(symbolize_address_sourcemap(module, address, args.file))
        elif ((has_name_section(module) and not source) or
              source == 'names'):
            print_loc(symbolize_address_symbolizer(module, address, is_dwarf=False))
        elif ((has_linking_section(module) and not source) or
              source == 'symtab'):
            print_loc(symbolize_address_symbolizer(module, address, is_dwarf=False))
        elif source == 'symbolmap':
            if not args.file:
                raise Error('A symbol map file must be given with --file when '
                            'using the symbolmap source')
            print_loc(symbolize_address_symbolmap(module, address, args.file))
        else:
            raise Error('No .debug_line or sourceMappingURL section found in '
                        f'{module.filename}.'
                        " I don't know how to symbolize this file yet")
301
302
303
def get_args():
    """Parse the command line and return the resulting namespace.

    When --verbose is given, also sets shared.PRINT_SUBPROCS and shared.DEBUG
    so the rest of the script prints debugging output.
    """
    source_types = ['dwarf', 'sourcemap', 'names', 'symtab', 'symbolmap']
    address_types = ['code', 'file']

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('-s', '--source', choices=source_types,
                            help='Force debug info source type', default=())
    arg_parser.add_argument('-f', '--file', action='store',
                            help='Force debug info source file')
    arg_parser.add_argument('-t', '--addrtype', choices=address_types,
                            default='file',
                            help='Address type (code section or file offset)')
    arg_parser.add_argument('-v', '--verbose', action='store_true',
                            help='Print verbose info for debugging this script')
    arg_parser.add_argument('wasm_file', help='Wasm file')
    arg_parser.add_argument('address', help='Address to lookup')

    parsed = arg_parser.parse_args()
    if not parsed.verbose:
        return parsed
    shared.PRINT_SUBPROCS = 1
    shared.DEBUG = True
    return parsed
322
323
324
if __name__ == '__main__':
    # Script entry point: report known failure types on stderr and exit with
    # status 1; otherwise exit with whatever main() returned.
    handled_errors = (Error, webassembly.InvalidWasmError, OSError)
    try:
        rv = main(get_args())
    except handled_errors as e:
        print(f'{sys.argv[0]}: {str(e)}', file=sys.stderr)
        rv = 1
    sys.exit(rv)
331
332