Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
emscripten-core
GitHub Repository: emscripten-core/emscripten
Path: blob/main/emsymbolizer.py
4091 views
1
#!/usr/bin/env python3
2
3
# This is a utility for looking up the symbol names and/or file+line numbers
4
# of code addresses. There are several possible sources of this information,
5
# with varying granularity (listed here in approximate preference order).
6
7
# If the wasm has DWARF info, llvm-symbolizer can show the symbol, file, and
8
# line/column number, potentially including inlining.
9
# If the wasm has separate DWARF info, do the above with the side file
10
# If there is a source map, we can parse it to get file and line number.
11
# If there is an emscripten symbol map, we can use that to get the symbol name
12
# If there is a name section or symbol table, llvm-symbolizer can show the
13
# symbol name.
14
# Separate DWARF is not supported yet.
15
16
import argparse
17
import json
18
import re
19
import subprocess
20
import sys
21
from tools import shared
22
from tools import webassembly
23
24
25
# Command used to launch llvm-symbolizer, as resolved by
# shared.llvm_tool_path(); it is used as the first element of the command line.
LLVM_SYMBOLIZER = shared.llvm_tool_path('llvm-symbolizer')
26
27
28
class Error(Exception):
    """Error raised by this tool for failures that are reported to the user.

    Derives from Exception rather than BaseException: BaseException is meant
    for interpreter-level exits (KeyboardInterrupt, SystemExit, ...), and
    deriving from it would let these errors slip past generic
    `except Exception` handlers.
    """
30
31
32
# Class to treat location info in a uniform way across information sources.
33
class LocationInfo:
    """Source location record shared by every symbolization backend.

    All fields are optional: `source` and `func` default to None, while
    `line` and `column` default to 0.
    """

    def __init__(self, source=None, line=0, column=0, func=None):
        self.source, self.line = source, line
        self.column, self.func = column, func

    def print(self):
        """Write two lines to stdout: the function, then source:line:column.

        A falsy function name or source is rendered as '??'.
        """
        shown_func = self.func or '??'
        shown_source = self.source or '??'
        print(f'{shown_func}\n{shown_source}:{self.line}:{self.column}')
44
45
46
def get_codesec_offset(module):
    """Return the `offset` of the code section of `module`.

    Raises:
      Error: if `module.get_section` yields nothing for the CODE section type.
    """
    code_section = module.get_section(webassembly.SecType.CODE)
    if code_section:
        return code_section.offset
    raise Error(f'No code section found in {module.filename}')
51
52
53
def has_debug_line_section(module):
    """Tell whether `module` has a custom section named '.debug_line'."""
    debug_line = module.get_custom_section('.debug_line')
    return debug_line is not None
55
56
57
def has_name_section(module):
    """Tell whether `module` has a custom section named 'name'."""
    name_section = module.get_custom_section('name')
    return name_section is not None
59
60
61
def has_linking_section(module):
    """Tell whether `module` has a custom section named 'linking'."""
    linking_section = module.get_custom_section('linking')
    return linking_section is not None
63
64
65
def symbolize_address_symbolizer(module, address, is_dwarf):
    """Symbolize `address` by running llvm-symbolizer and print each location.

    Args:
      module: the wasm module; its `filename` is handed to llvm-symbolizer
        with `-e`.
      address: integer address to look up.
      is_dwarf: when true, `--adjust-vma` is set to the code section offset
        of `module`; otherwise it is set to 0.

    Raises:
      Error: if the output of llvm-symbolizer is not a sequence of
        (function name, source location) line pairs.
    """
    vma_adjust = get_codesec_offset(module) if is_dwarf else 0
    cmd = [LLVM_SYMBOLIZER, '-e', module.filename,
           f'--adjust-vma={vma_adjust}', str(address)]
    if shared.DEBUG:
        print(f'Running {" ".join(cmd)}')
    out = shared.run_process(cmd, stdout=subprocess.PIPE).stdout.strip()
    out_lines = out.splitlines()

    # Source location regex, e.g., /abc/def.c:3:5
    SOURCE_LOC_RE = re.compile(r'(.+):(\d+):(\d+)$')

    # llvm-symbolizer prints two lines per location. The first line contains a
    # function name, and the second contains a source location like
    # '/abc/def.c:3:5'. If the function or source info is not available, it
    # will be printed as '??', in which case we store None. If the line and
    # column info is not available, they will be printed as 0, which we store
    # as the integer 0.
    if len(out_lines) % 2:
        # A dangling line cannot be paired up; report it instead of failing
        # with an IndexError.
        raise Error('Unexpected llvm-symbolizer output '
                    f'(odd number of lines): {out!r}')

    for func, loc_str in zip(out_lines[::2], out_lines[1::2]):
        m = SOURCE_LOC_RE.match(loc_str)
        if m is None:
            raise Error('Unexpected source location in llvm-symbolizer '
                        f'output: {loc_str!r}')
        source = m.group(1)
        # The regex guarantees both groups are digit strings.
        line, column = int(m.group(2)), int(m.group(3))
        if func == '??':
            func = None
        if source == '??':
            source = None
        LocationInfo(source, line, column, func).print()
93
94
95
def get_sourceMappingURL_section(module):
    """Return the first section of `module` named "sourceMappingURL".

    Returns None when `module.sections()` yields no such section.
    """
    candidates = (s for s in module.sections() if s.name == "sourceMappingURL")
    return next(candidates, None)
100
101
102
class WasmSourceMap:
    """Source map parsed into a table from offsets to source locations.

    Attributes (filled in by `parse`):
      version: the 'version' field of the source map JSON.
      sources: the 'sources' list of the source map JSON.
      mappings: dict from offset to `WasmSourceMap.Location`.
      offsets: sorted list of every offset that has a mapping.
    """

    class Location:
        """One decoded mapping; `source` is an index into `sources`."""

        def __init__(self, source=None, line=0, column=0, func=None):
            self.source = source
            self.line = line
            self.column = column
            self.func = func

        def __repr__(self):
            # Keeps debug dumps of the mapping table readable.
            return (f'Location(source={self.source!r}, line={self.line!r}, '
                    f'column={self.column!r}, func={self.func!r})')

    def __init__(self):
        self.version = None
        self.sources = []
        self.mappings = {}
        self.offsets = []

    def parse(self, filename):
        """Load the source map JSON file `filename` into this object.

        Raises:
          Error: if the 'mappings' string contains a character that is not
            in the VLQ alphabet.
        """
        with open(filename, encoding='utf-8') as f:
            source_map_json = json.load(f)
        if shared.DEBUG:
            print(source_map_json)

        self.version = source_map_json['version']
        self.sources = source_map_json['sources']

        chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/='
        vlq_map = {c: i for i, c in enumerate(chars)}

        def decodeVLQ(string):
            """Decode a string of VLQ digits into a list of signed integers."""
            result = []
            shift = 0
            value = 0
            for c in string:
                try:
                    integer = vlq_map[c]
                except KeyError as e:
                    # A failed dict lookup raises KeyError (not ValueError).
                    raise Error(f'Invalid character ({c}) in VLQ') from e
                # The low 5 bits carry data; bit 5 (value 32) marks that more
                # digits of the same number follow.
                value += (integer & 31) << shift
                if integer & 32:
                    shift += 5
                else:
                    # The lowest bit of the assembled value is the sign.
                    negate = value & 1
                    value >>= 1
                    result.append(-value if negate else value)
                    value = shift = 0
            return result

        # Every field of a segment is a delta relative to the running values
        # below, which carry over from one segment to the next.
        offset = 0
        src = 0
        line = 1
        col = 1
        for segment in source_map_json['mappings'].split(','):
            data = decodeVLQ(segment)
            if not data:
                # An empty segment (e.g. an empty 'mappings' string) carries
                # no offset, so there is nothing to record.
                continue
            info = []

            offset += data[0]
            if len(data) >= 2:
                src += data[1]
                info.append(src)
            if len(data) >= 3:
                line += data[2]
                info.append(line)
            if len(data) >= 4:
                col += data[3]
                info.append(col)
            # TODO: see if we need the name, which is the next field (data[4])

            self.mappings[offset] = WasmSourceMap.Location(*info)
            self.offsets.append(offset)
        self.offsets.sort()

    def find_offset(self, offset):
        """Return the largest mapped offset <= `offset`.

        Returns None when no mapped offset is <= `offset` (including when
        there are no mappings at all).
        """
        lo = 0
        hi = len(self.offsets)

        # Binary search for the first mapped offset that is > `offset`.
        while lo < hi:
            mid = (lo + hi) // 2
            if self.offsets[mid] > offset:
                hi = mid
            else:
                lo = mid + 1
        if lo == 0:
            # Indexing with lo - 1 == -1 would wrongly return the last offset.
            return None
        return self.offsets[lo - 1]

    def lookup(self, offset):
        """Return a LocationInfo for the mapping that covers `offset`.

        When `offset` precedes every mapped offset, an empty LocationInfo
        (no source, line 0, column 0) is returned.
        """
        nearest = self.find_offset(offset)
        if nearest is None:
            return LocationInfo()
        assert nearest in self.mappings, 'Sourcemap has an offset with no mapping'
        info = self.mappings[nearest]
        return LocationInfo(
            self.sources[info.source] if info.source is not None else None,
            info.line,
            info.column,
        )
193
194
195
def symbolize_address_sourcemap(module, address, force_file):
    """Symbolize `address` using a source map and print the location.

    Args:
      module: the wasm module. When `force_file` is falsy, the source map
        location is read from its "sourceMappingURL" section.
      address: integer address to look up in the source map.
      force_file: path of the source map file to use, or a falsy value to
        take it from the module.

    Raises:
      Error: if no source map file is forced and the module has no usable
        sourceMappingURL section.
    """
    URL = force_file
    if not URL:
        # If a sourcemap file is not forced, read it from the wasm module
        section = get_sourceMappingURL_section(module)
        if not section:
            raise Error(
                f'No sourceMappingURL section found in {module.filename}')
        module.seek(section.offset)
        # The section name has to be consumed before the URL can be read, so
        # this read must not live inside an `assert` (stripped under -O).
        section_name = module.read_string()
        if section_name != 'sourceMappingURL':
            raise Error('Malformed sourceMappingURL section in '
                        f'{module.filename}')
        # TODO: support stripping/replacing a prefix from the URL
        URL = module.read_string()

    if shared.DEBUG:
        print(f'Source Mapping URL: {URL}')
    sm = WasmSourceMap()
    sm.parse(URL)
    if shared.DEBUG:
        csoff = get_codesec_offset(module)
        print(sm.mappings)
        # Print with section offsets to easily compare against dwarf
        for k, v in sm.mappings.items():
            print(f'{k - csoff:x}: {v}')
    sm.lookup(address).print()
217
218
219
def symbolize_address_symbolmap(module, address, symbol_map_file):
    """Symbolize using a symbol map file.

    The symbol map holds one `index:name` entry per line. The function
    printed is the last one yielded by `module.iter_functions_by_index()`
    whose `offset` is <= `address`.

    Args:
      module: the wasm module whose functions are scanned.
      address: integer address to look up.
      symbol_map_file: path of the symbol map file.
    """
    func_names = {}

    with open(symbol_map_file) as f:
        for line in f.read().splitlines():
            if not line.strip():
                continue
            # Split on the first ':' only, since a name may itself contain
            # ':' (e.g. 'ns::func').
            index, name = line.split(':', 1)
            func_names[int(index)] = name

    # Index of the last function starting at or before the address. Keeping
    # the index actually yielded (rather than computing i - 1) also covers an
    # address inside the final function and indices that do not start at 0.
    # NOTE(review): function sizes are not consulted, so an address past the
    # end of the last function is attributed to it - confirm this is
    # acceptable.
    func_index = None
    for i, func in module.iter_functions_by_index():
        if shared.DEBUG:
            print(f'Func {i}: {hex(func.offset)}, {func_names.get(i, "??")}')
        if func.offset > address:
            break
        func_index = i

    if func_index is None:
        print("Address is before the first function")
        return

    # A function missing from the symbol map is printed as '??'.
    LocationInfo(func=func_names.get(func_index)).print()
242
243
244
def main(args):
    """Symbolize `args.address` in `args.wasm_file` and print the result.

    The information source is `args.source` when one is forced; otherwise it
    is picked from the sections present in the module, in this order:
    .debug_line, sourceMappingURL, name, linking.

    Args:
      args: namespace with the attributes `wasm_file`, `address`, `addrtype`,
        `source` and `file` (see `get_args`).

    Raises:
      Error: if the address is not a valid number, if the symbolmap source is
        requested without a file, or if no usable information source exists.
    """
    with webassembly.Module(args.wasm_file) as module:
        base = 16 if args.address.lower().startswith('0x') else 10
        try:
            address = int(args.address, base)
        except ValueError as e:
            # Report bad input as a tool error rather than a traceback.
            raise Error(f'Invalid address: {args.address}') from e

        if args.addrtype == 'code':
            address += get_codesec_offset(module)

        # `args.source` is an empty tuple when no source is forced, which
        # keeps the `in` tests below valid (and false) in that case.
        if ((has_debug_line_section(module) and not args.source) or
                'dwarf' in args.source):
            symbolize_address_symbolizer(module, address, is_dwarf=True)
        elif ((get_sourceMappingURL_section(module) and not args.source) or
              'sourcemap' in args.source):
            symbolize_address_sourcemap(module, address, args.file)
        elif ((has_name_section(module) and not args.source) or
              'names' in args.source):
            symbolize_address_symbolizer(module, address, is_dwarf=False)
        elif ((has_linking_section(module) and not args.source) or
              'symtab' in args.source):
            symbolize_address_symbolizer(module, address, is_dwarf=False)
        elif (args.source == 'symbolmap'):
            if not args.file:
                # Without this check open(None) fails with a TypeError.
                raise Error('The symbolmap source requires a symbol map '
                            'file (use --file)')
            symbolize_address_symbolmap(module, address, args.file)
        else:
            raise Error('No .debug_line or sourceMappingURL section found in '
                        f'{module.filename}.'
                        " I don't know how to symbolize this file yet")
270
271
272
def get_args():
    """Parse the command line and return the resulting namespace.

    When --verbose is given, shared.PRINT_SUBPROCS is set to 1 and
    shared.DEBUG to True before returning.
    """
    source_kinds = ['dwarf', 'sourcemap', 'names', 'symtab', 'symbolmap']
    address_kinds = ['code', 'file']

    cli = argparse.ArgumentParser()
    # The empty-tuple default lets callers test `'kind' in args.source` even
    # when no source was forced.
    cli.add_argument('-s', '--source', choices=source_kinds, default=(),
                     help='Force debug info source type')
    cli.add_argument('-f', '--file', action='store',
                     help='Force debug info source file')
    cli.add_argument('-t', '--addrtype', choices=address_kinds, default='file',
                     help='Address type (code section or file offset)')
    cli.add_argument('-v', '--verbose', action='store_true',
                     help='Print verbose info for debugging this script')
    cli.add_argument('wasm_file', help='Wasm file')
    cli.add_argument('address', help='Address to lookup')

    parsed = cli.parse_args()
    if not parsed.verbose:
        return parsed
    shared.PRINT_SUBPROCS = 1
    shared.DEBUG = True
    return parsed
291
292
293
if __name__ == '__main__':
    # Script entry point: expected failures are reported on stderr as
    # "<program>: <message>" and become exit status 1; otherwise the value
    # returned by main() is passed to sys.exit().
    try:
        rv = main(get_args())
    except (Error, webassembly.InvalidWasmError, OSError) as err:
        rv = 1
        print(f'{sys.argv[0]}: {err}', file=sys.stderr)
    sys.exit(rv)
300
301