Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
emscripten-core
GitHub Repository: emscripten-core/emscripten
Path: blob/main/tools/webassembly.py
6161 views
1
# Copyright 2011 The Emscripten Authors. All rights reserved.
2
# Emscripten is available under two separate licenses, the MIT license and the
3
# University of Illinois/NCSA Open Source License. Both these licenses can be
4
# found in the LICENSE file.
5
6
"""Utilities for manipulating WebAssembly binaries from python.
7
"""
8
9
import logging
10
import os
11
import sys
12
from collections import namedtuple
13
from enum import IntEnum
14
from functools import wraps
15
16
from . import utils
17
from .utils import memoize
18
19
sys.path.append(utils.path_from_root('third_party'))
20
21
import leb128
22
23
logger = logging.getLogger('webassembly')

# Size of a wasm memory page.
WASM_PAGE_SIZE = 64 * 1024

# A wasm binary starts with the 4-byte magic followed by the 4-byte version;
# section parsing begins right after these HEADER_SIZE bytes.
MAGIC = b'\x00asm'
VERSION = b'\x01\x00\x00\x00'
HEADER_SIZE = len(MAGIC) + len(VERSION)

# Flag bit in a limits record: set when a maximum follows the initial value.
LIMITS_HAS_MAX = 1 << 0

# Flag bit in a data segment: set when the segment has no init expression.
SEG_PASSIVE = 1 << 0

# NOTE(review): presumably the prefix bytes of multi-byte opcode families;
# they are not referenced by the visible code in this file.
PREFIX_MATH = 0xFC
PREFIX_THREADS = 0xFE
PREFIX_SIMD = 0xFD

# The low two bits of a symbol's flags hold its binding.
SYMBOL_BINDING_MASK = 0b11
SYMBOL_BINDING_GLOBAL = 0b00
SYMBOL_BINDING_WEAK = 0b01
SYMBOL_BINDING_LOCAL = 0b10
45
46
47
def to_leb(num):
    """Return `num` encoded by the unsigned LEB128 encoder of `leb128`."""
    encoded = leb128.u.encode(num)
    return encoded
49
50
51
def read_uleb(iobuf):
    """Decode one unsigned LEB128 number from the reader `iobuf`.

    Only the first element of what `leb128.u.decode_reader` returns (the
    decoded value) is handed back to the caller.
    """
    decoded = leb128.u.decode_reader(iobuf)
    return decoded[0]
53
54
55
def read_sleb(iobuf):
    """Decode one signed LEB128 number from the reader `iobuf`.

    Only the first element of what `leb128.i.decode_reader` returns (the
    decoded value) is handed back to the caller.
    """
    decoded = leb128.i.decode_reader(iobuf)
    return decoded[0]
57
58
59
def once(method):
    """Decorator that runs `method` at most once per instance.

    The result of the first call is stored in the instance's `_cache`
    dictionary, keyed by the undecorated function, and is returned by that
    call and by every later one.  Later calls never invoke `method` again,
    whatever arguments they are given.  The instance must provide a `_cache`
    dict before the decorated method is first called.
    """

    @wraps(method)
    def helper(self, *args, **kwargs):
        cache = self._cache
        if method not in cache:
            cache[method] = method(self, *args, **kwargs)
        # Hand the stored value back; without this every decorated method
        # would appear to return None even though its result was cached.
        return cache[method]

    return helper
68
69
70
class Type(IntEnum):
    """Single-byte type codes as decoded by `Module.read_type`.

    The trailing comment on each member is the same byte interpreted as a
    7-bit signed number.
    """

    I32 = 0x7F        # -0x1
    I64 = 0x7E        # -0x2
    F32 = 0x7D        # -0x3
    F64 = 0x7C        # -0x4
    V128 = 0x7B       # -0x5
    FUNCREF = 0x70    # -0x10
    EXTERNREF = 0x6F  # -0x11
    EXNREF = 0x69     # -0x17
    VOID = 0x40       # -0x40
80
81
82
class OpCode(IntEnum):
    """Single-byte instruction opcodes known to this module.

    `Module.read_init` converts every byte of an initializer expression
    through this enum, so a byte missing here raises `ValueError` there.
    """

    # Control flow
    NOP = 0x01
    BLOCK = 0x02
    END = 0x0B
    BR = 0x0C
    BR_TABLE = 0x0E
    CALL = 0x10
    DROP = 0x1A
    # Locals and globals
    LOCAL_GET = 0x20
    LOCAL_SET = 0x21
    LOCAL_TEE = 0x22
    GLOBAL_GET = 0x23
    GLOBAL_SET = 0x24
    RETURN = 0x0F
    # Constants
    I32_CONST = 0x41
    I64_CONST = 0x42
    F32_CONST = 0x43
    F64_CONST = 0x44
    # Arithmetic
    I32_ADD = 0x6A
    I64_ADD = 0x7C
    REF_NULL = 0xD0
    # Prefix bytes
    ATOMIC_PREFIX = 0xFE
    MEMORY_PREFIX = 0xFC
105
106
107
class MemoryOpCode(IntEnum):
    """Bulk-memory operation codes.

    NOTE(review): presumably the secondary opcodes that follow
    `OpCode.MEMORY_PREFIX`; nothing in the visible code uses them.
    """

    MEMORY_INIT = 0x08
    MEMORY_DROP = 0x09
    MEMORY_COPY = 0x0A
    MEMORY_FILL = 0x0B
112
113
114
class AtomicOpCode(IntEnum):
    """Atomic operation codes.

    NOTE(review): presumably the secondary opcodes that follow
    `OpCode.ATOMIC_PREFIX`; nothing in the visible code uses them.
    """

    ATOMIC_NOTIFY = 0x00
    ATOMIC_WAIT32 = 0x01
    ATOMIC_WAIT64 = 0x02
    ATOMIC_I32_STORE = 0x17
    ATOMIC_I32_RMW_CMPXCHG = 0x48
120
121
122
class SecType(IntEnum):
    """Section id: the first byte of every section (see `Module.sections`).

    Members are deliberately not sorted by value; the declaration order is
    kept as-is because it is also the enum's iteration order.
    """

    CUSTOM = 0
    TYPE = 1
    IMPORT = 2
    FUNCTION = 3
    TABLE = 4
    MEMORY = 5
    TAG = 13
    GLOBAL = 6
    EXPORT = 7
    START = 8
    ELEM = 9
    DATACOUNT = 12
    CODE = 10
    DATA = 11
137
138
139
class ExternType(IntEnum):
    """Kind byte of an import or export entry.

    `Module.get_imports` and `Module.get_exports` decode it from the byte
    that follows the entry's name(s).
    """

    FUNC = 0
    TABLE = 1
    MEMORY = 2
    GLOBAL = 3
    TAG = 4
145
146
147
class DylinkType(IntEnum):
    """Subsection ids inside a `dylink.0` custom section.

    `Module.parse_dylink_section` dispatches on these values.
    """

    MEM_INFO = 1
    NEEDED = 2
    EXPORT_INFO = 3
    IMPORT_INFO = 4
    RUNTIME_PATH = 5
153
154
155
class TargetFeaturePrefix(IntEnum):
    """Prefix byte of an entry in the `target_features` custom section.

    The values are the character codes of '+' and '-'.
    """

    USED = 0x2B        # ord('+')
    DISALLOWED = 0x2D  # ord('-')
158
159
160
class NameType(IntEnum):
    """Subsection ids inside the `name` custom section.

    `Module.get_function_names` treats subsection id 1 (`FUNCTION`) as the
    function-name map and skips every other subsection.
    """

    MODULE = 0
    FUNCTION = 1
    LOCAL = 2
    LABEL = 3
    TYPE = 4
    TABLE = 5
    MEMORY = 6
    GLOBAL = 7
    ELEMSEGMENT = 8
    DATASEGMENT = 9
    FIELD = 10
    TAG = 11
173
174
175
class InvalidWasmError(BaseException):
    """Raised by `Module` when a file lacks the expected wasm header.

    NOTE(review): this derives from `BaseException`, so `except Exception`
    does not catch it; confirm that is intended before changing the base.
    """
177
178
179
# Lightweight records produced by the `Module` reader.
Section = namedtuple('Section', 'type size offset name')
Limits = namedtuple('Limits', 'flags initial maximum')
Import = namedtuple('Import', 'kind module field type')
Export = namedtuple('Export', 'name kind index')
Global = namedtuple('Global', 'type mutable init')
Dylink = namedtuple(
    'Dylink',
    'mem_size mem_align table_size table_align '
    'needed export_info import_info runtime_paths',
)
Table = namedtuple('Table', 'elem_type limits')
FunctionBody = namedtuple('FunctionBody', 'offset size')
DataSegment = namedtuple('DataSegment', 'flags init offset size')
FuncType = namedtuple('FuncType', 'params returns')
189
190
191
class Module:
192
"""Extremely minimal wasm module reader. Currently only used
193
for parsing the dylink section."""
194
def __init__(self, filename):
195
self.buf = None # Set this before FS calls below in case they throw.
196
self.filename = filename
197
self.size = os.path.getsize(filename)
198
self.buf = open(filename, 'rb')
199
magic = self.buf.read(4)
200
version = self.buf.read(4)
201
if magic != MAGIC or version != VERSION:
202
raise InvalidWasmError(f'{filename} is not a valid wasm file')
203
self._cache = {}
204
205
def __del__(self):
206
assert not self.buf, '`__exit__` should have already been called, please use context manager'
207
208
def __enter__(self):
209
return self
210
211
def __exit__(self, _exc_type, _exc_val, _exc_tb):
212
if self.buf:
213
self.buf.close()
214
self.buf = None
215
216
def read_at(self, offset, count):
217
self.buf.seek(offset)
218
return self.buf.read(count)
219
220
def read_byte(self):
221
return self.buf.read(1)[0]
222
223
def read_uleb(self):
224
return read_uleb(self.buf)
225
226
def read_sleb(self):
227
return read_sleb(self.buf)
228
229
def read_string(self):
230
size = self.read_uleb()
231
return self.buf.read(size).decode('utf-8')
232
233
def read_limits(self):
234
flags = self.read_byte()
235
initial = self.read_uleb()
236
maximum = 0
237
if flags & LIMITS_HAS_MAX:
238
maximum = self.read_uleb()
239
return Limits(flags, initial, maximum)
240
241
def read_type(self):
242
return Type(self.read_uleb())
243
244
def read_init(self):
245
code = []
246
while 1:
247
opcode = OpCode(self.read_byte())
248
args = []
249
match opcode:
250
case OpCode.GLOBAL_GET:
251
args.append(self.read_uleb())
252
case OpCode.I32_CONST | OpCode.I64_CONST:
253
args.append(self.read_sleb())
254
case OpCode.REF_NULL:
255
args.append(self.read_type())
256
case OpCode.END | OpCode.I32_ADD | OpCode.I64_ADD:
257
pass
258
case _:
259
raise Exception('unexpected opcode %s' % opcode)
260
code.append((opcode, args))
261
if opcode == OpCode.END:
262
break
263
return code
264
265
def seek(self, offset):
266
return self.buf.seek(offset)
267
268
def tell(self):
269
return self.buf.tell()
270
271
def skip(self, count):
272
self.buf.seek(count, os.SEEK_CUR)
273
274
def sections(self):
275
"""Generator that lazily returns sections from the wasm file."""
276
offset = HEADER_SIZE
277
while offset < self.size:
278
self.seek(offset)
279
section_type = SecType(self.read_byte())
280
section_size = self.read_uleb()
281
section_offset = self.buf.tell()
282
name = None
283
if section_type == SecType.CUSTOM:
284
name = self.read_string()
285
286
yield Section(section_type, section_size, section_offset, name)
287
offset = section_offset + section_size
288
289
@memoize
290
def get_types(self):
291
type_section = self.get_section(SecType.TYPE)
292
if not type_section:
293
return []
294
self.seek(type_section.offset)
295
num_types = self.read_uleb()
296
types = []
297
for _ in range(num_types):
298
type_form = self.read_byte()
299
assert type_form == 0x60
300
301
num_params = self.read_uleb()
302
params = [self.read_type() for _ in range(num_params)]
303
304
num_returns = self.read_uleb()
305
returns = [self.read_type() for _ in range(num_returns)]
306
307
types.append(FuncType(params, returns))
308
309
return types
310
311
@memoize
312
def parse_dylink_section(self):
313
dylink_section = next(self.sections())
314
assert dylink_section.type == SecType.CUSTOM
315
self.seek(dylink_section.offset)
316
# section name
317
needed = []
318
export_info = {}
319
import_info = {}
320
runtime_paths = []
321
self.read_string() # name
322
323
if dylink_section.name == 'dylink':
324
mem_size = self.read_uleb()
325
mem_align = self.read_uleb()
326
table_size = self.read_uleb()
327
table_align = self.read_uleb()
328
329
needed_count = self.read_uleb()
330
while needed_count:
331
libname = self.read_string()
332
needed.append(libname)
333
needed_count -= 1
334
elif dylink_section.name == 'dylink.0':
335
section_end = dylink_section.offset + dylink_section.size
336
while self.tell() < section_end:
337
subsection_type = self.read_uleb()
338
subsection_size = self.read_uleb()
339
end = self.tell() + subsection_size
340
match subsection_type:
341
case DylinkType.MEM_INFO:
342
mem_size = self.read_uleb()
343
mem_align = self.read_uleb()
344
table_size = self.read_uleb()
345
table_align = self.read_uleb()
346
case DylinkType.NEEDED:
347
needed_count = self.read_uleb()
348
while needed_count:
349
libname = self.read_string()
350
needed.append(libname)
351
needed_count -= 1
352
case DylinkType.EXPORT_INFO:
353
count = self.read_uleb()
354
while count:
355
sym = self.read_string()
356
flags = self.read_uleb()
357
export_info[sym] = flags
358
count -= 1
359
case DylinkType.IMPORT_INFO:
360
count = self.read_uleb()
361
while count:
362
module = self.read_string()
363
field = self.read_string()
364
flags = self.read_uleb()
365
import_info.setdefault(module, {})
366
import_info[module][field] = flags
367
count -= 1
368
case DylinkType.RUNTIME_PATH:
369
count = self.read_uleb()
370
while count:
371
rpath = self.read_string()
372
runtime_paths.append(rpath)
373
count -= 1
374
case _:
375
print(f'unknown subsection: {subsection_type}')
376
# ignore unknown subsections
377
self.skip(subsection_size)
378
assert self.tell() == end
379
else:
380
utils.exit_with_error('error parsing shared library')
381
382
return Dylink(mem_size, mem_align, table_size, table_align, needed, export_info, import_info, runtime_paths)
383
384
@memoize
385
def get_exports(self):
386
export_section = self.get_section(SecType.EXPORT)
387
if not export_section:
388
return []
389
390
self.seek(export_section.offset)
391
num_exports = self.read_uleb()
392
exports = []
393
for _ in range(num_exports):
394
name = self.read_string()
395
kind = ExternType(self.read_byte())
396
index = self.read_uleb()
397
exports.append(Export(name, kind, index))
398
399
return exports
400
401
@memoize
402
def get_imports(self):
403
import_section = self.get_section(SecType.IMPORT)
404
if not import_section:
405
return []
406
407
self.seek(import_section.offset)
408
num_imports = self.read_uleb()
409
imports = []
410
for _ in range(num_imports):
411
mod = self.read_string()
412
field = self.read_string()
413
kind = ExternType(self.read_byte())
414
type_ = None
415
match kind:
416
case ExternType.FUNC:
417
type_ = self.read_uleb()
418
case ExternType.GLOBAL:
419
type_ = self.read_sleb()
420
self.read_byte() # mutable
421
case ExternType.MEMORY:
422
self.read_limits() # limits
423
case ExternType.TABLE:
424
type_ = self.read_sleb()
425
self.read_limits() # limits
426
case ExternType.TAG:
427
self.read_byte() # attribute
428
type_ = self.read_uleb()
429
case _:
430
raise AssertionError()
431
imports.append(Import(kind, mod, field, type_))
432
433
return imports
434
435
@memoize
436
def get_globals(self):
437
global_section = self.get_section(SecType.GLOBAL)
438
if not global_section:
439
return []
440
globls = []
441
self.seek(global_section.offset)
442
num_globals = self.read_uleb()
443
for _ in range(num_globals):
444
global_type = self.read_type()
445
mutable = self.read_byte()
446
init = self.read_init()
447
globls.append(Global(global_type, mutable, init))
448
return globls
449
450
@memoize
451
def get_start(self):
452
start_section = self.get_section(SecType.START)
453
if not start_section:
454
return None
455
self.seek(start_section.offset)
456
return self.read_uleb()
457
458
@memoize
459
def get_functions(self):
460
code_section = self.get_section(SecType.CODE)
461
if not code_section:
462
return []
463
functions = []
464
self.seek(code_section.offset)
465
num_functions = self.read_uleb()
466
for _ in range(num_functions):
467
body_size = self.read_uleb()
468
start = self.tell()
469
functions.append(FunctionBody(start, body_size))
470
self.seek(start + body_size)
471
return functions
472
473
def get_section(self, section_code):
474
return next((s for s in self.sections() if s.type == section_code), None)
475
476
@memoize
477
def get_custom_section(self, name):
478
for section in self.sections():
479
if section.type == SecType.CUSTOM and section.name == name:
480
return section
481
return None
482
483
@memoize
484
def get_segments(self):
485
segments = []
486
data_section = self.get_section(SecType.DATA)
487
self.seek(data_section.offset)
488
num_segments = self.read_uleb()
489
for _ in range(num_segments):
490
flags = self.read_uleb()
491
if (flags & SEG_PASSIVE):
492
init = None
493
else:
494
init = self.read_init()
495
size = self.read_uleb()
496
offset = self.tell()
497
segments.append(DataSegment(flags, init, offset, size))
498
self.seek(offset + size)
499
return segments
500
501
@memoize
502
def get_tables(self):
503
table_section = self.get_section(SecType.TABLE)
504
if not table_section:
505
return []
506
507
self.seek(table_section.offset)
508
num_tables = self.read_uleb()
509
tables = []
510
for _ in range(num_tables):
511
elem_type = self.read_type()
512
limits = self.read_limits()
513
tables.append(Table(elem_type, limits))
514
515
return tables
516
517
@memoize
518
def get_function_types(self):
519
function_section = self.get_section(SecType.FUNCTION)
520
if not function_section:
521
return []
522
523
self.seek(function_section.offset)
524
num_types = self.read_uleb()
525
return [self.read_uleb() for _ in range(num_types)]
526
527
@memoize
528
def get_function_names(self, remove_imports=True):
529
num_funcs = self.num_imported_funcs() + len(self.get_functions())
530
names = [None] * num_funcs
531
532
name_section = self.get_custom_section('name')
533
if not name_section:
534
return names
535
536
self.seek(name_section.offset)
537
self.read_string() # section name
538
section_end = name_section.offset + name_section.size
539
540
while self.tell() < section_end:
541
subsection_id = self.read_byte()
542
subsection_size = self.read_uleb()
543
if subsection_id == 1: # function names
544
count = self.read_uleb()
545
for _ in range(count):
546
func_idx = self.read_uleb()
547
func_name = self.read_string()
548
assert func_idx < len(names)
549
names[func_idx] = func_name
550
else:
551
self.skip(subsection_size)
552
553
return names[self.num_imported_funcs():] if remove_imports else names
554
555
def has_name_section(self):
556
return self.get_custom_section('name') is not None
557
558
@once
559
def _calc_indexes(self):
560
self.imports_by_kind = {}
561
for i in self.get_imports():
562
self.imports_by_kind.setdefault(i.kind, [])
563
self.imports_by_kind[i.kind].append(i)
564
565
def num_imported_funcs(self):
566
self._calc_indexes()
567
return len(self.imports_by_kind.get(ExternType.FUNC, []))
568
569
def num_imported_globals(self):
570
self._calc_indexes()
571
return len(self.imports_by_kind.get(ExternType.GLOBAL, []))
572
573
def get_function(self, idx):
574
self._calc_indexes()
575
assert idx >= self.num_imported_funcs()
576
return self.get_functions()[idx - self.num_imported_funcs()]
577
578
def iter_functions_by_index(self):
579
self._calc_indexes()
580
for idx in range(self.num_imported_funcs(),
581
self.num_imported_funcs() + len(self.get_functions())):
582
yield idx, self.get_function(idx)
583
584
def get_global(self, idx):
585
self._calc_indexes()
586
assert idx >= self.num_imported_globals()
587
return self.get_globals()[idx - self.num_imported_globals()]
588
589
def get_function_type(self, idx):
590
self._calc_indexes()
591
if idx < self.num_imported_funcs():
592
imp = self.imports_by_kind[ExternType.FUNC][idx]
593
func_type = imp.type
594
else:
595
func_type = self.get_function_types()[idx - self.num_imported_funcs()]
596
return self.get_types()[func_type]
597
598
@memoize
599
def get_target_features(self):
600
section = self.get_custom_section('target_features')
601
if not section:
602
return {}
603
self.seek(section.offset)
604
assert self.read_string() == 'target_features'
605
features = {}
606
self.read_byte() # ignore feature count
607
while self.tell() < section.offset + section.size:
608
prefix = TargetFeaturePrefix(self.read_byte())
609
feature = self.read_string()
610
features[feature] = prefix
611
return features
612
613
@memoize
614
def get_sourceMappingURL(self):
615
section = self.get_custom_section('sourceMappingURL')
616
if not section:
617
return ''
618
self.seek(section.offset)
619
self.read_string() # 'sourceMappingURL'
620
return self.read_string()
621
622
623
def parse_dylink_section(wasm_file):
    """Open `wasm_file` and return `Module.parse_dylink_section()` for it.

    The module is closed again before this function returns.
    """
    with Module(wasm_file) as wasm:
        dylink = wasm.parse_dylink_section()
    return dylink
626
627
628
def get_exports(wasm_file):
    """Open `wasm_file` and return `Module.get_exports()` for it.

    The module is closed again before this function returns.
    """
    with Module(wasm_file) as wasm:
        exports = wasm.get_exports()
    return exports
631
632
633
def get_imports(wasm_file):
    """Open `wasm_file` and return `Module.get_imports()` for it.

    The module is closed again before this function returns.
    """
    with Module(wasm_file) as wasm:
        imports = wasm.get_imports()
    return imports
636
637
638
def get_weak_imports(wasm_file):
    """Return the names of the weakly bound imports of `wasm_file`.

    An import counts as weak when the binding bits of its flags in the
    dylink import info equal SYMBOL_BINDING_WEAK.  Names are listed in the
    iteration order of that info, module by module.
    """
    import_info = parse_dylink_section(wasm_file).import_info
    return [
        name
        for fields in import_info.values()
        for name, binding in fields.items()
        if (binding & SYMBOL_BINDING_MASK) == SYMBOL_BINDING_WEAK
    ]
646
647