Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
emscripten-core
GitHub Repository: emscripten-core/emscripten
Path: blob/main/tools/webassembly.py
4128 views
1
# Copyright 2011 The Emscripten Authors. All rights reserved.
2
# Emscripten is available under two separate licenses, the MIT license and the
3
# University of Illinois/NCSA Open Source License. Both these licenses can be
4
# found in the LICENSE file.
5
6
"""Utilities for manipulating WebAssembly binaries from Python.
7
"""
8
9
from collections import namedtuple
10
from enum import IntEnum
11
from functools import wraps
12
import logging
13
import os
14
import sys
15
16
from .utils import memoize
17
from . import utils
18
19
sys.path.append(utils.path_from_root('third_party'))
20
21
import leb128
22
23
logger = logging.getLogger('webassembly')

# Size in bytes of a single wasm linear-memory page.
WASM_PAGE_SIZE = 65536

# Every wasm binary starts with a 4-byte magic number followed by a 4-byte
# version; HEADER_SIZE is the combined length of the two.
MAGIC = b'\0asm'
VERSION = b'\x01\0\0\0'
HEADER_SIZE = len(MAGIC) + len(VERSION)

# Flag bit in a limits record signalling that a maximum follows the initial.
LIMITS_HAS_MAX = 0x1

# Flag bit in a data segment marking it as passive (no init expression).
SEG_PASSIVE = 0x1

# Prefix bytes that introduce multi-byte opcodes.
PREFIX_MATH = 0xfc
PREFIX_THREADS = 0xfe
PREFIX_SIMD = 0xfd

# Symbol binding is stored in the low bits of a symbol's flags word.
SYMBOL_BINDING_MASK = 0x3
SYMBOL_BINDING_GLOBAL = 0x0
SYMBOL_BINDING_WEAK = 0x1
SYMBOL_BINDING_LOCAL = 0x2
45
46
47
def to_leb(num):
  """Encode `num` as an unsigned LEB128 value using the `leb128` module."""
  encoded = leb128.u.encode(num)
  return encoded
49
50
51
def read_uleb(iobuf):
  """Decode one unsigned LEB128 value from the reader `iobuf`.

  Only the first element of the `decode_reader` result is returned; the
  remaining element(s) are discarded.
  """
  decoded = leb128.u.decode_reader(iobuf)
  return decoded[0]
53
54
55
def read_sleb(iobuf):
  """Decode one signed LEB128 value from the reader `iobuf`.

  Only the first element of the `decode_reader` result is returned; the
  remaining element(s) are discarded.
  """
  decoded = leb128.i.decode_reader(iobuf)
  return decoded[0]
57
58
59
def once(method):
  """Decorator that runs a method at most once per instance.

  The result of the first call is stored in the instance's `_cache` dict
  (which the instance must provide), keyed by the undecorated method object.
  Later calls skip the method body entirely and return the stored result,
  regardless of the arguments they are given.
  """

  @wraps(method)
  def helper(self, *args, **kwargs):
    key = method
    if key not in self._cache:
      self._cache[key] = method(self, *args, **kwargs)
    # Hand back the stored value so that decorated methods which compute
    # something (rather than only producing side effects) remain usable.
    return self._cache[key]

  return helper
68
69
70
class Type(IntEnum):
  """Value-type codes as they appear in the binary.

  The trailing comment on each member is the same code read as a signed
  LEB128 value.
  """
  # Numeric and vector types.
  I32 = 0x7f        # -0x1
  I64 = 0x7e        # -0x2
  F32 = 0x7d        # -0x3
  F64 = 0x7c        # -0x4
  V128 = 0x7b       # -0x5
  # Reference types.
  FUNCREF = 0x70    # -0x10
  EXTERNREF = 0x6f  # -0x11
  EXNREF = 0x69     # -0x17
  # Empty block type.
  VOID = 0x40       # -0x40
80
81
82
class OpCode(IntEnum):
  """The subset of single-byte wasm opcodes this module knows by name."""
  # Control flow.
  NOP = 0x01
  BLOCK = 0x02
  END = 0x0b
  BR = 0x0c
  BR_TABLE = 0x0e
  CALL = 0x10
  DROP = 0x1a
  # Local and global variable access.
  LOCAL_GET = 0x20
  LOCAL_SET = 0x21
  LOCAL_TEE = 0x22
  GLOBAL_GET = 0x23
  GLOBAL_SET = 0x24
  RETURN = 0x0f
  # Constants.
  I32_CONST = 0x41
  I64_CONST = 0x42
  F32_CONST = 0x43
  F64_CONST = 0x44
  # Integer addition.
  I32_ADD = 0x6a
  I64_ADD = 0x7c
  REF_NULL = 0xd0
  # Prefix bytes introducing the sub-opcodes in AtomicOpCode / MemoryOpCode.
  ATOMIC_PREFIX = 0xfe
  MEMORY_PREFIX = 0xfc
105
106
107
class MemoryOpCode(IntEnum):
  """Named memory sub-opcodes (see OpCode.MEMORY_PREFIX)."""
  MEMORY_INIT = 0x08
  MEMORY_DROP = 0x09
  MEMORY_COPY = 0x0a
  MEMORY_FILL = 0x0b
112
113
114
class AtomicOpCode(IntEnum):
  """Named atomic sub-opcodes (see OpCode.ATOMIC_PREFIX)."""
  # Wait / notify.
  ATOMIC_NOTIFY = 0x00
  ATOMIC_WAIT32 = 0x01
  ATOMIC_WAIT64 = 0x02
  # Atomic memory access.
  ATOMIC_I32_STORE = 0x17
  ATOMIC_I32_RMW_CMPXCHG = 0x48
120
121
122
class SecType(IntEnum):
  """Section id bytes.

  Members are declared in a fixed order that is not numeric order (TAG and
  DATACOUNT have ids out of sequence with their neighbours); iteration over
  the enum follows declaration order.
  """
  CUSTOM = 0
  TYPE = 1
  IMPORT = 2
  FUNCTION = 3
  TABLE = 4
  MEMORY = 5
  TAG = 13
  GLOBAL = 6
  EXPORT = 7
  START = 8
  ELEM = 9
  DATACOUNT = 12
  CODE = 10
  DATA = 11
137
138
139
class ExternType(IntEnum):
  """Kind byte used by import and export entries."""
  FUNC = 0
  TABLE = 1
  MEMORY = 2
  GLOBAL = 3
  TAG = 4
145
146
147
class DylinkType(IntEnum):
  """Subsection ids found inside a `dylink.0` custom section."""
  MEM_INFO = 1
  NEEDED = 2
  EXPORT_INFO = 3
  IMPORT_INFO = 4
  RUNTIME_PATH = 5
153
154
155
class TargetFeaturePrefix(IntEnum):
  """Prefix byte preceding each entry of the `target_features` section."""
  USED = 0x2b        # ASCII '+'
  DISALLOWED = 0x2d  # ASCII '-'
158
159
160
class NameType(IntEnum):
  """Subsection ids for the `name` custom section."""
  MODULE = 0
  FUNCTION = 1
  LOCAL = 2
  LABEL = 3
  TYPE = 4
  TABLE = 5
  MEMORY = 6
  GLOBAL = 7
  ELEMSEGMENT = 8
  DATASEGMENT = 9
  FIELD = 10
  TAG = 11
173
174
175
class InvalidWasmError(BaseException):
  """Raised when a file does not start with a valid wasm header.

  NOTE(review): this derives from BaseException, so a plain
  `except Exception` will not catch it.
  """
177
178
179
# Lightweight record types returned by the Module parsing methods.

# A section header: id, payload size, and file offset of the payload.  `name`
# is only populated for custom sections.
Section = namedtuple('Section', ('type', 'size', 'offset', 'name'))
Limits = namedtuple('Limits', ('flags', 'initial', 'maximum'))
Import = namedtuple('Import', ('kind', 'module', 'field', 'type'))
Export = namedtuple('Export', ('name', 'kind', 'index'))
Global = namedtuple('Global', ('type', 'mutable', 'init'))
# Combined contents of a `dylink` / `dylink.0` custom section.
Dylink = namedtuple('Dylink', (
  'mem_size',
  'mem_align',
  'table_size',
  'table_align',
  'needed',
  'export_info',
  'import_info',
  'runtime_paths',
))
Table = namedtuple('Table', ('elem_type', 'limits'))
# `offset` and `size` locate the body / payload bytes within the file.
FunctionBody = namedtuple('FunctionBody', ('offset', 'size'))
DataSegment = namedtuple('DataSegment', ('flags', 'init', 'offset', 'size'))
FuncType = namedtuple('FuncType', ('params', 'returns'))
189
190
191
class Module:
  """Minimal, seek-based reader for a wasm binary on disk.

  The file is opened in `__init__` and individual sections are parsed on
  demand by the `get_*` / `parse_*` methods.  Instances must be used as a
  context manager (`with Module(path) as m:`) so that the file handle is
  closed; `__del__` asserts that this has happened.
  """

  def __init__(self, filename):
    """Open `filename` and check its 8-byte header.

    Raises:
      InvalidWasmError: if the magic number or version does not match.
    """
    self.buf = None # Set this before FS calls below in case they throw.
    self._cache = {}
    self.filename = filename
    self.size = os.path.getsize(filename)
    self.buf = open(filename, 'rb')
    magic = self.buf.read(4)
    version = self.buf.read(4)
    if magic != MAGIC or version != VERSION:
      # No context manager exists yet to close the handle for us, so release
      # it here; otherwise the file leaks and `__del__` trips its assertion.
      self.buf.close()
      self.buf = None
      raise InvalidWasmError(f'{filename} is not a valid wasm file')

  def __del__(self):
    assert not self.buf, '`__exit__` should have already been called, please use context manager'

  def __enter__(self):
    return self

  def __exit__(self, _exc_type, _exc_val, _exc_tb):
    """Close the underlying file, if it is still open."""
    if self.buf:
      self.buf.close()
      self.buf = None

  # -- Low-level readers.  All of these operate at, and advance, the current
  # -- position of the underlying file.

  def read_at(self, offset, count):
    """Seek to absolute `offset` and return the next `count` bytes."""
    self.buf.seek(offset)
    return self.buf.read(count)

  def read_byte(self):
    """Return the next byte as an int."""
    return self.buf.read(1)[0]

  def read_uleb(self):
    """Return the next unsigned LEB128 value."""
    return read_uleb(self.buf)

  def read_sleb(self):
    """Return the next signed LEB128 value."""
    return read_sleb(self.buf)

  def read_string(self):
    """Return the next length-prefixed UTF-8 string."""
    size = self.read_uleb()
    return self.buf.read(size).decode('utf-8')

  def read_limits(self):
    """Read a limits record; `maximum` is 0 when LIMITS_HAS_MAX is not set."""
    flags = self.read_byte()
    initial = self.read_uleb()
    maximum = 0
    if flags & LIMITS_HAS_MAX:
      maximum = self.read_uleb()
    return Limits(flags, initial, maximum)

  def read_type(self):
    """Read a value type and return it as a `Type` member."""
    return Type(self.read_uleb())

  def read_init(self):
    """Read an init expression up to and including its END opcode.

    Returns:
      A list of `(opcode, args)` tuples, the last of which is the END.

    Raises:
      Exception: on an opcode outside the small supported set.
    """
    code = []
    while True:
      opcode = OpCode(self.read_byte())
      args = []
      if opcode == OpCode.GLOBAL_GET:
        args.append(self.read_uleb())
      elif opcode in (OpCode.I32_CONST, OpCode.I64_CONST):
        args.append(self.read_sleb())
      elif opcode in (OpCode.REF_NULL,):
        args.append(self.read_type())
      elif opcode in (OpCode.END, OpCode.I32_ADD, OpCode.I64_ADD):
        pass
      else:
        raise Exception('unexpected opcode %s' % opcode)
      code.append((opcode, args))
      if opcode == OpCode.END:
        break
    return code

  def seek(self, offset):
    """Move to absolute file position `offset`."""
    return self.buf.seek(offset)

  def tell(self):
    """Return the current file position."""
    return self.buf.tell()

  def skip(self, count):
    """Advance the file position by `count` bytes."""
    self.buf.seek(count, os.SEEK_CUR)

  # -- Section access.

  def sections(self):
    """Generator that lazily returns sections from the wasm file.

    The `offset` of each yielded `Section` is the file position immediately
    after the section's size field.  For custom sections that is where the
    name string begins, so parsers that seek to `offset` must read the name
    again before reaching the payload.
    """
    offset = HEADER_SIZE
    while offset < self.size:
      self.seek(offset)
      section_type = SecType(self.read_byte())
      section_size = self.read_uleb()
      section_offset = self.buf.tell()
      name = None
      if section_type == SecType.CUSTOM:
        name = self.read_string()

      yield Section(section_type, section_size, section_offset, name)
      offset = section_offset + section_size

  @memoize
  def get_types(self):
    """Return the type section as a list of `FuncType` ([] if absent)."""
    type_section = self.get_section(SecType.TYPE)
    if not type_section:
      return []
    self.seek(type_section.offset)
    num_types = self.read_uleb()
    types = []
    for _ in range(num_types):
      type_form = self.read_byte()
      # Only function types (form 0x60) are handled.
      assert type_form == 0x60

      num_params = self.read_uleb()
      params = [self.read_type() for _ in range(num_params)]

      num_returns = self.read_uleb()
      returns = [self.read_type() for _ in range(num_returns)]

      types.append(FuncType(params, returns))

    return types

  @memoize
  def parse_dylink_section(self):
    """Parse the `dylink` or `dylink.0` section, expected to come first.

    Returns:
      A `Dylink` tuple.  The four memory/table fields are 0 when a `dylink.0`
      section carries no MEM_INFO subsection.
    """
    dylink_section = next(self.sections())
    assert dylink_section.type == SecType.CUSTOM
    self.seek(dylink_section.offset)
    # Default to zero so that a `dylink.0` section without a MEM_INFO
    # subsection yields a usable result instead of an UnboundLocalError.
    mem_size = mem_align = table_size = table_align = 0
    needed = []
    export_info = {}
    import_info = {}
    runtime_paths = []
    self.read_string()  # section name

    if dylink_section.name == 'dylink':
      mem_size = self.read_uleb()
      mem_align = self.read_uleb()
      table_size = self.read_uleb()
      table_align = self.read_uleb()

      needed_count = self.read_uleb()
      for _ in range(needed_count):
        needed.append(self.read_string())
    elif dylink_section.name == 'dylink.0':
      section_end = dylink_section.offset + dylink_section.size
      while self.tell() < section_end:
        subsection_type = self.read_uleb()
        subsection_size = self.read_uleb()
        end = self.tell() + subsection_size
        if subsection_type == DylinkType.MEM_INFO:
          mem_size = self.read_uleb()
          mem_align = self.read_uleb()
          table_size = self.read_uleb()
          table_align = self.read_uleb()
        elif subsection_type == DylinkType.NEEDED:
          needed_count = self.read_uleb()
          for _ in range(needed_count):
            needed.append(self.read_string())
        elif subsection_type == DylinkType.EXPORT_INFO:
          count = self.read_uleb()
          for _ in range(count):
            sym = self.read_string()
            flags = self.read_uleb()
            export_info[sym] = flags
        elif subsection_type == DylinkType.IMPORT_INFO:
          count = self.read_uleb()
          for _ in range(count):
            module = self.read_string()
            field = self.read_string()
            flags = self.read_uleb()
            import_info.setdefault(module, {})[field] = flags
        elif subsection_type == DylinkType.RUNTIME_PATH:
          count = self.read_uleb()
          for _ in range(count):
            runtime_paths.append(self.read_string())
        else:
          print(f'unknown subsection: {subsection_type}')
          # ignore unknown subsections
          self.skip(subsection_size)
        # Every branch must have consumed exactly the declared payload.
        assert self.tell() == end
    else:
      utils.exit_with_error('error parsing shared library')

    return Dylink(mem_size, mem_align, table_size, table_align, needed, export_info, import_info, runtime_paths)

  @memoize
  def get_exports(self):
    """Return the export section as a list of `Export` ([] if absent)."""
    export_section = self.get_section(SecType.EXPORT)
    if not export_section:
      return []

    self.seek(export_section.offset)
    num_exports = self.read_uleb()
    exports = []
    for _ in range(num_exports):
      name = self.read_string()
      kind = ExternType(self.read_byte())
      index = self.read_uleb()
      exports.append(Export(name, kind, index))

    return exports

  @memoize
  def get_imports(self):
    """Return the import section as a list of `Import` ([] if absent).

    `Import.type` holds the value read for function, global, table and tag
    imports, and is None for memory imports.
    """
    import_section = self.get_section(SecType.IMPORT)
    if not import_section:
      return []

    self.seek(import_section.offset)
    num_imports = self.read_uleb()
    imports = []
    for _ in range(num_imports):
      mod = self.read_string()
      field = self.read_string()
      kind = ExternType(self.read_byte())
      type_ = None
      if kind == ExternType.FUNC:
        type_ = self.read_uleb()
      elif kind == ExternType.GLOBAL:
        type_ = self.read_sleb()
        self.read_byte()  # mutable
      elif kind == ExternType.MEMORY:
        self.read_limits()  # limits
      elif kind == ExternType.TABLE:
        type_ = self.read_sleb()
        self.read_limits()  # limits
      elif kind == ExternType.TAG:
        self.read_byte()  # attribute
        type_ = self.read_uleb()
      else:
        raise AssertionError()
      imports.append(Import(kind, mod, field, type_))

    return imports

  @memoize
  def get_globals(self):
    """Return the global section as a list of `Global` ([] if absent)."""
    global_section = self.get_section(SecType.GLOBAL)
    if not global_section:
      return []
    globls = []
    self.seek(global_section.offset)
    num_globals = self.read_uleb()
    for _ in range(num_globals):
      global_type = self.read_type()
      mutable = self.read_byte()
      init = self.read_init()
      globls.append(Global(global_type, mutable, init))
    return globls

  @memoize
  def get_start(self):
    """Return the value stored in the start section, or None if absent."""
    start_section = self.get_section(SecType.START)
    if not start_section:
      return None
    self.seek(start_section.offset)
    return self.read_uleb()

  @memoize
  def get_functions(self):
    """Return a `FunctionBody` for each code-section entry ([] if absent).

    Bodies are not parsed; only their file offset and size are recorded.
    """
    code_section = self.get_section(SecType.CODE)
    if not code_section:
      return []
    functions = []
    self.seek(code_section.offset)
    num_functions = self.read_uleb()
    for _ in range(num_functions):
      body_size = self.read_uleb()
      start = self.tell()
      functions.append(FunctionBody(start, body_size))
      self.seek(start + body_size)
    return functions

  def get_section(self, section_code):
    """Return the first section of type `section_code`, or None."""
    return next((s for s in self.sections() if s.type == section_code), None)

  @memoize
  def get_custom_section(self, name):
    """Return the first custom section called `name`, or None."""
    for section in self.sections():
      if section.type == SecType.CUSTOM and section.name == name:
        return section
    return None

  @memoize
  def get_segments(self):
    """Return the data section as a list of `DataSegment` ([] if absent).

    Payload bytes are not read; only their file offset and size are
    recorded.  `init` is None for segments with the SEG_PASSIVE flag.
    """
    data_section = self.get_section(SecType.DATA)
    # A module without a data section simply has no segments; behave like
    # the other getters rather than failing on `None.offset`.
    if not data_section:
      return []
    segments = []
    self.seek(data_section.offset)
    num_segments = self.read_uleb()
    for _ in range(num_segments):
      flags = self.read_uleb()
      if (flags & SEG_PASSIVE):
        init = None
      else:
        init = self.read_init()
      size = self.read_uleb()
      offset = self.tell()
      segments.append(DataSegment(flags, init, offset, size))
      self.seek(offset + size)
    return segments

  @memoize
  def get_tables(self):
    """Return the table section as a list of `Table` ([] if absent)."""
    table_section = self.get_section(SecType.TABLE)
    if not table_section:
      return []

    self.seek(table_section.offset)
    num_tables = self.read_uleb()
    tables = []
    for _ in range(num_tables):
      elem_type = self.read_type()
      limits = self.read_limits()
      tables.append(Table(elem_type, limits))

    return tables

  @memoize
  def get_function_types(self):
    """Return the values listed in the function section ([] if absent).

    `get_function_type` uses these values as indexes into `get_types()`.
    """
    function_section = self.get_section(SecType.FUNCTION)
    if not function_section:
      return []

    self.seek(function_section.offset)
    num_types = self.read_uleb()
    return [self.read_uleb() for _ in range(num_types)]

  def has_name_section(self):
    """Return True if the module contains a `name` custom section."""
    return self.get_custom_section('name') is not None

  # -- Index-space helpers.  Imported entities occupy the low indexes, with
  # -- the module's own definitions following them.

  @once
  def _calc_indexes(self):
    """Populate `self.imports_by_kind`, grouping imports by ExternType."""
    self.imports_by_kind = {}
    for i in self.get_imports():
      self.imports_by_kind.setdefault(i.kind, [])
      self.imports_by_kind[i.kind].append(i)

  def num_imported_funcs(self):
    """Return the number of imported functions."""
    self._calc_indexes()
    return len(self.imports_by_kind.get(ExternType.FUNC, []))

  def num_imported_globals(self):
    """Return the number of imported globals."""
    self._calc_indexes()
    return len(self.imports_by_kind.get(ExternType.GLOBAL, []))

  def get_function(self, idx):
    """Return the `FunctionBody` for function index `idx`.

    `idx` must refer to a defined (non-imported) function.
    """
    self._calc_indexes()
    assert idx >= self.num_imported_funcs()
    return self.get_functions()[idx - self.num_imported_funcs()]

  def iter_functions_by_index(self):
    """Yield `(index, FunctionBody)` for every defined function."""
    self._calc_indexes()
    for idx in range(self.num_imported_funcs(),
                     self.num_imported_funcs() + len(self.get_functions())):
      yield idx, self.get_function(idx)

  def get_global(self, idx):
    """Return the `Global` for global index `idx`.

    `idx` must refer to a defined (non-imported) global.
    """
    self._calc_indexes()
    assert idx >= self.num_imported_globals()
    return self.get_globals()[idx - self.num_imported_globals()]

  def get_function_type(self, idx):
    """Return the `FuncType` of function `idx`, imported or defined."""
    self._calc_indexes()
    if idx < self.num_imported_funcs():
      imp = self.imports_by_kind[ExternType.FUNC][idx]
      func_type = imp.type
    else:
      func_type = self.get_function_types()[idx - self.num_imported_funcs()]
    return self.get_types()[func_type]

  @memoize
  def get_target_features(self):
    """Parse the `target_features` custom section.

    Returns:
      A dict mapping feature name to `TargetFeaturePrefix`; empty when the
      section is absent.
    """
    section = self.get_custom_section('target_features')
    if not section:
      return {}
    self.seek(section.offset)
    # Read outside of the assert: `python -O` strips assert statements, which
    # would otherwise skip the read and leave the cursor on the name.
    section_name = self.read_string()
    assert section_name == 'target_features'
    features = {}
    # The feature count is a LEB128-encoded vector length, so it may span
    # more than one byte.  Its value is ignored; the loop runs until the end
    # of the section instead.
    self.read_uleb()
    while self.tell() < section.offset + section.size:
      prefix = TargetFeaturePrefix(self.read_byte())
      feature = self.read_string()
      features[feature] = prefix
    return features
581
582
583
def parse_dylink_section(wasm_file):
  """Open `wasm_file` and return its parsed dylink section.

  The file is closed again before this function returns.
  """
  with Module(wasm_file) as wasm:
    dylink = wasm.parse_dylink_section()
  return dylink
586
587
588
def get_exports(wasm_file):
  """Open `wasm_file` and return the list of exports it declares.

  The file is closed again before this function returns.
  """
  with Module(wasm_file) as wasm:
    exports = wasm.get_exports()
  return exports
591
592
593
def get_imports(wasm_file):
  """Open `wasm_file` and return the list of imports it declares.

  The file is closed again before this function returns.
  """
  with Module(wasm_file) as wasm:
    imports = wasm.get_imports()
  return imports
596
597
598
def get_weak_imports(wasm_file):
  """Return the names of all imports in `wasm_file` that have weak binding.

  Binding is taken from the low bits (SYMBOL_BINDING_MASK) of each entry's
  flags in the import info of the dylink section.  Names from all import
  modules are collected into one flat list.
  """
  import_info = parse_dylink_section(wasm_file).import_info
  weak = []
  for fields in import_info.values():
    weak.extend(
      name for name, binding in fields.items()
      if binding & SYMBOL_BINDING_MASK == SYMBOL_BINDING_WEAK
    )
  return weak
606
607