Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
gteissier
GitHub Repository: gteissier/erl-matter
Path: blob/master/erlang.py
271 views
1
#!/usr/bin/env python
2
#-*-Mode:python;coding:utf-8;tab-width:4;c-basic-offset:4;indent-tabs-mode:()-*-
3
# ex: set ft=python fenc=utf-8 sts=4 ts=4 sw=4 et nomod:
4
#
5
# MIT License
6
#
7
# Copyright (c) 2011-2019 Michael Truog <mjtruog at protonmail dot com>
8
#
9
# Permission is hereby granted, free of charge, to any person obtaining a
10
# copy of this software and associated documentation files (the "Software"),
11
# to deal in the Software without restriction, including without limitation
12
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
13
# and/or sell copies of the Software, and to permit persons to whom the
14
# Software is furnished to do so, subject to the following conditions:
15
#
16
# The above copyright notice and this permission notice shall be included in
17
# all copies or substantial portions of the Software.
18
#
19
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25
# DEALINGS IN THE SOFTWARE.
26
#
27
"""
28
Erlang Binary Term Format Encoding/Decoding
29
"""
30
31
import sys
32
import struct
33
import zlib
34
import copy
35
36
if sys.version_info[0] < 3:
    # Python 2: indexing a byte string yields a 1-character string
    TypeLong = long
    TypeUnicode = unicode
    def b_chr(integer):
        """
        build a one-byte string from an integer
        """
        return chr(integer)
    def b_ord(character):
        """
        turn one indexed byte into its integer value
        """
        return ord(character)
else:
    # Python 3: indexing bytes already yields an integer
    TypeLong = int
    TypeUnicode = str
    def b_chr(integer):
        """
        build a one-byte string from an integer
        """
        return bytes([integer])
    def b_ord(character):
        """
        turn one indexed byte into its integer value
        """
        return character
62
63
# names exported by a star-import of this module; consult is a public,
# documented function of this file and therefore belongs in the list
__all__ = [
    'OtpErlangAtom',
    'OtpErlangBinary',
    'OtpErlangFunction',
    'OtpErlangList',
    'OtpErlangPid',
    'OtpErlangPort',
    'OtpErlangReference',
    'binary_to_term',
    'term_to_binary',
    'consult',
    'InputException',
    'OutputException',
    'ParseException',
]
75
76
# tag values here http://www.erlang.org/doc/apps/erts/erl_ext_dist.html
77
# one-byte tags, listed in ascending numeric order
_TAG_NEW_FLOAT_EXT = 70
_TAG_BIT_BINARY_EXT = 77
_TAG_ATOM_CACHE_REF = 78
_TAG_COMPRESSED_ZLIB = 80  # followed by a uint32 size and zlib data
_TAG_SMALL_INTEGER_EXT = 97
_TAG_INTEGER_EXT = 98
_TAG_FLOAT_EXT = 99
_TAG_ATOM_EXT = 100
_TAG_REFERENCE_EXT = 101
_TAG_PORT_EXT = 102
_TAG_PID_EXT = 103
_TAG_SMALL_TUPLE_EXT = 104
_TAG_LARGE_TUPLE_EXT = 105
_TAG_NIL_EXT = 106
_TAG_STRING_EXT = 107
_TAG_LIST_EXT = 108
_TAG_BINARY_EXT = 109
_TAG_SMALL_BIG_EXT = 110
_TAG_LARGE_BIG_EXT = 111
_TAG_NEW_FUN_EXT = 112
_TAG_EXPORT_EXT = 113
_TAG_NEW_REFERENCE_EXT = 114
_TAG_SMALL_ATOM_EXT = 115
_TAG_MAP_EXT = 116
_TAG_FUN_EXT = 117
_TAG_ATOM_UTF8_EXT = 118
_TAG_SMALL_ATOM_UTF8_EXT = 119
_TAG_VERSION = 131  # first byte of every encoded term
105
106
# Erlang term classes listed alphabetically
107
108
class OtpErlangAtom(object):
    """
    OtpErlangAtom

    The type of value selects the encoding produced by binary():
    an int is written as an atom cache reference, a unicode string as a
    UTF-8 atom and a byte string as a (SMALL_)ATOM_EXT atom.
    """
    # pylint: disable=too-few-public-methods
    def __init__(self, value):
        self.value = value
    def binary(self):
        """
        return encoded representation

        Raises OutputException when the atom name is longer than 65535
        bytes or when value has an unsupported type.
        """
        if isinstance(self.value, int):
            return b_chr(_TAG_ATOM_CACHE_REF) + b_chr(self.value)
        elif isinstance(self.value, TypeUnicode):
            value_encoded = self.value.encode('utf-8')
            length = len(value_encoded)
            if length <= 255:
                return (
                    b_chr(_TAG_SMALL_ATOM_UTF8_EXT) +
                    b_chr(length) + value_encoded
                )
            elif length <= 65535:
                return (
                    b_chr(_TAG_ATOM_UTF8_EXT) +
                    struct.pack(b'>H', length) + value_encoded
                )
            else:
                raise OutputException('uint16 overflow')
        elif isinstance(self.value, bytes):
            length = len(self.value)
            if length <= 255:
                return b_chr(_TAG_SMALL_ATOM_EXT) + b_chr(length) + self.value
            elif length <= 65535:
                return (
                    b_chr(_TAG_ATOM_EXT) +
                    struct.pack(b'>H', length) + self.value
                )
            else:
                raise OutputException('uint16 overflow')
        else:
            raise OutputException('unknown atom type')
    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, repr(self.value))
    def __hash__(self):
        return hash(self.binary())
    def __eq__(self, other):
        # an object without binary() can never be equal; let Python fall
        # back to its default comparison instead of raising AttributeError
        if not hasattr(other, 'binary'):
            return NotImplemented
        return self.binary() == other.binary()
155
156
class OtpErlangBinary(object):
    """
    OtpErlangBinary

    value holds the bytes; bits is the number of bits used in the last
    byte (8 selects the plain binary encoding, anything else the
    bit-binary encoding).
    """
    # pylint: disable=too-few-public-methods
    def __init__(self, value, bits=8):
        self.value = value
        self.bits = bits # bits in last byte
    def binary(self):
        """
        return encoded representation

        Raises OutputException when value is not bytes or is longer than
        4294967295 bytes.
        """
        if isinstance(self.value, bytes):
            length = len(self.value)
            if length > 4294967295:
                raise OutputException('uint32 overflow')
            elif self.bits != 8:
                return (
                    b_chr(_TAG_BIT_BINARY_EXT) +
                    struct.pack(b'>I', length) +
                    b_chr(self.bits) + self.value
                )
            else:
                return (
                    b_chr(_TAG_BINARY_EXT) +
                    struct.pack(b'>I', length) +
                    self.value
                )
        else:
            raise OutputException('unknown binary type')
    def __repr__(self):
        return '%s(%s,bits=%s)' % (
            self.__class__.__name__, repr(self.value), repr(self.bits)
        )
    def __hash__(self):
        return hash(self.binary())
    def __eq__(self, other):
        # an object without binary() can never be equal; let Python fall
        # back to its default comparison instead of raising AttributeError
        if not hasattr(other, 'binary'):
            return NotImplemented
        return self.binary() == other.binary()
194
195
class OtpErlangFunction(object):
    """
    OtpErlangFunction

    Opaque function term: tag is the integer tag byte and value the raw
    encoded bytes that follow it.
    """
    # pylint: disable=too-few-public-methods
    def __init__(self, tag, value):
        self.tag = tag
        self.value = value
    def binary(self):
        """
        return encoded representation
        """
        return b_chr(self.tag) + self.value
    def __repr__(self):
        return '%s(%s,%s)' % (
            self.__class__.__name__,
            repr(self.tag), repr(self.value)
        )
    def __hash__(self):
        return hash(self.binary())
    def __eq__(self, other):
        # an object without binary() can never be equal; let Python fall
        # back to its default comparison instead of raising AttributeError
        if not hasattr(other, 'binary'):
            return NotImplemented
        return self.binary() == other.binary()
217
218
class OtpErlangList(object):
    """
    OtpErlangList

    value is a python list of terms.  With improper=True the last element
    of value is written as the list tail instead of an empty-list tail.
    """
    # pylint: disable=too-few-public-methods
    def __init__(self, value, improper=False):
        self.value = value
        self.improper = improper # no empty list tail?
    def binary(self):
        """
        return encoded representation

        Raises OutputException when value is not a list or holds more than
        4294967295 elements.
        """
        if isinstance(self.value, list):
            length = len(self.value)
            if length == 0:
                return b_chr(_TAG_NIL_EXT)
            elif length > 4294967295:
                raise OutputException('uint32 overflow')
            elif self.improper:
                # the final element is the tail, so it is not counted
                return (
                    b_chr(_TAG_LIST_EXT) +
                    struct.pack(b'>I', length - 1) +
                    b''.join([_term_to_binary(element)
                              for element in self.value])
                )
            else:
                return (
                    b_chr(_TAG_LIST_EXT) +
                    struct.pack(b'>I', length) +
                    b''.join([_term_to_binary(element)
                              for element in self.value]) +
                    b_chr(_TAG_NIL_EXT)
                )
        else:
            raise OutputException('unknown list type')
    def __repr__(self):
        return '%s(%s,improper=%s)' % (
            self.__class__.__name__, repr(self.value), repr(self.improper)
        )
    def __hash__(self):
        return hash(self.binary())
    def __eq__(self, other):
        # an object without binary() can never be equal; let Python fall
        # back to its default comparison instead of raising AttributeError
        if not hasattr(other, 'binary'):
            return NotImplemented
        return self.binary() == other.binary()
261
262
class OtpErlangPid(object):
    """
    OtpErlangPid

    node must provide binary(); id_value, serial and creation are byte
    strings that are concatenated unchanged into the encoding.
    """
    # pylint: disable=too-few-public-methods
    def __init__(self, node, id_value, serial, creation):
        # pylint: disable=invalid-name
        self.node = node
        self.id = id_value
        self.serial = serial
        self.creation = creation
    def binary(self):
        """
        return encoded representation
        """
        return (
            b_chr(_TAG_PID_EXT) +
            self.node.binary() + self.id + self.serial + self.creation
        )
    def __repr__(self):
        return '%s(%s,%s,%s,%s)' % (
            self.__class__.__name__,
            repr(self.node), repr(self.id), repr(self.serial),
            repr(self.creation)
        )
    def __hash__(self):
        return hash(self.binary())
    def __eq__(self, other):
        # an object without binary() can never be equal; let Python fall
        # back to its default comparison instead of raising AttributeError
        if not hasattr(other, 'binary'):
            return NotImplemented
        return self.binary() == other.binary()
291
292
class OtpErlangPort(object):
    """
    OtpErlangPort

    node must provide binary(); id_value and creation are byte strings
    that are concatenated unchanged into the encoding.
    """
    # pylint: disable=too-few-public-methods
    def __init__(self, node, id_value, creation):
        # pylint: disable=invalid-name
        self.node = node
        self.id = id_value
        self.creation = creation
    def binary(self):
        """
        return encoded representation
        """
        return (
            b_chr(_TAG_PORT_EXT) +
            self.node.binary() + self.id + self.creation
        )
    def __repr__(self):
        return '%s(%s,%s,%s)' % (
            self.__class__.__name__,
            repr(self.node), repr(self.id), repr(self.creation)
        )
    def __hash__(self):
        return hash(self.binary())
    def __eq__(self, other):
        # an object without binary() can never be equal; let Python fall
        # back to its default comparison instead of raising AttributeError
        if not hasattr(other, 'binary'):
            return NotImplemented
        return self.binary() == other.binary()
319
320
class OtpErlangReference(object):
    """
    OtpErlangReference

    node must provide binary(); id_value and creation are byte strings.
    The number of 4-byte words in id_value selects the encoding.
    """
    # pylint: disable=too-few-public-methods
    def __init__(self, node, id_value, creation):
        # pylint: disable=invalid-name
        self.node = node
        self.id = id_value
        self.creation = creation
    def binary(self):
        """
        return encoded representation

        Raises OutputException when the id holds more than 65535 words.
        """
        # floor division keeps the word count an int on Python 3;
        # struct.pack rejects the float that true division would produce
        length = len(self.id) // 4
        if length == 0:
            return (
                b_chr(_TAG_REFERENCE_EXT) +
                self.node.binary() + self.id + self.creation
            )
        elif length <= 65535:
            return (
                b_chr(_TAG_NEW_REFERENCE_EXT) +
                struct.pack(b'>H', length) +
                self.node.binary() + self.creation + self.id
            )
        else:
            raise OutputException('uint16 overflow')
    def __repr__(self):
        return '%s(%s,%s,%s)' % (
            self.__class__.__name__,
            repr(self.node), repr(self.id), repr(self.creation)
        )
    def __hash__(self):
        return hash(self.binary())
    def __eq__(self, other):
        # an object without binary() can never be equal; let Python fall
        # back to its default comparison instead of raising AttributeError
        if not hasattr(other, 'binary'):
            return NotImplemented
        return self.binary() == other.binary()
357
358
# dependency to support Erlang maps as map keys in python
359
360
class frozendict(dict):
    """
    frozendict is under the PSF (Python Software Foundation) License
    (from http://code.activestate.com/recipes/414283-frozen-dictionaries/)

    A dict whose mutating methods raise AttributeError and which is
    hashable (as long as its items are).  dict values of positional dict
    arguments are converted to frozendict and list values to tuples
    (dicts directly inside such lists are converted too).
    """
    # pylint: disable=invalid-name
    def _blocked_attribute(self):
        # pylint: disable=no-self-use
        raise AttributeError('A frozendict cannot be modified.')
    # as a property the error is raised as soon as the attribute is looked up
    _blocked_attribute = property(_blocked_attribute)
    __delitem__ = __setitem__ = clear = _blocked_attribute
    pop = popitem = setdefault = update = _blocked_attribute
    def __new__(cls, *args, **kw):
        # pylint: disable=unused-argument
        # pylint: disable=too-many-nested-blocks
        new = dict.__new__(cls)
        args_ = []
        for arg in args:
            if isinstance(arg, dict):
                # dict(arg) makes a plain, mutable shallow copy; copy.copy
                # would rebuild a frozendict argument through the blocked
                # __setitem__ and raise AttributeError
                arg = dict(arg)
                for k, v in arg.items():
                    if isinstance(v, dict):
                        arg[k] = frozendict(v)
                    elif isinstance(v, list):
                        v_ = list()
                        for elm in v:
                            if isinstance(elm, dict):
                                v_.append(frozendict(elm))
                            else:
                                v_.append(elm)
                        arg[k] = tuple(v_)
                args_.append(arg)
            else:
                args_.append(arg)
        # fill the instance here, __init__ below leaves the contents alone
        dict.__init__(new, *args_, **kw)
        return new
    def __init__(self, *args, **kw):
        # pylint: disable=unused-argument
        # pylint: disable=super-init-not-called
        self.__cached_hash = None
    def __hash__(self):
        # computed on first use and then reused
        if self.__cached_hash is None:
            self.__cached_hash = hash(frozenset(self.items()))
        return self.__cached_hash
    def __repr__(self):
        return "frozendict(%s)" % dict.__repr__(self)
406
407
# core functionality
408
409
def binary_to_term(data):
    """
    Decode Erlang terms within binary data into Python types

    Returns a tuple (index, term) where index is the offset in data just
    after the decoded term.  Raises ParseException for input that is not
    bytes, is too short, has the wrong version byte or is truncated.
    """
    if not isinstance(data, bytes):
        raise ParseException('not bytes input')
    if len(data) <= 1:
        raise ParseException('null input')
    if b_ord(data[0]) != _TAG_VERSION:
        raise ParseException('invalid version')
    try:
        # the term starts right after the version byte
        index, term = _binary_to_term(1, data)
        return (index, term)
    except (struct.error, IndexError):
        # both are raised when the data ends before the term does
        raise ParseException('missing data')
427
428
def term_to_binary(term, compressed=False):
    """
    Encode Python types into Erlang terms in binary data

    compressed may be False (no compression), True (zlib level 6) or an
    explicit zlib level in [0..9].  Raises InputException for a level
    outside that range and OutputException when the uncompressed size
    does not fit in 32 bits.
    """
    payload = _term_to_binary(term)
    if compressed is False:
        return b_chr(_TAG_VERSION) + payload
    level = 6 if compressed is True else compressed
    if level < 0 or level > 9:
        raise InputException('compressed in [0..9]')
    deflated = zlib.compress(payload, level)
    payload_size = len(payload)
    if payload_size > 4294967295:
        raise OutputException('uint32 overflow')
    header = b_chr(_TAG_VERSION) + b_chr(_TAG_COMPRESSED_ZLIB)
    return header + struct.pack(b'>I', payload_size) + deflated
448
449
# binary_to_term implementation functions
450
451
def _binary_to_term(i, data):
    """
    Decode the single term whose tag byte is data[i]

    Returns a tuple (index, term) where index is the offset of the first
    byte after the decoded term.  Raises ParseException for an unknown
    tag or inconsistent compressed data; reading past the end of data
    surfaces as struct.error or IndexError.
    """
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-return-statements
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-statements
    tag = b_ord(data[i])
    i += 1
    if tag == _TAG_NEW_FLOAT_EXT:
        return (i + 8, struct.unpack(b'>d', data[i:i + 8])[0])
    elif tag == _TAG_BIT_BINARY_EXT:
        j = struct.unpack(b'>I', data[i:i + 4])[0]
        i += 4
        bits = b_ord(data[i])
        i += 1
        return (i + j, OtpErlangBinary(data[i:i + j], bits))
    elif tag == _TAG_ATOM_CACHE_REF:
        # index (not slice) so that b_ord yields an int on Python 3 too
        return (i + 1, OtpErlangAtom(b_ord(data[i])))
    elif tag == _TAG_SMALL_INTEGER_EXT:
        return (i + 1, b_ord(data[i]))
    elif tag == _TAG_INTEGER_EXT:
        return (i + 4, struct.unpack(b'>i', data[i:i + 4])[0])
    elif tag == _TAG_FLOAT_EXT:
        # 31 byte text field, the number ends at the first zero byte
        value = float(data[i:i + 31].partition(b_chr(0))[0])
        return (i + 31, value)
    elif tag == _TAG_ATOM_EXT:
        j = struct.unpack(b'>H', data[i:i + 2])[0]
        i += 2
        return (i + j, OtpErlangAtom(data[i:i + j]))
    elif tag == _TAG_REFERENCE_EXT or tag == _TAG_PORT_EXT:
        i, node = _binary_to_atom(i, data)
        id_value = data[i:i + 4]
        i += 4
        creation = data[i:i + 1]
        i += 1
        if tag == _TAG_REFERENCE_EXT:
            return (i, OtpErlangReference(node, id_value, creation))
        # tag == _TAG_PORT_EXT
        return (i, OtpErlangPort(node, id_value, creation))
    elif tag == _TAG_PID_EXT:
        i, node = _binary_to_atom(i, data)
        id_value = data[i:i + 4]
        i += 4
        serial = data[i:i + 4]
        i += 4
        creation = data[i:i + 1]
        i += 1
        return (i, OtpErlangPid(node, id_value, serial, creation))
    elif tag == _TAG_SMALL_TUPLE_EXT or tag == _TAG_LARGE_TUPLE_EXT:
        if tag == _TAG_SMALL_TUPLE_EXT:
            length = b_ord(data[i])
            i += 1
        elif tag == _TAG_LARGE_TUPLE_EXT:
            length = struct.unpack(b'>I', data[i:i + 4])[0]
            i += 4
        i, tuple_value = _binary_to_term_sequence(i, length, data)
        return (i, tuple(tuple_value))
    elif tag == _TAG_NIL_EXT:
        return (i, [])
    elif tag == _TAG_STRING_EXT:
        j = struct.unpack(b'>H', data[i:i + 2])[0]
        i += 2
        return (i + j, data[i:i + j])
    elif tag == _TAG_LIST_EXT:
        length = struct.unpack(b'>I', data[i:i + 4])[0]
        i += 4
        i, list_value = _binary_to_term_sequence(i, length, data)
        i, tail = _binary_to_term(i, data)
        # any tail other than the empty list makes the list improper
        if not isinstance(tail, list) or tail != []:
            list_value.append(tail)
            list_value = OtpErlangList(list_value, improper=True)
        return (i, list_value)
    elif tag == _TAG_BINARY_EXT:
        j = struct.unpack(b'>I', data[i:i + 4])[0]
        i += 4
        return (i + j, OtpErlangBinary(data[i:i + j], 8))
    elif tag == _TAG_SMALL_BIG_EXT or tag == _TAG_LARGE_BIG_EXT:
        if tag == _TAG_SMALL_BIG_EXT:
            j = b_ord(data[i])
            i += 1
        elif tag == _TAG_LARGE_BIG_EXT:
            j = struct.unpack(b'>I', data[i:i + 4])[0]
            i += 4
        sign = b_ord(data[i])
        bignum = 0
        # data[i] is the sign byte; the j digit bytes after it are read
        # from the last one back to the first
        for bignum_index in range(j):
            digit = b_ord(data[i + j - bignum_index])
            bignum = bignum * 256 + int(digit)
        if sign == 1:
            bignum *= -1
        i += 1
        return (i + j, bignum)
    elif tag == _TAG_NEW_FUN_EXT:
        # the size counts its own 4 bytes, so i is not advanced past it
        length = struct.unpack(b'>I', data[i:i + 4])[0]
        return (i + length, OtpErlangFunction(tag, data[i:i + length]))
    elif tag == _TAG_EXPORT_EXT:
        old_i = i
        i, _ = _binary_to_atom(i, data)
        i, _ = _binary_to_atom(i, data)
        if b_ord(data[i]) != _TAG_SMALL_INTEGER_EXT:
            raise ParseException('invalid small integer tag')
        i += 1
        _ = b_ord(data[i])
        i += 1
        # the parts are only parsed to find the end; raw bytes are kept
        return (i, OtpErlangFunction(tag, data[old_i:i]))
    elif tag == _TAG_NEW_REFERENCE_EXT:
        j = struct.unpack(b'>H', data[i:i + 2])[0] * 4
        i += 2
        i, node = _binary_to_atom(i, data)
        creation = data[i:i + 1]
        i += 1
        return (i + j, OtpErlangReference(node, data[i: i + j], creation))
    elif tag == _TAG_SMALL_ATOM_EXT:
        j = b_ord(data[i])
        i += 1
        atom_name = data[i:i + j]
        i = i + j
        if atom_name == b'true':
            return (i, True)
        elif atom_name == b'false':
            return (i, False)
        return (i, OtpErlangAtom(atom_name))
    elif tag == _TAG_MAP_EXT:
        length = struct.unpack(b'>I', data[i:i + 4])[0]
        i += 4
        pairs = {}
        for _ in range(length):
            i, key = _binary_to_term(i, data)
            i, value = _binary_to_term(i, data)
            # dict and list keys are wrapped in hashable types
            if isinstance(key, dict):
                pairs[frozendict(key)] = value
            elif isinstance(key, list):
                pairs[OtpErlangList(key)] = value
            else:
                pairs[key] = value
        return (i, pairs)
    elif tag == _TAG_FUN_EXT:
        old_i = i
        numfree = struct.unpack(b'>I', data[i:i + 4])[0]
        i += 4
        i, _ = _binary_to_pid(i, data)
        i, _ = _binary_to_atom(i, data)
        i, _ = _binary_to_integer(i, data)
        i, _ = _binary_to_integer(i, data)
        i, _ = _binary_to_term_sequence(i, numfree, data)
        # the parts are only parsed to find the end; raw bytes are kept
        return (i, OtpErlangFunction(tag, data[old_i:i]))
    elif tag == _TAG_ATOM_UTF8_EXT:
        j = struct.unpack(b'>H', data[i:i + 2])[0]
        i += 2
        atom_name = TypeUnicode(
            data[i:i + j], encoding='utf-8', errors='strict'
        )
        return (i + j, OtpErlangAtom(atom_name))
    elif tag == _TAG_SMALL_ATOM_UTF8_EXT:
        # index (not slice) so that the length is an int on Python 3 too
        j = b_ord(data[i])
        i += 1
        atom_name = TypeUnicode(
            data[i:i + j], encoding='utf-8', errors='strict'
        )
        return (i + j, OtpErlangAtom(atom_name))
    elif tag == _TAG_COMPRESSED_ZLIB:
        size_uncompressed = struct.unpack(b'>I', data[i:i + 4])[0]
        if size_uncompressed == 0:
            raise ParseException('compressed data null')
        i += 4
        # everything after the size field is treated as compressed data
        data_compressed = data[i:]
        j = len(data_compressed)
        data_uncompressed = zlib.decompress(data_compressed)
        if size_uncompressed != len(data_uncompressed):
            raise ParseException('compression corrupt')
        (i_new, term) = _binary_to_term(0, data_uncompressed)
        if i_new != size_uncompressed:
            raise ParseException('unparsed data')
        return (i + j, term)
    else:
        raise ParseException('invalid tag')
626
627
def _binary_to_term_sequence(i, length, data):
    """
    Decode length consecutive terms starting at data[i]

    Returns a tuple (index, terms) with the offset after the last term
    and the decoded terms as a list.
    """
    terms = []
    index = i
    for _ in range(length):
        index, term = _binary_to_term(index, data)
        terms.append(term)
    return (index, terms)
633
634
# (binary_to_term Erlang term primitive type functions)
635
636
def _binary_to_integer(i, data):
    """
    Decode a small or 32-bit integer term starting at data[i]

    Returns a tuple (index, integer); raises ParseException for any
    other tag.
    """
    tag = b_ord(data[i])
    start = i + 1
    if tag == _TAG_INTEGER_EXT:
        (value,) = struct.unpack(b'>i', data[start:start + 4])
        return (start + 4, value)
    if tag == _TAG_SMALL_INTEGER_EXT:
        return (start + 1, b_ord(data[start]))
    raise ParseException('invalid integer tag')
645
646
def _binary_to_pid(i, data):
    """
    Decode a pid term starting at data[i]

    Returns a tuple (index, OtpErlangPid); raises ParseException when the
    tag is not the pid tag.
    """
    if b_ord(data[i]) != _TAG_PID_EXT:
        raise ParseException('invalid pid tag')
    offset, node = _binary_to_atom(i + 1, data)
    # fixed layout after the node atom: 4 byte id, 4 byte serial,
    # 1 byte creation
    id_value = data[offset:offset + 4]
    serial = data[offset + 4:offset + 8]
    creation = data[offset + 8:offset + 9]
    return (offset + 9, OtpErlangPid(node, id_value, serial, creation))
660
661
def _binary_to_atom(i, data):
    """
    Decode an atom term starting at data[i]

    Returns a tuple (index, OtpErlangAtom); raises ParseException when
    the tag is not one of the atom tags.
    """
    tag = b_ord(data[i])
    i += 1
    if tag == _TAG_ATOM_EXT:
        j = struct.unpack(b'>H', data[i:i + 2])[0]
        i += 2
        return (i + j, OtpErlangAtom(data[i:i + j]))
    elif tag == _TAG_ATOM_CACHE_REF:
        # index (not slice) so that b_ord yields an int on Python 3 too
        return (i + 1, OtpErlangAtom(b_ord(data[i])))
    elif tag == _TAG_SMALL_ATOM_EXT:
        # index (not slice) so that the length is an int on Python 3 too
        j = b_ord(data[i])
        i += 1
        return (i + j, OtpErlangAtom(data[i:i + j]))
    elif tag == _TAG_ATOM_UTF8_EXT:
        j = struct.unpack(b'>H', data[i:i + 2])[0]
        i += 2
        atom_name = TypeUnicode(
            data[i:i + j], encoding='utf-8', errors='strict'
        )
        return (i + j, OtpErlangAtom(atom_name))
    elif tag == _TAG_SMALL_ATOM_UTF8_EXT:
        # index (not slice) so that the length is an int on Python 3 too
        j = b_ord(data[i])
        i += 1
        atom_name = TypeUnicode(
            data[i:i + j], encoding='utf-8', errors='strict'
        )
        return (i + j, OtpErlangAtom(atom_name))
    else:
        raise ParseException('invalid atom tag')
690
691
# term_to_binary implementation functions
692
693
def _term_to_binary(term):
    """
    Encode one python value as an Erlang term (without version byte)

    Raises OutputException for a type that has no encoding.
    """
    # pylint: disable=too-many-return-statements
    # pylint: disable=too-many-branches
    if isinstance(term, bytes):
        return _string_to_binary(term)
    if isinstance(term, TypeUnicode):
        encoded = term.encode(encoding='utf-8', errors='strict')
        return _string_to_binary(encoded)
    if isinstance(term, list):
        return OtpErlangList(term).binary()
    if isinstance(term, tuple):
        return _tuple_to_binary(term)
    # bool has to be tested before int because bool is a subclass of int
    if isinstance(term, bool):
        return OtpErlangAtom(b'true' if term else b'false').binary()
    if isinstance(term, (int, TypeLong)):
        return _long_to_binary(term)
    if isinstance(term, float):
        return _float_to_binary(term)
    if isinstance(term, dict):
        return _dict_to_binary(term)
    if term is None:
        return OtpErlangAtom(b'undefined').binary()
    erlang_types = (
        OtpErlangAtom, OtpErlangList, OtpErlangBinary, OtpErlangFunction,
        OtpErlangReference, OtpErlangPort, OtpErlangPid,
    )
    if isinstance(term, erlang_types):
        return term.binary()
    raise OutputException('unknown python type')
732
733
# (term_to_binary Erlang term composite type functions)
734
735
def _string_to_binary(term):
    """
    Encode a byte string as an Erlang string

    An empty string becomes the empty list, up to 65535 bytes the string
    tag is used and anything longer is written as a list of small
    integers.  Raises OutputException beyond 4294967295 bytes.
    """
    size = len(term)
    if size == 0:
        return b_chr(_TAG_NIL_EXT)
    if size <= 65535:
        return b_chr(_TAG_STRING_EXT) + struct.pack(b'>H', size) + term
    if size > 4294967295:
        raise OutputException('uint32 overflow')
    small_integer = b_chr(_TAG_SMALL_INTEGER_EXT)
    elements = [small_integer + b_chr(b_ord(byte)) for byte in term]
    return (
        b_chr(_TAG_LIST_EXT) + struct.pack(b'>I', size) +
        b''.join(elements) +
        b_chr(_TAG_NIL_EXT)
    )
750
751
def _tuple_to_binary(term):
    """
    Encode a python tuple as a small (<= 255 elements) or large tuple

    Raises OutputException beyond 4294967295 elements.
    """
    arity = len(term)
    if arity > 4294967295:
        raise OutputException('uint32 overflow')
    if arity <= 255:
        header = b_chr(_TAG_SMALL_TUPLE_EXT) + b_chr(arity)
    else:
        header = b_chr(_TAG_LARGE_TUPLE_EXT) + struct.pack(b'>I', arity)
    return header + b''.join([_term_to_binary(element) for element in term])
765
766
def _dict_to_binary(term):
    """
    Encode a python dict as an Erlang map

    Raises OutputException beyond 4294967295 entries.
    """
    count = len(term)
    if count > 4294967295:
        raise OutputException('uint32 overflow')
    header = b_chr(_TAG_MAP_EXT) + struct.pack(b'>I', count)
    # every entry is written as the key term followed by the value term
    entries = [
        _term_to_binary(key) + _term_to_binary(value)
        for key, value in term.items()
    ]
    return header + b''.join(entries)
776
777
# (term_to_binary Erlang term primitive type functions)
778
779
def _integer_to_binary(term):
    """
    Encode an integer as one unsigned byte (0..255) or as a signed
    32-bit integer
    """
    if term < 0 or term > 255:
        return b_chr(_TAG_INTEGER_EXT) + struct.pack(b'>i', term)
    return b_chr(_TAG_SMALL_INTEGER_EXT) + b_chr(term)
783
784
def _long_to_binary(term):
    """
    Encode an integer of any size, using the bignum encoding only when
    the value does not fit in a signed 32-bit integer
    """
    fits_int32 = -2147483648 <= term <= 2147483647
    if fits_int32:
        return _integer_to_binary(term)
    return _bignum_to_binary(term)
788
789
def _bignum_to_binary(term):
    """
    Encode an integer as a sign byte plus base-256 digits, least
    significant digit first

    Raises OutputException beyond 4294967295 digits.
    """
    magnitude = abs(term)
    sign = b_chr(1) if term < 0 else b_chr(0)
    digits = []
    while magnitude > 0:
        digits.append(b_chr(magnitude & 255))
        magnitude >>= 8
    count = len(digits)
    if count > 4294967295:
        raise OutputException('uint32 overflow')
    if count <= 255:
        header = b_chr(_TAG_SMALL_BIG_EXT) + b_chr(count)
    else:
        header = b_chr(_TAG_LARGE_BIG_EXT) + struct.pack(b'>I', count)
    return header + sign + b''.join(digits)
812
813
def _float_to_binary(term):
    """
    Encode a float as the new-float tag plus a big-endian 8 byte double
    """
    tag = b_chr(_TAG_NEW_FLOAT_EXT)
    return tag + struct.pack(b'>d', term)
815
816
# Exception classes listed alphabetically
817
818
class InputException(ValueError):
    """
    InputException describes problems with function input parameters
    """
    def __init__(self, s):
        super(InputException, self).__init__()
        # only the string form of the description is kept
        self.__s = str(s)
    def __str__(self):
        return self.__s
827
828
class OutputException(TypeError):
    """
    OutputException describes problems with creating function output data
    """
    def __init__(self, s):
        super(OutputException, self).__init__()
        # only the string form of the description is kept
        self.__s = str(s)
    def __str__(self):
        return self.__s
837
838
class ParseException(SyntaxError):
    """
    ParseException provides specific parsing failure information
    """
    def __init__(self, s):
        super(ParseException, self).__init__()
        # only the string form of the description is kept
        self.__s = str(s)
    def __str__(self):
        return self.__s
847
848
def consult(string_in):
    """
    provide file:consult/1 functionality with python types

    The Erlang text is rewritten character by character into a python
    expression which is then evaluated: {} becomes a tuple, [] a list,
    <<>> a tuple of integers, and strings as well as atoms become python
    strings.

    NOTE: the rewritten text is passed to eval(), so string_in must come
    from a trusted source.
    """
    # pylint: disable=eval-used
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-statements

    # manually parse textual erlang data to avoid external dependencies
    list_out = []
    tuple_binary = False   # binaries become tuples of integers
    quoted_string = False  # strings become python string
    atom_string = False    # atoms become python string
    number = False
    whitespace = frozenset(('\n', '\t', ' '))
    size = len(string_in)
    i = 0
    while i < size:
        character = string_in[i]
        # one character of lookahead; the slice is empty at the end of
        # the input, where indexing string_in[i + 1] raised IndexError
        following = string_in[i + 1:i + 2]
        if character == ',':
            if atom_string:
                list_out.append('"')
                atom_string = False
            list_out.append(',')
            number = following.isdigit()
        elif character == '{':
            list_out.append('(')
            number = following.isdigit()
        elif character == '}':
            if atom_string:
                list_out.append('"')
                atom_string = False
            list_out.append(')')
            number = False
        elif character == '[':
            list_out.append('[')
            number = following.isdigit()
        elif character == ']':
            if atom_string:
                list_out.append('"')
                atom_string = False
            list_out.append(']')
            number = False
        elif character == '<' and following == '<':
            list_out.append('(')
            tuple_binary = True
            i += 1  # skip the second '<'
        elif character == '>' and following == '>':
            list_out.append(')')
            tuple_binary = False
            i += 1  # skip the second '>'
        elif not quoted_string and not atom_string and character in whitespace:
            number = following.isdigit()
        elif tuple_binary or number:
            list_out.append(character)
        elif character == '"':
            if quoted_string:
                quoted_string = False
            else:
                quoted_string = True
            list_out.append('"')
        elif character == "'":
            if atom_string:
                atom_string = False
            else:
                atom_string = True
            list_out.append('"')
        elif not quoted_string and not atom_string:
            # first character of an unquoted atom: open a python string
            atom_string = True
            list_out.append('"')
            list_out.append(character)
        else:
            list_out.append(character)
        i += 1
    return eval(''.join(list_out))
922
923