Path: blob/main/contrib/lib9p/pytest/sequencer.py
103835 views
#! /usr/bin/env python

from __future__ import print_function

#__all__ = ['EncDec', 'EncDecSimple', 'EncDecTyped', 'EncDecA',
#    'SequenceError', 'Sequencer']

import abc
import struct
import sys

# Map each wire-format specifier to its pre-compiled little-endian
# unsigned struct.  Both the string ('1', '2', ...) and the integer
# (1, 2, ...) spellings are accepted as keys.
_ProtoStruct = {
    '1': struct.Struct('<B'),
    '2': struct.Struct('<H'),
    '4': struct.Struct('<I'),
    '8': struct.Struct('<Q'),
    '_string_': None,   # handled specially
}
for _i in (1, 2, 4, 8):
    _ProtoStruct[_i] = _ProtoStruct[str(_i)]
del _i

# A class-body "__metaclass__ = abc.ABCMeta" is honored only by
# Python 2, and "class X(metaclass=...)" is a syntax error there.
# Calling the metaclass directly yields a base class that enforces
# @abc.abstractmethod under both py2k and py3k.
_AbstractBase = abc.ABCMeta('_AbstractBase', (object,), {})

class EncDec(_AbstractBase):
    """
    Base class for en/de-coders, which are put into sequencers.

    All have a name and arbitrary user-supplied auxiliary data
    (default=None).

    All provide a pack() and unpack().  The pack() function
    returns a "bytes" value.  This is internally implemented as a
    function apack() that returns a list of struct.pack() bytes,
    and pack() just joins them up as needed.

    The pack/unpack functions take a dictionary of variable names
    and values, and a second dictionary for conditionals, but at
    this level conditionals don't apply: they are just being
    passed through.  Variable names do apply to array encoders.

    EncDec also provides b2s() and s2b() static methods, which
    convert strings to bytes and vice versa, as reversibly as
    possible (using surrogateescape encoding).  In Python2 this is
    a no-op since the string type *is* the bytes type (<type
    'unicode'> is the unicode-ized string type).

    EncDec also provides b2u() and u2b() to do conversion to/from
    Unicode.

    These are partly for internal use (all strings get converted
    to UTF-8 byte sequences when coding a _string_ type) and partly
    for doctests, where we just want some py2k/py3k compat hacks.

    This class is abstract: subclasses must implement apack() and
    unpack().
    """
    def __init__(self, name, aux):
        self.name = name
        self.aux = aux

    @staticmethod
    def b2u(byte_sequence):
        "transform bytes to unicode"
        return byte_sequence.decode('utf-8', 'surrogateescape')

    @staticmethod
    def u2b(unicode_sequence):
        "transform unicode to bytes"
        return unicode_sequence.encode('utf-8', 'surrogateescape')

    # b2s/s2b depend on what the native "str" type is.
    if sys.version_info[0] >= 3:
        b2s = b2u
        @staticmethod
        def s2b(string):
            "transform string to bytes (leaves raw byte sequence unchanged)"
            if isinstance(string, bytes):
                return string
            return string.encode('utf-8', 'surrogateescape')
    else:
        @staticmethod
        def b2s(byte_sequence):
            "transform bytes to string - no-op in python2.7"
            return byte_sequence
        @staticmethod
        def s2b(string):
            "transform string or unicode to bytes"
            if isinstance(string, unicode):
                return string.encode('utf-8', 'surrogateescape')
            return string

    def pack(self, vdict, cdict, val):
        "encode value <val> into a byte-string"
        return b''.join(self.apack(vdict, cdict, val))

    @abc.abstractmethod
    def apack(self, vdict, cdict, val):
        "encode value <val> into [bytes1, b2, ..., bN]"

    @abc.abstractmethod
    def unpack(self, vdict, cdict, bstring, offset, noerror=False):
        "unpack bytes from <bstring> at <offset>"


class EncDecSimple(EncDec):
    r"""
    Encode/decode a simple (but named) field.  The field is not an
    array, which requires using EncDecA, nor a typed object
    like a qid or stat instance -- those require a Sequencer and
    EncDecTyped.

    The format is one of '1'/1, '2'/2, '4'/4, '8'/8, or '_string_'.

    Note: using b2s here is purely a doctest/testmod python2/python3
    compat hack.  The output of e.pack is <type 'bytes'>; b2s
    converts it to a string, purely for display purposes.  (It might
    be better to map py2 output to bytes but they just print as a
    string anyway.)  In normal use, you should not call b2s here.

    >>> e = EncDecSimple('eggs', 2)
    >>> e.b2s(e.pack({}, {}, 0))
    '\x00\x00'
    >>> e.b2s(e.pack({}, {}, 256))
    '\x00\x01'

    Values that cannot be packed produce a SequenceError:

    >>> e.pack({}, {}, None)
    Traceback (most recent call last):
        ...
    SequenceError: failed while packing 'eggs'=None
    >>> e.pack({}, {}, -1)
    Traceback (most recent call last):
        ...
    SequenceError: failed while packing 'eggs'=-1

    Unpacking both returns a value, and tells how many bytes it
    used out of the bytestring or byte-array argument.  If there
    are not enough bytes remaining at the starting offset, it
    raises a SequenceError, unless noerror=True (then unset
    values are None)

    >>> e.unpack({}, {}, b'\x00\x01', 0)
    (256, 2)
    >>> e.unpack({}, {}, b'', 0)
    Traceback (most recent call last):
        ...
    SequenceError: out of data while unpacking 'eggs'
    >>> e.unpack({}, {}, b'', 0, noerror=True)
    (None, 2)

    Note that strings can be provided as regular strings, byte
    strings (same as regular strings in py2k), or Unicode strings
    (same as regular strings in py3k).  Unicode strings will be
    converted to UTF-8 before being packed.  Since this leaves
    7-bit characters alone, these examples work in both py2k and
    py3k.  (Note: the UTF-8 encoding of u'\u1234' is
    '\xe1\x88\xb4' or 225, 136, 180.  The b2i trick below is
    another py2k vs py3k special case just for doctests: py2k
    tries to display the utf-8 encoded data as a string.)

    >>> e = EncDecSimple('spam', '_string_')
    >>> e.b2s(e.pack({}, {}, 'p3=unicode,p2=bytes'))
    '\x13\x00p3=unicode,p2=bytes'

    >>> e.b2s(e.pack({}, {}, b'bytes'))
    '\x05\x00bytes'

    >>> import sys
    >>> ispy3k = sys.version_info[0] >= 3

    >>> b2i = lambda x: x if ispy3k else ord(x)
    >>> [b2i(x) for x in e.pack({}, {}, u'\u1234')]
    [3, 0, 225, 136, 180]

    The byte length of the utf-8 data cannot exceed 65535 since
    the encoding has the length as a 2-byte field (a la the
    encoding for 'eggs' here).  A too-long string produces
    a SequenceError as well.

    >>> e.pack({}, {}, 16384 * 'spam')
    Traceback (most recent call last):
        ...
    SequenceError: string too long (len=65536) while packing 'spam'

    Unpacking strings produces byte arrays.  (Of course,
    in py2k these are also known as <type 'str'>.)

    >>> unpacked = e.unpack({}, {}, b'\x04\x00data', 0)
    >>> etype = bytes if ispy3k else str
    >>> print(isinstance(unpacked[0], etype))
    True
    >>> e.b2s(unpacked[0])
    'data'
    >>> unpacked[1]
    6

    You may use e.b2s() to convert them to unicode strings in py3k,
    or you may set e.autob2s.  This still only really does
    anything in py3k, since py2k strings *are* bytes, so it's
    really just intended for doctest purposes (see EncDecA):

    >>> e.autob2s = True
    >>> e.unpack({}, {}, b'\x07\x00stringy', 0)
    ('stringy', 9)
    """
    def __init__(self, name, fmt, aux=None):
        super(EncDecSimple, self).__init__(name, aux)
        self.fmt = fmt
        # None for '_string_', which is length-prefixed instead.
        self.struct = _ProtoStruct[fmt]
        self.autob2s = False

    def __repr__(self):
        if self.aux is None:
            return '{0}({1!r}, {2!r})'.format(self.__class__.__name__,
                self.name, self.fmt)
        return '{0}({1!r}, {2!r}, {3!r})'.format(self.__class__.__name__,
            self.name, self.fmt, self.aux)

    __str__ = __repr__

    def apack(self, vdict, cdict, val):
        """
        Encode a value; return a list of byte-strings.

        Fixed-size fields give a one-element list.  Strings give
        [2-byte length, utf-8 data].  Raises SequenceError if the
        value cannot be packed or the string is too long.
        """
        try:
            if self.struct:
                return [self.struct.pack(val)]
            sval = self.s2b(val)
            if len(sval) > 65535:
                raise SequenceError('string too long (len={0:d}) '
                    'while packing {1!r}'.format(len(sval), self.name))
            return [EncDecSimple.string_len.pack(len(sval)), sval]
        # Include AttributeError in case someone tries to, e.g.,
        # pack name=None and self.s2b() tries to use .encode on it.
        except (struct.error, AttributeError):
            raise SequenceError('failed '
                'while packing {0!r}={1!r}'.format(self.name, val))

    def _unpack1(self, via, bstring, offset, noerror):
        """
        Internal function to unpack a single fixed-size item using
        the struct <via>.  Returns (value, new offset); on short
        data with noerror set, the value is None but the offset
        still advances by the item's size.
        """
        try:
            tup = via.unpack_from(bstring, offset)
        except struct.error as err:
            # struct has only one exception type, so the short-buffer
            # case has to be recognized by its message text.
            if 'unpack_from requires a buffer of at least' in str(err):
                if noerror:
                    return None, offset + via.size
                raise SequenceError('out of data '
                    'while unpacking {0!r}'.format(self.name))
            # not clear what to do here if noerror
            raise SequenceError('failed '
                'while unpacking {0!r}'.format(self.name))
        assert len(tup) == 1
        return tup[0], offset + via.size

    def unpack(self, vdict, cdict, bstring, offset, noerror=False):
        "decode a value; return the value and the new offset"
        if self.struct:
            return self._unpack1(self.struct, bstring, offset, noerror)
        slen, offset = self._unpack1(EncDecSimple.string_len, bstring, offset,
            noerror)
        if slen is None:
            # Length itself did not decode (only possible with noerror).
            return None, offset
        nexto = offset + slen
        if len(bstring) < nexto:
            if noerror:
                val = None
            else:
                raise SequenceError('out of data '
                    'while unpacking {0!r}'.format(self.name))
        else:
            val = bstring[offset:nexto]
            if self.autob2s:
                val = self.b2s(val)
        return val, nexto

# string length: 2 byte unsigned field
EncDecSimple.string_len = _ProtoStruct[2]

class EncDecTyped(EncDec):
    r"""
    EncDec for typed objects (which are built from PFODs, which are
    a sneaky class variant of OrderedDict similar to namedtuple).

    Calling the klass() function with no arguments must create an
    instance with all-None members.

    We also require a Sequencer to pack and unpack the members of
    the underlying pfod.

    >>> qid_s = Sequencer('qid')
    >>> qid_s.append_encdec(None, EncDecSimple('type', 1))
    >>> qid_s.append_encdec(None, EncDecSimple('version', 4))
    >>> qid_s.append_encdec(None, EncDecSimple('path', 8))
    >>> len(qid_s)
    3

    >>> from pfod import pfod
    >>> qid = pfod('qid', ['type', 'version', 'path'])
    >>> len(qid._fields)
    3
    >>> qid_inst = qid(1, 2, 3)
    >>> qid_inst
    qid(type=1, version=2, path=3)

    >>> e = EncDecTyped(qid, 'aqid', qid_s)
    >>> e.b2s(e.pack({}, {}, qid_inst))
    '\x01\x02\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00'
    >>> e.unpack({}, {},
    ...     b'\x01\x02\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00', 0)
    (qid(type=1, version=2, path=3), 13)

    If an EncDecTyped instance has a conditional sequencer, note
    that unpacking will leave un-selected items set to None (see
    the Sequencer example below):

    >>> breakfast = pfod('breakfast', 'eggs spam ham')
    >>> breakfast()
    breakfast(eggs=None, spam=None, ham=None)
    >>> bfseq = Sequencer('breakfast')
    >>> bfseq.append_encdec(None, EncDecSimple('eggs', 1))
    >>> bfseq.append_encdec('yuck', EncDecSimple('spam', 1))
    >>> bfseq.append_encdec(None, EncDecSimple('ham', 1))
    >>> e = EncDecTyped(breakfast, 'bfname', bfseq)
    >>> e.unpack({}, {'yuck': False}, b'\x02\x01\x04', 0)
    (breakfast(eggs=2, spam=None, ham=1), 2)

    This used just two of the three bytes: eggs=2, ham=1.

    >>> e.unpack({}, {'yuck': True}, b'\x02\x01\x04', 0)
    (breakfast(eggs=2, spam=1, ham=4), 3)

    This used the third byte, so ham=4.
    """
    def __init__(self, klass, name, sequence, aux=None):
        assert len(sequence) == len(klass()._fields) # temporary
        # The base class records name and aux.
        super(EncDecTyped, self).__init__(name, aux)
        self.klass = klass
        self.sequence = sequence

    def __repr__(self):
        if self.aux is None:
            return '{0}({1!r}, {2!r}, {3!r})'.format(self.__class__.__name__,
                self.klass, self.name, self.sequence)
        return '{0}({1!r}, {2!r}, {3!r}, {4!r})'.format(self.__class__.__name__,
            self.klass, self.name, self.sequence, self.aux)

    __str__ = __repr__

    def apack(self, vdict, cdict, val):
        """
        Pack each of our instance variables.

        The typed object <val> itself serves as the variable
        dictionary for the sub-sequencer; <vdict> is not used.

        Note that some packing may be conditional.
        """
        return self.sequence.apack(val, cdict)

    def unpack(self, vdict, cdict, bstring, offset, noerror=False):
        """
        Unpack each instance variable, into a new object of
        self.klass.  Return the new instance and new offset.

        Note that some unpacking may be conditional.
        """
        obj = self.klass()
        offset = self.sequence.unpack_from(obj, cdict, bstring, offset, noerror)
        return obj, offset

class EncDecA(EncDec):
    r"""
    EncDec for arrays (repeated objects).

    We take the name of repeat count variable, and a sub-coder
    (Sequencer instance).  For instance, we can en/de-code
    repeat='nwname' copies of name='wname', or nwname of
    name='wqid', in a Twalk en/de-code.

    Note that we don't pack or unpack the repeat count itself --
    that must be done by higher level code.  We just get its value
    from vdict.

    >>> subcode = EncDecSimple('wname', '_string_')
    >>> e = EncDecA('nwname', 'wname', subcode)
    >>> e.b2s(e.pack({'nwname': 2}, {}, ['A', 'BC']))
    '\x01\x00A\x02\x00BC'

    >>> subcode.autob2s = True # so that A and BC decode to py3k str
    >>> e.unpack({'nwname': 2}, {}, b'\x01\x00A\x02\x00BC', 0)
    (['A', 'BC'], 7)

    When using noerror, the first sub-item that fails to decode
    completely starts the None-s.  Strings whose length fails to
    decode are assumed to be zero bytes long as well, for the
    purpose of showing the expected packet length:

    >>> e.unpack({'nwname': 2}, {}, b'\x01\x00A\x02\x00', 0, noerror=True)
    (['A', None], 7)
    >>> e.unpack({'nwname': 2}, {}, b'\x01\x00A\x02', 0, noerror=True)
    (['A', None], 5)
    >>> e.unpack({'nwname': 3}, {}, b'\x01\x00A\x02', 0, noerror=True)
    (['A', None, None], 7)

    As a special case, supplying None for the sub-coder
    makes the repeated item pack or unpack a simple byte
    string.  (Note that autob2s is not supported here.)
    A too-short byte string is simply truncated!

    >>> e = EncDecA('count', 'data', None)
    >>> e.b2s(e.pack({'count': 5}, {}, b'12345'))
    '12345'
    >>> x = list(e.unpack({'count': 3}, {}, b'123', 0))
    >>> x[0] = e.b2s(x[0])
    >>> x
    ['123', 3]
    >>> x = list(e.unpack({'count': 3}, {}, b'12', 0, noerror=True))
    >>> x[0] = e.b2s(x[0])
    >>> x
    ['12', 3]
    """
    def __init__(self, repeat, name, sub, aux=None):
        # The base class records name and aux.
        super(EncDecA, self).__init__(name, aux)
        self.repeat = repeat
        self.sub = sub

    def __repr__(self):
        if self.aux is None:
            return '{0}({1!r}, {2!r}, {3!r})'.format(self.__class__.__name__,
                self.repeat, self.name, self.sub)
        return '{0}({1!r}, {2!r}, {3!r}, {4!r})'.format(self.__class__.__name__,
            self.repeat, self.name, self.sub, self.aux)

    __str__ = __repr__

    def apack(self, vdict, cdict, val):
        "pack each val[i], for i in range(vdict[self.repeat])"
        num = vdict[self.repeat]
        assert num == len(val)
        if self.sub is None:
            # Raw byte-string case: the data is already packed.
            assert isinstance(val, bytes)
            return [val]
        parts = []
        for i in val:
            parts.extend(self.sub.apack(vdict, cdict, i))
        return parts

    def unpack(self, vdict, cdict, bstring, offset, noerror=False):
        "unpack repeatedly, per self.repeat, into new array."
        num = vdict[self.repeat]
        if num is None and noerror:
            # The count itself failed to decode; treat as empty.
            num = 0
        else:
            assert num >= 0
        if self.sub is None:
            nexto = offset + num
            if len(bstring) < nexto and not noerror:
                raise SequenceError('out of data '
                    'while unpacking {0!r}'.format(self.name))
            # With noerror, slicing silently truncates short data.
            return bstring[offset:nexto], nexto
        array = []
        for i in range(num):
            obj, offset = self.sub.unpack(vdict, cdict, bstring, offset,
                noerror)
            array.append(obj)
        return array, offset

class SequenceError(Exception):
    "sequence error: item too big, or ran out of data"
    pass

class Sequencer(object):
    r"""
    A sequencer is an object that packs (marshals) or unpacks
    (unmarshals) a series of objects, according to their EncDec
    instances.

    The objects themselves (and their values) come from, or
    go into, a dictionary: <vdict>, the first argument to
    pack/unpack.

    Some fields may be conditional.  The conditions are in a
    separate dictionary (the second or <cdict> argument).

    Some objects may be dictionaries or PFODs, e.g., they may
    be a Plan9 qid or stat structure.  These have their own
    sub-encoding.

    As with each encoder, we have both an apack() function
    (returns a list of parts) and a plain pack().  Users should
    mostly stick with plain pack().

    >>> s = Sequencer('monty')
    >>> s
    Sequencer('monty')
    >>> e = EncDecSimple('eggs', 2)
    >>> s.append_encdec(None, e)
    >>> s.append_encdec(None, EncDecSimple('spam', 1))
    >>> s[0]
    (None, EncDecSimple('eggs', 2))
    >>> e.b2s(s.pack({'eggs': 513, 'spam': 65}, {}))
    '\x01\x02A'

    When particular fields are conditional, they appear in
    packed output, or are taken from the byte-string during
    unpacking, only if their condition is true.

    As with struct, use unpack_from to start at an arbitrary
    offset and/or omit verification that the entire byte-string
    is consumed.

    >>> s = Sequencer('python')
    >>> s.append_encdec(None, e)
    >>> s.append_encdec('.u', EncDecSimple('spam', 1))
    >>> s[1]
    ('.u', EncDecSimple('spam', 1))
    >>> e.b2s(s.pack({'eggs': 513, 'spam': 65}, {'.u': True}))
    '\x01\x02A'
    >>> e.b2s(s.pack({'eggs': 513, 'spam': 65}, {'.u': False}))
    '\x01\x02'

    >>> d = {}
    >>> s.unpack(d, {'.u': True}, b'\x01\x02A')
    >>> print(d['eggs'], d['spam'])
    513 65
    >>> d = {}
    >>> s.unpack(d, {'.u': False}, b'\x01\x02A', 0)
    Traceback (most recent call last):
        ...
    SequenceError: 1 byte(s) unconsumed
    >>> s.unpack_from(d, {'.u': False}, b'\x01\x02A', 0)
    2
    >>> print(d)
    {'eggs': 513}

    The incoming dictionary-like object may be pre-initialized
    if you like; only sequences that decode are filled-in:

    >>> d = {'eggs': None, 'spam': None}
    >>> s.unpack_from(d, {'.u': False}, b'\x01\x02A', 0)
    2
    >>> print(d['eggs'], d['spam'])
    513 None

    Some objects may be arrays; if so their EncDec is actually
    an EncDecA, the repeat count must be in the dictionary, and
    the object itself must have a len() and be index-able:

    >>> s = Sequencer('arr')
    >>> s.append_encdec(None, EncDecSimple('n', 1))
    >>> ae = EncDecSimple('array', 2)
    >>> s.append_encdec(None, EncDecA('n', 'array', ae))
    >>> ae.b2s(s.pack({'n': 2, 'array': [257, 514]}, {}))
    '\x02\x01\x01\x02\x02'

    Unpacking an array creates a list of the number of items.
    The EncDec encoder that decodes the number of items needs to
    occur first in the sequencer, so that the dictionary will have
    acquired the repeat-count variable's value by the time we hit
    the array's encdec:

    >>> d = {}
    >>> s.unpack(d, {}, b'\x01\x04\x00')
    >>> d['n'], d['array']
    (1, [4])
    """
    def __init__(self, name):
        self.name = name
        self._codes = []    # list of (condition-or-None, EncDec) pairs
        self.debug = False  # or sys.stderr

    def __repr__(self):
        return '{0}({1!r})'.format(self.__class__.__name__, self.name)

    __str__ = __repr__

    def __len__(self):
        return len(self._codes)

    def __iter__(self):
        return iter(self._codes)

    def __getitem__(self, index):
        return self._codes[index]

    def dprint(self, *args, **kwargs):
        """
        Print debug output if self.debug is set.

        If self.debug is a bool (i.e., True) the output goes to
        sys.stdout; otherwise self.debug is used as the stream.
        """
        if not self.debug:
            return
        if isinstance(self.debug, bool):
            dest = sys.stdout
        else:
            dest = self.debug
        print(*args, file=dest, **kwargs)

    def append_encdec(self, cond, code):
        "add EncDec en/de-coder, conditional on cond"
        self._codes.append((cond, code))

    def apack(self, vdict, cdict):
        """
        Produce packed representation of each field.

        Returns a list of byte-strings, one or more per field,
        in sequence order.
        """
        packed_data = []
        for cond, code in self._codes:
            # Skip this item if it's conditional on a false thing.
            if cond is not None and not cdict[cond]:
                self.dprint('skip %r - %r is False' % (code, cond))
                continue

            # Pack the item.
            self.dprint('pack %r - no cond or %r is True' % (code, cond))
            packed_data.extend(code.apack(vdict, cdict, vdict[code.name]))

        return packed_data

    def pack(self, vdict, cdict):
        """
        Flatten packed data.
        """
        return b''.join(self.apack(vdict, cdict))

    def unpack_from(self, vdict, cdict, bstring, offset=0, noerror=False):
        """
        Unpack from byte string.

        The values are unpacked into a dictionary vdict;
        some of its entries may themselves be ordered
        dictionaries created by typedefed codes.

        Returns the offset just past the last byte consumed.

        Raises SequenceError if the string is too short,
        unless you set noerror, in which case we assume
        you want to see what you can get out of the data.
        """
        for cond, code in self._codes:
            # Skip this item if it's conditional on a false thing.
            if cond is not None and not cdict[cond]:
                self.dprint('skip %r - %r is False' % (code, cond))
                continue

            # Unpack the item.
            self.dprint('unpack %r - no cond or %r is True' % (code, cond))
            obj, offset = code.unpack(vdict, cdict, bstring, offset, noerror)
            vdict[code.name] = obj

        return offset

    def unpack(self, vdict, cdict, bstring, noerror=False):
        """
        Like unpack_from but unless noerror=True, requires that
        we completely use up the given byte string.
        """
        offset = self.unpack_from(vdict, cdict, bstring, 0, noerror)
        if not noerror and offset != len(bstring):
            raise SequenceError('{0} byte(s) unconsumed'.format(
                len(bstring) - offset))

if __name__ == '__main__':
    import doctest
    doctest.testmod()