Path: blob/main/python/pylang/src/baselib/str.py
1398 views
# vim:fileencoding=utf-8
# License: BSD
# Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>

# globals: ρσ_kwargs_symbol, ρσ_list_decorate, ρσ_iterator_symbol, HTMLElement

# Locale can’t be changed in-flight, so we just retrieve this once.
# Sadly older node versions (< 8) don’t support formatToParts
# decimal_sep = Intl.NumberFormat() \
#     .formatToParts(1.1) \
#     .find(def(part): return part.type == 'decimal';) \
#     .value
# NOTE(review): format() below no longer consults this value (grouping is
# now rendered locale-independently); the name is kept in case other code
# refers to it -- confirm before removing.
decimal_sep = (1.1).toLocaleString()[1]


def ρσ_repr_js_builtin(x, as_array):
    # Render a plain JS array (when as_array is truthy) or a plain JS object
    # as a Python style list/dict literal. Every key and value is rendered
    # with ρσ_repr().
    ans = v'[]'
    b = '{}'
    if as_array:
        b = '[]'
        for v'var i = 0; i < x.length; i++':
            ans.push(ρσ_repr(x[i]))
    else:
        keys = Object.keys(x)
        for v'var k = 0; k < keys.length; k++':
            key = keys[k]
            ans.push(ρσ_repr(key) + ': ' + ρσ_repr(x[key]))
    return b[0] + ans.join(', ') + b[1]


def ρσ_html_element_to_string(elem):
    # Return a short '<TAG name="value" ...>' description of a DOM element.
    # Attribute values longer than 15 characters are cut to 15 characters
    # followed by '...'.
    attrs = v'[]'
    for attr in elem.attributes:
        if attr.specified:
            val = attr.value
            # The threshold matches the cut length, so the ellipsis is only
            # added when something was actually removed.
            if val.length > 15:
                val = val[:15] + '...'
            val = JSON.stringify(val)
            attrs.push(f'{attr.name}={val}')
    attrs = (' ' + attrs.join(' ')) if attrs.length else ''
    ans = f'<{elem.tagName}{attrs}>'
    return ans


def ρσ_repr(x):
    # Python style repr() of an arbitrary JS value. Always returns a
    # primitive string.
    if x is None:
        return 'None'
    if x is undefined:
        return 'undefined'
    ans = x
    if v'typeof x.__repr__ === "function"':
        ans = x.__repr__()
    elif x is True or x is False:
        ans = 'True' if x else 'False'
    elif Array.isArray(x):
        ans = ρσ_repr_js_builtin(x, True)
    elif jstype(x) is 'string':  # python uses single quotes for repr(string)
        ans = "'" + x + "'"
    elif jstype(x) is 'function':
        ans = x.toString()
    elif jstype(x) is 'object' and not x.toString:
        # Assume this is a dictionary
        ans = ρσ_repr_js_builtin(x)
    else:
        name = Object.prototype.toString.call(x).slice(8, -1)
        if "Int8Array Uint8Array Uint8ClampedArray Int16Array Uint16Array Int32Array Uint32Array Float32Array Float64Array".indexOf(name) != -1:
            return name + '([' + x.map(def(i): return str.format('0x{:02x}', i);).join(', ') + '])'
        if jstype(HTMLElement) is not 'undefined' and v'x instanceof HTMLElement':
            # Return right away: JSON.stringify() of an element would
            # otherwise replace this description.
            return ρσ_html_element_to_string(x)
        ans = x.toString() if v'typeof x.toString === "function"' else x
        if ans is '[object Object]':
            # Assume this is a dictionary
            return ρσ_repr_js_builtin(x)
        # JSON turns NaN/Infinity into 'null', so numbers keep toString()
        if jstype(x) is not 'number':
            try:
                json = JSON.stringify(x)
                # stringify() yields undefined for values JSON cannot
                # represent; keep the toString() answer in that case
                if jstype(json) is 'string':
                    ans = json
            except:
                # Best effort only: stringify() throws on e.g. cyclic objects
                pass
    return ans + ''  # Ensures we return an object of type string (i.e. primitive value) rather than a String object


def ρσ_str(x):
    # Python style str() of an arbitrary JS value. Prefers __str__(), then
    # __repr__(), then falls back to type specific rendering. Always returns
    # a primitive string.
    if x is None:
        return 'None'
    if x is undefined:
        return 'undefined'
    ans = x
    if v'typeof x.__str__ === "function"':
        ans = x.__str__()
    elif v'typeof x.__repr__ === "function"':
        ans = x.__repr__()
    elif x is True or x is False:
        ans = 'True' if x else 'False'
    elif Array.isArray(x):
        ans = ρσ_repr_js_builtin(x, True)
    elif v'typeof x.toString === "function"':
        name = Object.prototype.toString.call(x).slice(8, -1)
        if "Int8Array Uint8Array Uint8ClampedArray Int16Array Uint16Array Int32Array Uint32Array Float32Array Float64Array".indexOf(name) != -1:
            return name + '([' + x.map(def(i): return str.format('0x{:02x}', i);).join(', ') + '])'
        if jstype(HTMLElement) is not 'undefined' and v'x instanceof HTMLElement':
            ans = ρσ_html_element_to_string(x)
        else:
            ans = x.toString()
        if ans is '[object Object]':
            # Assume this is a dictionary
            ans = ρσ_repr_js_builtin(x)
    elif jstype(x) is 'object' and not x.toString:
        # Assume this is a dictionary
        ans = ρσ_repr_js_builtin(x)
    return ans + ''  # Ensures we return an object of type string (i.e. primitive value) rather than a String object


# Register func both as ρσ_str.prototype[name] (called with the string as
# `this`) and as ρσ_str[name] (called with the string as first argument).
define_str_func = def(name, func):
    ρσ_str.prototype[name] = func
    ρσ_str[name] = f = func.call.bind(func)
    if func.__argnames__:
        # The bound form takes the string itself as an extra leading argument
        Object.defineProperty(f, '__argnames__', {'value':v"['string']".concat(func.__argnames__)})

# Native split()/replace() in "string as first argument" form
ρσ_orig_split, ρσ_orig_replace = String.prototype.split.call.bind(String.prototype.split), String.prototype.replace.call.bind(String.prototype.replace)

# format() {{{
# Implementation of str.format(). `this` is the template; positional values
# are the arguments and keyword values come from a trailing object marked
# with ρσ_kwargs_symbol.
# Raises TypeError without a template, ValueError for malformed templates or
# format specs, KeyError/IndexError for fields that cannot be resolved.
define_str_func('format', def ():
    template = this
    if template is undefined:
        raise TypeError("Template is required")
    args = Array.prototype.slice.call(arguments)
    kwargs = {}
    if args[-1] and args[-1][ρσ_kwargs_symbol] is not undefined:
        kwargs = args[-1]
        args = args[:-1]

    explicit = implicit = False
    idx = 0
    split = ρσ_orig_split

    if ρσ_str.format._template_resolve_pat is undefined:
        ρσ_str.format._template_resolve_pat = /[.\[]/

    def resolve(arg, object):
        # Follow a chain of .attr / [key] accessors starting at object
        if not arg:
            return object
        first, arg = arg[0], arg[1:]
        key = split(arg, ρσ_str.format._template_resolve_pat, 1)[0]
        rest = arg[key.length:]
        # For [key] the closing bracket is still attached to key
        ans = object[key[:-1]] if first is '[' else getattr(object, key)
        if ans is undefined:
            raise KeyError(key[:-1] if first is '[' else key)
        return resolve(rest, ans)

    def resolve_format_spec(format_spec):
        # Substitute nested {name} fields in a format spec from kwargs;
        # unknown names become the empty string
        if ρσ_str.format._template_resolve_fs_pat is undefined:
            ρσ_str.format._template_resolve_fs_pat = /[{]([a-zA-Z0-9_]+)[}]/g
        return format_spec.replace(ρσ_str.format._template_resolve_fs_pat, def (match, key):
            if not Object.prototype.hasOwnProperty.call(kwargs, key):
                return ''
            return '' + kwargs[key]
        )

    def set_comma(ans, comma):
        # ans was rendered with en-US grouping, i.e. with ',' separators
        if comma is not ',':
            ans = str.replace(ans, ',', comma)
        return ans

    def safe_comma(value, comma):
        # The ',' and '_' options are not locale aware in Python (that is
        # what the 'n' type is for), so group with a fixed locale instead of
        # whatever the runtime default happens to be.
        try:
            return set_comma(value.toLocaleString('en-US', v'{useGrouping: true}'), comma)
        except:
            return value.toString(10)

    def safe_fixed(value, precision, comma):
        # Fixed point rendering with optional thousands grouping
        if not comma:
            return value.toFixed(precision)
        try:
            return set_comma(value.toLocaleString('en-US', v'{useGrouping: true, minimumFractionDigits: precision, maximumFractionDigits: precision}'), comma)
        except:
            return value.toFixed(precision)

    def strip_trailing_zeros(text):
        # Drop insignificant zeros after the decimal point, and the point
        # itself when nothing follows it. Text without a decimal point is
        # returned unchanged so that the zeros of 100000 survive.
        if text.indexOf('.') is -1:
            return text
        text = text.replace(/0+$/, '')
        if text[-1] is '.':
            text = text[:-1]
        return text

    def apply_formatting(value, format_spec):
        # Apply a format spec ([[fill]align][sign][#][0][width][,][.precision][type])
        # to value and return the resulting string.
        # NOTE(review): the caller passes an already stringified value, so
        # text defaults to right alignment here (Python left-aligns
        # strings); left unchanged to avoid altering existing output.
        if format_spec.indexOf('{') is not -1:
            format_spec = resolve_format_spec(format_spec)
        if ρσ_str.format._template_format_pat is undefined:
            ρσ_str.format._template_format_pat = ///
                ([^{}](?=[<>=^]))?([<>=^])? # fill & align
                ([-+\x20])? # sign
                (\#)? # integer base specifier
                (0)? # zero-padding
                (\d+)? # width
                ([,_])? # use a grouping (thousands) separator
                (?:\.(\d+))? # precision
                ([bcdeEfFgGnosxX%])? # type
            ///

        try:
            fill, align, sign, fhash, zeropad, width, comma, precision, ftype = format_spec.match(ρσ_str.format._template_format_pat)[1:]
        except TypeError:
            return value
        if zeropad:
            fill = fill or '0'
            align = align or '='
        else:
            fill = fill or ' '
            align = align or '>'
        is_numeric = v'Number(value) === value'
        is_int = is_numeric and v'value % 1 === 0'
        precision = parseInt(precision, 10)
        lftype = (ftype or '').toLowerCase()

        if ftype is 'n':
            # Locale aware number
            is_numeric = True
            if is_int:
                if comma:
                    raise ValueError("Cannot specify ',' with 'n'")
                value = parseInt(value, 10).toLocaleString()
            else:
                value = parseFloat(value).toLocaleString()

        elif v"['b', 'c', 'd', 'o', 'x']".indexOf(lftype) is not -1:
            # Integer presentation types
            value = parseInt(value, 10)
            is_numeric = True
            if not isNaN(value):
                if ftype is 'b':
                    value = v'(value >>> 0).toString(2)'
                    if fhash:
                        value = '0b' + value
                elif ftype is 'c':
                    if value > 0xFFFF:
                        # Outside the BMP: emit a surrogate pair
                        code = value - 0x10000
                        value = String.fromCharCode(0xD800+(code>>10), 0xDC00+(code&0x3FF))
                    else:
                        value = String.fromCharCode(value)
                elif ftype is 'd':
                    if comma:
                        value = safe_comma(value, comma)
                    else:
                        value = value.toString(10)
                elif ftype is 'o':
                    value = value.toString(8)
                    if fhash:
                        value = '0o' + value
                elif lftype is 'x':
                    value = value.toString(16)
                    value = value.toLowerCase() if ftype is 'x' else value.toUpperCase()
                    if fhash:
                        value = '0x' + value

        elif v"['e','f','g','%']".indexOf(lftype) is not -1:
            # Floating point presentation types
            is_numeric = True
            value = parseFloat(value)
            prec = 6 if isNaN(precision) else precision
            if lftype is 'e':
                value = value.toExponential(prec)
                value = value.toUpperCase() if ftype is 'E' else value.toLowerCase()
            elif lftype is 'f':
                value = safe_fixed(value, prec, comma)
                value = value.toUpperCase() if ftype is 'F' else value.toLowerCase()
            elif lftype is '%':
                value *= 100
                value = safe_fixed(value, prec, comma) + '%'
            elif lftype is 'g':
                prec = max(1, prec)
                exp = parseInt(split(value.toExponential(prec - 1).toLowerCase(), 'e')[1], 10)
                if -4 <= exp < prec:
                    value = strip_trailing_zeros(safe_fixed(value, prec - 1 - exp, comma))
                else:
                    # Only the mantissa may lose zeros, never the exponent
                    mantissa, exponent = split(value.toExponential(prec - 1), 'e')
                    value = strip_trailing_zeros(mantissa) + 'e' + exponent
                if ftype is 'G':
                    value = value.toUpperCase()

        else:
            if comma:
                value = parseInt(value, 10)
                if isNaN(value):
                    raise ValueError('Must use numbers with , or _')
                value = safe_comma(value, comma)
            value += ''  # Ensure we have a string
            if not isNaN(precision):
                value = value[:precision]

        value += ''  # Ensure we have a string

        if is_numeric and sign:
            nval = v'Number(value)'
            is_positive = not isNaN(nval) and nval >= 0
            if is_positive and (sign is ' ' or sign is '+'):
                value = sign + value

        def repeat(char, num):
            return v'(new Array(num+1)).join(char)'

        if is_numeric and width and width[0] is '0':
            width = width[1:]
            fill, align = '0', '='

        width = parseInt(width or '-1', 10)
        if isNaN(width):
            raise ValueError('Invalid width specification: ' + width)

        if fill and value.length < width:
            if align is '<':
                value = value + repeat(fill, width - value.length)
            elif align is '>':
                value = repeat(fill, width - value.length) + value
            elif align is '^':
                left = (width - value.length) // 2
                right = width - left - value.length
                value = repeat(fill, left) + value + repeat(fill, right)
            elif align is '=':
                # Padding goes between the sign and the digits
                if value[0] in "+- ":
                    value = value[0] + repeat(fill, width - value.length) + value[1:]
                else:
                    value = repeat(fill, width - value.length) + value
            else:
                raise ValueError('Unrecognized alignment: ' + align)

        return value

    def parse_markup(markup):
        # Split the text between braces into key, !conversion and :format_spec
        key = transformer = format_spec = ''
        pos = 0
        state = 0
        while pos < markup.length:
            ch = markup[pos]
            if state is 0:
                if ch is '!':
                    state = 1
                elif ch is ':':
                    state = 2
                else:
                    key += ch
            elif state is 1:
                if ch is ':':
                    state = 2
                else:
                    transformer += ch
            else:
                format_spec += ch
            pos += 1
        return key, transformer, format_spec

    def render_markup(markup):
        # Produce the replacement text for a single {...} field
        nonlocal explicit, implicit, idx
        key, transformer, format_spec = parse_markup(markup)
        if transformer and v"['a', 'r', 's']".indexOf(transformer) is -1:
            raise ValueError('Unknown conversion specifier: ' + transformer)
        ends_with_equal = key.endsWith('=')
        if ends_with_equal:
            key = key[:-1]
        lkey = key.length and split(key, /[.\[]/, 1)[0]
        if lkey:
            explicit = True
            if implicit:
                raise ValueError('cannot switch from automatic field numbering to manual field specification')
            nvalue = parseInt(lkey)
            object = kwargs[lkey] if isNaN(nvalue) else args[nvalue]
            if object is undefined:
                if isNaN(nvalue):
                    raise KeyError(lkey)
                raise IndexError(lkey)
            object = resolve(key[lkey.length:], object)
        else:
            implicit = True
            if explicit:
                raise ValueError('cannot switch from manual field specification to automatic field numbering')
            if idx >= args.length:
                raise IndexError('Not enough arguments to match template: ' + template)
            object = args[idx]
            idx += 1
        if jstype(object) is 'function':
            object = object()
        # Apply an explicit conversion; without one the plain JS string
        # conversion is used, as before
        if transformer is 'r' or transformer is 'a':
            ans = ρσ_repr(object)
        elif transformer is 's':
            ans = ρσ_str(object)
        else:
            ans = '' + object
        if format_spec:
            ans = apply_formatting(ans, format_spec)
        if ends_with_equal:
            ans = f'{key}={ans}'
        return ans

    ans = ''
    pos = 0
    in_brace = 0
    markup = ''
    while pos < template.length:
        ch = template[pos]
        if in_brace:
            if ch is '{':
                in_brace += 1
                markup += '{'
            elif ch is '}':
                in_brace -= 1
                if in_brace > 0:
                    markup += '}'
                else:
                    ans += render_markup(markup)
            else:
                markup += ch
        else:
            if ch is '{':
                if template[pos+1] is '{':
                    # '{{' is a literal brace
                    pos += 1
                    ans += '{'
                else:
                    in_brace = 1
                    markup = ''
            else:
                ans += ch
                if ch is '}' and template[pos+1] is '}':
                    # '}}' is a literal brace
                    pos += 1

        pos += 1

    if in_brace:
        raise ValueError("expected '}' before end of string")

    return ans
)
# }}}

# First character upper cased, the rest lower cased
define_str_func('capitalize', def ():
    string = this
    # Test the length: a boxed empty String is truthy
    if string.length:
        string = string[0].toUpperCase() + string[1:].toLowerCase()
    return string
)

# Pad on both sides with fill (default ' ') up to width
define_str_func('center', def(width, fill):
    pad = width - this.length
    if not (pad > 0):
        # Nothing to pad; also avoids new Array(<negative>) which throws
        return this.toString()
    # Same placement of an odd padding character as CPython
    left = pad // 2 + (pad & width & 1)
    right = pad - left  # noqa:unused-local
    fill = fill or ' '
    return v'new Array(left+1).join(fill)' + this + v'new Array(right+1).join(fill)'
)

# Number of non-overlapping occurrences of needle in this[start:end]
define_str_func('count', def(needle, start, end):
    string = this
    start = start or 0
    if end is undefined or end is None:
        end = string.length
    if start is not 0 or end is not string.length:
        # Restrict the search to the requested window
        string = string[start:end]
    step = needle.length
    if not step:
        return 0
    ans = 0
    pos = string.indexOf(needle)
    while pos is not -1:
        ans += 1
        pos = string.indexOf(needle, pos + step)
    return ans
)

# True if this[start:end] ends with the suffix, or any of an array of suffixes
define_str_func('endswith', def(suffixes, start, end):
    string = this
    if jstype(suffixes) is 'string':
        suffixes = v'[suffixes]'
    if end is undefined or end is None:
        end = string.length
    # Slicing deals with negative start/end the same way Python does
    string = string[(start or 0):end]
    for v'var i = 0; i < suffixes.length; i++':
        q = suffixes[i]  # noqa:undef
        if q.length <= string.length and string[string.length - q.length:] is q:
            return True
    return False
)

# True if this[start:end] starts with the prefix, or any of an array of prefixes
define_str_func('startswith', def(prefixes, start, end):
    string = this
    if jstype(prefixes) is 'string':
        prefixes = v'[prefixes]'
    if end is undefined or end is None:
        end = string.length
    # Slicing deals with negative start/end the same way Python does
    string = string[(start or 0):end]
    for v'var i = 0; i < prefixes.length; i++':
        prefix = prefixes[i]  # noqa:undef
        if prefix.length <= string.length and string[:prefix.length] is prefix:
            return True
    return False
)

# Lowest index of needle that lies entirely inside this[start:end], or -1
define_str_func('find', def(needle, start, end):
    size = this.length
    start = start or 0
    if start < 0:
        start = Math.max(0, start + size)
    ans = this.indexOf(needle, start)
    if ans is not -1 and end is not undefined and end is not None:
        if end < 0:
            end = Math.max(0, end + size)
        # The whole match has to fit before end
        if ans + needle.length > end:
            return -1
    return ans
)

# Highest index of needle that lies entirely inside this[start:end], or -1
define_str_func('rfind', def(needle, start, end):
    size = this.length
    if end is undefined or end is None:
        end = size
    elif end < 0:
        end = Math.max(0, end + size)
    # Right-most position at which the match still ends at or before end
    last = Math.min(end, size) - needle.length
    if last < 0:
        return -1
    ans = this.lastIndexOf(needle, last)
    if ans is not -1 and start is not undefined and start is not None:
        if start < 0:
            start = Math.max(0, start + size)
        if ans < start:
            return -1
    return ans
)

# Like find() but raises ValueError when needle is absent
define_str_func('index', def(needle, start, end):
    ans = ρσ_str.prototype.find.apply(this, arguments)
    if ans is -1:
        raise ValueError('substring not found')
    return ans
)

# Like rfind() but raises ValueError when needle is absent
define_str_func('rindex', def(needle, start, end):
    ans = ρσ_str.prototype.rfind.apply(this, arguments)
    if ans is -1:
        raise ValueError('substring not found')
    return ans
)

# True if there is at least one cased character and no upper case one
define_str_func('islower', def():
    text = this.toString()
    return text.length > 0 and text.toLowerCase() is text and text.toUpperCase() is not text
)

# True if there is at least one cased character and no lower case one
define_str_func('isupper', def():
    text = this.toString()
    return text.length > 0 and text.toUpperCase() is text and text.toLowerCase() is not text
)

# True if the string is non-empty and consists of whitespace only
define_str_func('isspace', def():
    return this.length > 0 and /^\s+$/.test(this)
)

# Concatenate the items of an array or iterator, separated by this string
define_str_func('join', def(iterable):
    if Array.isArray(iterable):
        return iterable.join(this)
    if jstype(iterable.next) is not 'function' and jstype(iterable[ρσ_iterator_symbol]) is 'function':
        # An iterable rather than an iterator: obtain its iterator first
        iterable = iterable[ρσ_iterator_symbol]()
    ans = ''
    first = True
    r = iterable.next()
    while not r.done:
        # Track the position explicitly; testing `ans` would drop the
        # separator after leading empty items
        if first:
            first = False
        else:
            ans += this
        ans += r.value
        r = iterable.next()
    return ans
)

# Pad on the right with fill (default ' ') up to width
define_str_func('ljust', def(width, fill):
    string = this
    if width > string.length:
        fill = fill or ' '
        string += v'new Array(width - string.length + 1).join(fill)'
    return string
)

# Pad on the left with fill (default ' ') up to width
define_str_func('rjust', def(width, fill):
    string = this
    if width > string.length:
        fill = fill or ' '
        string = v'new Array(width - string.length + 1).join(fill)' + string
    return string
)

define_str_func('lower', def():
    return this.toLowerCase()
)

define_str_func('upper', def():
    return this.toUpperCase()
)

# Remove leading characters that occur in chars (default: whitespace)
define_str_func('lstrip', def(chars):
    string = this
    pos = 0
    chars = chars or ρσ_str.whitespace
    # The bounds check keeps us from looking up string[string.length]
    while pos < string.length and chars.indexOf(string[pos]) is not -1:
        pos += 1
    if pos:
        string = string[pos:]
    return string
)

# Remove trailing characters that occur in chars (default: whitespace)
define_str_func('rstrip', def(chars):
    string = this
    pos = string.length - 1
    chars = chars or ρσ_str.whitespace
    # The bounds check keeps us from looking up string[-1]
    while pos > -1 and chars.indexOf(string[pos]) is not -1:
        pos -= 1
    if pos < string.length - 1:
        string = string[:pos + 1]
    return string
)

define_str_func('strip', def(chars):
    return ρσ_str.prototype.lstrip.call(ρσ_str.prototype.rstrip.call(this, chars), chars)
)

# (head, sep, tail) around the first occurrence of sep
define_str_func('partition', def(sep):
    idx = this.indexOf(sep)
    if idx is -1:
        return this, '', ''
    return this[:idx], sep, this[idx + sep.length:]
)

# (head, sep, tail) around the last occurrence of sep
define_str_func('rpartition', def(sep):
    idx = this.lastIndexOf(sep)
    if idx is -1:
        return '', '', this
    return this[:idx], sep, this[idx + sep.length:]
)

# Replace occurrences of old with repl. At most count replacements are made;
# an omitted or negative count replaces every occurrence.
define_str_func('replace', def(old, repl, count):
    string = this
    if count is 1 and jstype(old) is not 'string':
        # Non-string patterns (e.g. a RegExp) keep going through the native
        # implementation, as before
        return ρσ_orig_replace(string, old, repl)
    if count is undefined or count is None or count < 0:
        count = Number.MAX_VALUE
    if count < 1:
        return string
    if old is '':
        # An empty pattern matches before every character and at the end.
        # Handled separately because indexOf('') never returns -1.
        limit = Math.min(count, string.length + 1)
        ans = ''
        i = 0
        while i < string.length:
            if i < limit:
                ans += repl
            ans += string[i]
            i += 1
        if limit > string.length:
            ans += repl
        return ans
    # Plain text search: unlike native replace() this does not interpret
    # '$&', '$1', ... inside repl
    pos = 0
    while count > 0:
        count -= 1
        idx = string.indexOf(old, pos)
        if idx is -1:
            break
        pos = idx + repl.length
        string = string[:idx] + repl + string[idx + old.length:]
    return string
)

# Split on sep (default: runs of whitespace) doing at most maxsplit splits.
# Raises ValueError for an empty separator.
# NOTE(review): with the default separator, leading/trailing whitespace
# yields empty first/last fields, unlike Python. Left as is because callers
# may depend on it.
define_str_func('split', def(sep, maxsplit):
    if maxsplit is 0:
        return [this]
    split = ρσ_orig_split
    if sep is undefined or sep is None:
        if maxsplit > 0:
            # The capturing group keeps the separators (odd indices) so the
            # unsplit remainder can be re-assembled verbatim
            ans = split(this, /(\s+)/)
            extra = ''
            parts = v'[]'
            for v'var i = 0; i < ans.length; i++':
                if parts.length >= maxsplit + 1:
                    extra += ans[i]
                elif i % 2 is 0:
                    parts.push(ans[i])  # noqa:undef
            parts[-1] += extra
            ans = parts
        else:
            ans = split(this, /\s+/)
    else:
        if sep is '':
            raise ValueError('empty separator')
        ans = split(this, sep)
        if maxsplit > 0 and ans.length > maxsplit:
            extra = ans[maxsplit:].join(sep)
            ans = ans[:maxsplit]
            ans.push(extra)
    return ρσ_list_decorate(ans)
)

# Like split() but the at most maxsplit splits are made from the right.
# Raises ValueError for an empty separator.
define_str_func('rsplit', def(sep, maxsplit):
    if not maxsplit or maxsplit < 0:
        # Without an effective limit the direction does not matter;
        # maxsplit == 0 makes split() return the unsplit string
        return ρσ_str.prototype.split.call(this, sep, maxsplit)
    ans = v'[]'
    if sep is undefined or sep is None:
        is_space = /\s/
        end = this.length
        # Trailing whitespace never produces a field
        while end > 0 and is_space.test(this[end - 1]):
            end -= 1
        while maxsplit > 0 and end > 0:
            # Walk back over the word that ends at `end`
            pos = end - 1
            while pos > -1 and not is_space.test(this[pos]):
                pos -= 1
            if pos < 0:
                # No whitespace left: the remainder is a single word
                break
            ans.push(this[pos + 1:end])
            maxsplit -= 1
            # Skip the whitespace run in front of that word
            while pos > -1 and is_space.test(this[pos]):
                pos -= 1
            end = pos + 1
        if end > 0:
            ans.push(this[:end])
    else:
        if sep is '':
            raise ValueError('empty separator')
        pos = end = this.length
        while pos > -1 and maxsplit > 0:
            maxsplit -= 1
            idx = this.lastIndexOf(sep, pos)
            if idx is -1:
                break
            ans.push(this[idx + sep.length:end])
            # The next match must end before this one starts
            pos = idx - sep.length
            end = idx
        ans.push(this[:end])
    ans.reverse()
    return ρσ_list_decorate(ans)
)

# Split at line boundaries (\n, \r\n or \r); keepends retains the terminators
# NOTE(review): a trailing line terminator yields a final empty entry, unlike
# Python. Left as is because callers may depend on it.
define_str_func('splitlines', def(keepends):
    split = ρσ_orig_split
    if keepends:
        # The capturing group puts every terminator at an odd index
        parts = split(this, /((?:\r?\n)|\r)/)
        ans = v'[]'
        for v'var i = 0; i < parts.length; i++':
            if i % 2 is 0:
                ans.push(parts[i])
            else:
                ans[-1] += parts[i]  # noqa:undef
    else:
        ans = split(this, /(?:\r?\n)|\r/)
    return ρσ_list_decorate(ans)
)

# Swap the case of every character
define_str_func('swapcase', def():
    ans = v'new Array(this.length)'
    for v'var i = 0; i < ans.length; i++':
        a = this[i]
        # We dont care about non-BMP chars as they are not cased anyway
        b = a.toLowerCase()
        if a is b:
            b = a.toUpperCase()
        ans[i] = b  # noqa:undef
    return ans.join('')
)

# Pad with zeros on the left up to width; a leading sign stays in front
define_str_func('zfill', def(width):
    string = this
    if width > string.length:
        pad = v'new Array(width - string.length + 1).join("0")'
        if string[0] is '+' or string[0] is '-':
            string = string[0] + pad + string[1:]
        else:
            string = pad + string
    return string
)

ρσ_str.ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
ρσ_str.ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ρσ_str.ascii_letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
ρσ_str.digits = '0123456789'
ρσ_str.punctuation = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
ρσ_str.printable = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c'
ρσ_str.whitespace = ' \t\n\r\x0b\x0c'

v'define_str_func = undefined'
v'var str = ρσ_str, repr = ρσ_repr'