CoCalc -- formatter.py

GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/formatter.py
5457 views
1
# -*- coding: utf-8 -*-
2

3
# Copyright 2021-2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8

9
"""String formatters"""
10

11
import os
12
import sys
13
import time
14
import string
15
import _string
16
import datetime
17
import operator
18
from . import text, util
19

20
NONE = util.NONE
21

22

23
def parse(format_string, default=NONE, fmt=format):
    """Return a (cached) formatter object for 'format_string'

    A format string starting with a form feed ("\\f") selects the
    formatter class by the type name between the form feed and the first
    space; everything after that space is handed to the selected class.
    Unknown type names are logged and fall back to StringFormatter.
    """
    cache_key = (format_string, default, fmt)
    if cache_key in _CACHE:
        return _CACHE[cache_key]

    cls = StringFormatter
    if format_string and format_string[0] == "\f":
        kind, _, format_string = format_string.partition(" ")
        type_name = kind[1:]
        if type_name in _FORMATTERS:
            cls = _FORMATTERS[type_name]
        else:
            import logging
            logging.getLogger("formatter").error(
                "Invalid formatter type '%s'", type_name)

    formatter = cls(format_string, default, fmt)
    _CACHE[cache_key] = formatter
    return formatter
45

46

47
class StringFormatter():
    """Custom, extended version of string.Formatter

    This string formatter implementation is a mostly performance-optimized
    variant of the original string.Formatter class. Unnecessary features have
    been removed (positional arguments, unused argument check) and new
    formatting options have been added.

    Extra Conversions:
    - "l": calls str.lower on the target value
    - "u": calls str.upper
    - "c": calls str.capitalize
    - "C": calls string.capwords
    - "g": calls text.slugify()
    - "j": calls util.json_dumps
    - "t": calls str.strip
    - "T": calls util.datetime_to_timestamp_string()
    - "d": calls text.parse_timestamp
    - "s": calls str()
    - "S": calls util.to_string()
    - "U": calls text.unescape
    - "r": calls repr()
    - "a": calls ascii()
    - Example: {f!l} -> "example"; {f!u} -> "EXAMPLE"

    # See _CONVERSIONS and _FORMAT_SPECIFIERS below for all of them, or read:
    # https://github.com/mikf/gallery-dl/blob/master/docs/formatting.md

    Extra Format Specifiers:
    - "?<before>/<after>/":
        Adds <before> and <after> to the actual value if it evaluates to True.
        Otherwise the whole replacement field becomes an empty string.
        Example: {f:?-+/+-/} -> "-+Example+-" (if "f" contains "Example")
                             -> ""            (if "f" is None, 0, "")

    - "L<maxlen>/<replacement>/":
        Replaces the output with <replacement> if its length (in characters)
        exceeds <maxlen>. Otherwise everything is left as is.
        Example: {f:L5/too long/} -> "foo"      (if "f" is "foo")
                                  -> "too long" (if "f" is "foobar")

    - "J<separator>/":
        Joins elements of a list (or string) using <separator>
        Example: {f:J - /} -> "a - b - c" (if "f" is ["a", "b", "c"])

    - "R<old>/<new>/":
        Replaces all occurrences of <old> with <new>
        Example: {f:R /_/} -> "f_o_o_b_a_r" (if "f" is "f o o b a r")
    """

    def __init__(self, format_string, default=NONE, fmt=format):
        """Pre-compile 'format_string' into literal parts and field getters

        'default' is the value used when a field cannot be resolved;
        'fmt' is the fallback format function for fields without a
        format specifier.
        """
        self.default = default
        self.format = fmt
        # 'result' holds literal text plus one "" placeholder per field;
        # 'fields' pairs each placeholder index with its getter function
        self.result = []
        self.fields = []

        for literal_text, field_name, format_spec, conv in \
                _string.formatter_parser(format_string):
            if literal_text:
                self.result.append(literal_text)
            if field_name:
                self.fields.append((
                    len(self.result),
                    self._field_access(field_name, format_spec, conv),
                ))
                self.result.append("")

        if len(self.result) == 1:
            # fast path: a single field or a single literal needs no join
            if self.fields:
                self.format_map = self.fields[0][1]
            else:
                # return the parsed literal, not the raw 'format_string':
                # the parser has already collapsed escaped braces
                # ("{{" -> "{", "}}" -> "}")
                literal = self.result[0]
                self.format_map = lambda _: literal
            del self.result, self.fields

    def format_map(self, kwdict):
        """Apply 'kwdict' to the initial format_string and return its result"""
        # the placeholder slots of the shared 'result' list are overwritten
        # on every call
        result = self.result
        for index, func in self.fields:
            result[index] = func(kwdict)
        return "".join(result)

    def _field_access(self, field_name, format_spec, conversion):
        """Build the getter function for one replacement field"""
        fmt = self._parse_format_spec(format_spec, conversion)

        if "|" in field_name:
            # alternatives: "a|b|c"
            return self._apply_list([
                parse_field_name(fn)
                for fn in field_name.split("|")
            ], fmt)
        else:
            key, funcs = parse_field_name(field_name)
            if key in _GLOBALS:
                return self._apply_globals(_GLOBALS[key], funcs, fmt)
            if funcs:
                return self._apply(key, funcs, fmt)
            return self._apply_simple(key, fmt)

    def _apply(self, key, funcs, fmt):
        """Getter for 'key' followed by attribute/item accessors 'funcs'"""
        def wrap(kwdict):
            try:
                obj = kwdict[key]
                for func in funcs:
                    obj = func(obj)
            except Exception:
                # any lookup failure yields the default value
                obj = self.default
            return fmt(obj)
        return wrap

    def _apply_globals(self, gobj, funcs, fmt):
        """Getter for a global object produced by calling 'gobj'"""
        def wrap(_):
            try:
                obj = gobj()
                for func in funcs:
                    obj = func(obj)
            except Exception:
                obj = self.default
            return fmt(obj)
        return wrap

    def _apply_simple(self, key, fmt):
        """Getter for a plain 'key' without accessors"""
        def wrap(kwdict):
            return fmt(kwdict[key] if key in kwdict else self.default)
        return wrap

    def _apply_list(self, lst, fmt):
        """Getter returning the first truthy value of several alternatives

        'lst' is a list of (key, funcs) pairs as returned by
        parse_field_name().
        """
        def wrap(kwdict):
            for key, funcs in lst:
                try:
                    obj = _GLOBALS[key]() if key in _GLOBALS else kwdict[key]
                    for func in funcs:
                        obj = func(obj)
                    if obj:
                        break
                except Exception:
                    obj = None
            else:
                # no alternative was truthy: keep the last value unless it
                # is None, in which case the default is used
                if obj is None:
                    obj = self.default
            return fmt(obj)
        return wrap

    def _parse_format_spec(self, format_spec, conversion):
        """Combine a format specifier and a "!x" conversion into one function"""
        fmt = _build_format_func(format_spec, self.format)
        if not conversion:
            return fmt

        conversion = _CONVERSIONS[conversion]
        if fmt is self.format:
            # no format specifier: the conversion result is used directly
            return conversion
        else:
            return lambda obj: fmt(conversion(obj))
198

199

200
class ExpressionFormatter():
    """Generate text by evaluating a Python expression"""

    def __init__(self, expression, default=NONE, fmt=None):
        # 'default' and 'fmt' are accepted for interface parity with the
        # other formatter classes but are not used here
        compiled = util.compile_expression(expression)
        self.format_map = compiled
205

206

207
class FStringFormatter():
    """Generate text by evaluating an f-string literal"""

    def __init__(self, fstring, default=NONE, fmt=None):
        # wrap the template in a triple-quoted f-string literal and compile
        # that literal as an expression
        source = 'f"""{}"""'.format(fstring)
        self.format_map = util.compile_expression(source)
212

213

214
def _init_jinja():
    """Create the shared Jinja environment and store it on JinjaFormatter

    Reads the "jinja" config section; its "environment" mapping is passed
    to jinja2.Environment(), "policies" updates the environment's policies,
    and "filters" / "tests" name files whose contents are registered as
    custom filters / tests.
    """
    import jinja2
    from . import config

    opts = config.get((), "jinja")
    if not opts:
        JinjaFormatter.env = jinja2.Environment()
        return

    env = jinja2.Environment(**(opts.get("environment") or {}))
    JinjaFormatter.env = env

    policies = opts.get("policies")
    if policies:
        env.policies.update(policies)

    # "filters" -> env.filters / "__filters__"
    # "tests"   -> env.tests   / "__tests__"
    for option in ("filters", "tests"):
        path = opts.get(option)
        if not path:
            continue
        namespace = util.import_file(path).__dict__
        export = f"__{option}__"
        # use the explicit export mapping if the file defines one,
        # otherwise register the file's whole namespace
        getattr(env, option).update(
            namespace[export] if export in namespace else namespace)
237

238

239
class JinjaFormatter():
    """Generate text by evaluating a Jinja template string"""
    # shared environment, created by _init_jinja() on first use
    env = None

    def __init__(self, source, default=NONE, fmt=None):
        if self.env is None:
            _init_jinja()
        template = self.env.from_string(source)
        self.format_map = template.render
247

248

249
class ModuleFormatter():
    """Generate text by calling an external function"""

    def __init__(self, function_spec, default=NONE, fmt=None):
        # "<module path>:<function name>", split at the last colon
        parts = function_spec.rpartition(":")
        module = util.import_file(parts[0])
        self.format_map = getattr(module, parts[2])
256

257

258
class TemplateFormatter(StringFormatter):
    """Read format_string from file"""

    def __init__(self, path, default=NONE, fmt=format):
        template_path = util.expand_path(path)
        with open(template_path) as fp:
            content = fp.read()
        super().__init__(content, default, fmt)
265

266

267
class TemplateFStringFormatter(FStringFormatter):
    """Read f-string from file"""

    def __init__(self, path, default=NONE, fmt=None):
        template_path = util.expand_path(path)
        with open(template_path) as fp:
            content = fp.read()
        super().__init__(content, default, fmt)
274

275

276
class TemplateJinjaFormatter(JinjaFormatter):
    """Generate text by evaluating a Jinja template read from a file"""

    def __init__(self, path, default=NONE, fmt=None):
        template_path = util.expand_path(path)
        with open(template_path) as fp:
            content = fp.read()
        super().__init__(content, default, fmt)
283

284

285
def parse_field_name(field_name):
    """Split 'field_name' into its first key and a list of accessor functions

    A name starting with a single quote is a literal: it maps to the
    "_lit" key with an item getter for the text between the first and last
    character. Otherwise each ".attr" / "[item]" part becomes one accessor;
    "[a:b:c]" items become slices and "[ba:b:c]" items become byte slices.
    """
    if field_name[0] == "'":
        literal = field_name[1:-1]
        return "_lit", (operator.itemgetter(literal),)

    first, rest = _string.formatter_field_name_split(field_name)
    funcs = []

    for is_attr, key in rest:
        if is_attr:
            accessor = operator.attrgetter(key)
        elif not isinstance(key, str):
            # integer index
            accessor = operator.itemgetter(key)
        elif ":" not in key:
            # plain string key, optionally quoted
            accessor = operator.itemgetter(key.strip("\"'"))
        elif key[0] == "b":
            accessor = _bytesgetter(_slice(key[1:]))
        else:
            accessor = operator.itemgetter(_slice(key))
        funcs.append(accessor)

    return first, funcs
312

313

314
def _slice(indices):
315
    start, _, stop = indices.partition(":")
316
    stop, _, step = stop.partition(":")
317
    return slice(
318
        int(start) if start else None,
319
        int(stop) if stop else None,
320
        int(step) if step else None,
321
    )
322

323

324
def _bytesgetter(slice, encoding=sys.getfilesystemencoding()):
325

326
    def apply_slice_bytes(obj):
327
        return obj.encode(encoding)[slice].decode(encoding, "ignore")
328

329
    return apply_slice_bytes
330

331

332
def _build_format_func(format_spec, default):
333
    if format_spec:
334
        return _FORMAT_SPECIFIERS.get(
335
            format_spec[0], _default_format)(format_spec, default)
336
    return default
337

338

339
def _parse_optional(format_spec, default):
    """Build the "?<before>/<after>/" specifier function

    Truthy values are formatted and wrapped in <before> and <after>;
    falsy values produce an empty string.
    """
    prefix, suffix, remainder = format_spec.split(_SEPARATOR, 2)
    prefix = prefix[1:]  # drop the leading "?"
    inner = _build_format_func(remainder, default)

    def optional(obj):
        if not obj:
            return ""
        return f"{prefix}{inner(obj)}{suffix}"
    return optional
347

348

349
def _parse_slice(format_spec, default):
    """Build the "[start:stop:step]" / "[bstart:stop:step]" specifier function

    A "b" right after the opening bracket slices the encoded bytes of the
    value instead of the value itself.
    """
    indices, _, remainder = format_spec.partition("]")
    inner = _build_format_func(remainder, default)

    if indices[1] == "b":
        get_bytes = _bytesgetter(_slice(indices[2:]))

        def apply_slice(obj):
            return inner(get_bytes(obj))

        return apply_slice

    bounds = _slice(indices[1:])

    def apply_slice(obj):
        return inner(obj[bounds])

    return apply_slice
366

367

368
def _parse_arithmetic(format_spec, default):
    """Build the "A<op><value>/" specifier function

    <op> is one of "+", "-", "*" and <value> an integer. Any other
    operator character leaves the value untouched.
    """
    head, _, remainder = format_spec.partition(_SEPARATOR)
    inner = _build_format_func(remainder, default)

    operand = int(head[2:])
    symbol = head[1]

    operation = {
        "+": operator.add,
        "-": operator.sub,
        "*": operator.mul,
    }.get(symbol)

    if operation is None:
        return inner
    return lambda obj: inner(operation(obj, operand))
383

384

385
def _parse_conversion(format_spec, default):
    """Build the "C<conversions>/" specifier function

    Every character after the leading "C" names an entry of _CONVERSIONS;
    the conversions are applied left to right before the rest of the
    format specifier is applied.

    Raises KeyError for an unknown conversion character. A specifier
    without any conversion characters applies only the remaining format.
    """
    conversions, _, format_spec = format_spec.partition(_SEPARATOR)
    # look up each conversion exactly once
    convs = [_CONVERSIONS[c] for c in conversions[1:]]
    fmt = _build_format_func(format_spec, default)

    if not convs:
        # bare "C": nothing to convert
        return fmt

    if len(convs) == 1:
        conv = convs[0]

        def convert_one(obj):
            return fmt(conv(obj))
        return convert_one

    def convert_many(obj):
        for conv in convs:
            obj = conv(obj)
        return fmt(obj)
    return convert_many
403

404

405
def _parse_maxlen(format_spec, default):
    """Build the "L<maxlen>/<replacement>/" specifier function

    The formatted value is replaced by <replacement> if it is longer than
    <maxlen>.
    """
    head, replacement, remainder = format_spec.split(_SEPARATOR, 2)
    threshold = text.parse_int(head[1:])
    inner = _build_format_func(remainder, default)

    def mlen(obj):
        formatted = inner(obj)
        if len(formatted) > threshold:
            return replacement
        return formatted
    return mlen
414

415

416
def _parse_join(format_spec, default):
    """Build the "J<separator>/" specifier function

    Strings are passed through unchanged; any other value is joined with
    <separator> before formatting.
    """
    head, _, remainder = format_spec.partition(_SEPARATOR)
    glue = head[1:]
    inner = _build_format_func(remainder, default)

    def apply_join(obj):
        return inner(obj if isinstance(obj, str) else glue.join(obj))
    return apply_join
426

427

428
def _parse_map(format_spec, default):
    """Build the "M<key>/" specifier function

    For an iterable value, every dict item is replaced by its <key> entry
    while non-dict items are kept as they are. Falsy values and strings
    are formatted unchanged.
    """
    head, _, remainder = format_spec.partition(_SEPARATOR)
    name = head[1:]
    inner = _build_format_func(remainder, default)

    def pick(item):
        if not isinstance(item, dict):
            return item
        value = item.get(name, ...)
        # NOTE(review): 'default' is the fallback format callable handed to
        # this builder, not a default value, so a missing key inserts that
        # callable into the result -- confirm this is intended
        return default if value is ... else value

    def map_(obj):
        if not obj or isinstance(obj, str):
            return inner(obj)
        return inner([pick(item) for item in obj])

    return map_
447

448

449
def _parse_replace(format_spec, default):
    """Build the "R<old>/<new>/" specifier function

    Calls the value's replace() method with <old> and <new> before
    formatting.
    """
    head, substitute, remainder = format_spec.split(_SEPARATOR, 2)
    target = head[1:]
    inner = _build_format_func(remainder, default)

    def replace(obj):
        replaced = obj.replace(target, substitute)
        return inner(replaced)
    return replace
457

458

459
def _parse_datetime(format_spec, default):
    """Build the "D<format>/" specifier function

    Passes the value and <format> to text.parse_datetime() and formats
    the result.
    """
    head, _, remainder = format_spec.partition(_SEPARATOR)
    pattern = head[1:]
    inner = _build_format_func(remainder, default)

    def dt(obj):
        parsed = text.parse_datetime(obj, pattern)
        return inner(parsed)
    return dt
467

468

469
def _parse_offset(format_spec, default):
    """Build the "O<offset>/" specifier function

    <offset> is either empty / "local", which adds the local UTC offset
    in effect at the value's timestamp, or "[+-]<hours>[:<minutes>]",
    which adds that fixed offset to the value.

    Raises ValueError if <hours> or <minutes> are not integers.
    """
    offset, _, format_spec = format_spec.partition(_SEPARATOR)
    offset = offset[1:]
    fmt = _build_format_func(format_spec, default)

    if not offset or offset == "local":
        def off(dt):
            local = time.localtime(util.datetime_to_timestamp(dt))
            return fmt(dt + datetime.timedelta(0, local.tm_gmtoff))
    else:
        hours, _, minutes = offset.partition(":")
        seconds = 3600 * int(hours)
        if minutes:
            # take the sign from the text itself: int("-00") and int("+00")
            # are both 0, so the numeric hour value cannot tell
            # "-00:30" apart from "+00:30" / "00:30"
            if hours.lstrip().startswith("-"):
                seconds -= 60 * int(minutes)
            else:
                seconds += 60 * int(minutes)
        delta = datetime.timedelta(0, seconds)

        def off(obj):
            return fmt(obj + delta)
    return off
488

489

490
def _parse_sort(format_spec, default):
    """Build the "S<flags>/" specifier function

    Sorts the value before formatting; a "d" or "r" among the flags
    selects descending order.
    """
    flags, _, remainder = format_spec.partition(_SEPARATOR)
    inner = _build_format_func(remainder, default)
    descending = "d" in flags or "r" in flags

    def sort(obj):
        return inner(sorted(obj, reverse=descending))
    return sort
502

503

504
def _parse_limit(format_spec, default):
    """Build the "X<limit>/<hint>/" specifier function

    Values longer than <limit> are cut so that the kept part plus <hint>
    is <limit> long. If <hint> itself is longer than <limit>, nothing of
    the value is kept and only <hint> is used.

    Raises ValueError if <limit> is not an integer.
    """
    limit, hint, format_spec = format_spec.split(_SEPARATOR, 2)
    limit = int(limit[1:])
    # clamp at 0: a negative end index would slice relative to the END of
    # the value and keep almost all of it
    limit_hint = max(limit - len(hint), 0)
    fmt = _build_format_func(format_spec, default)

    def apply_limit(obj):
        if len(obj) > limit:
            obj = obj[:limit_hint] + hint
        return fmt(obj)
    return apply_limit
515

516

517
def _default_format(format_spec, default):
518
    def wrap(obj):
519
        return format(obj, format_spec)
520
    return wrap
521

522

523
class Literal():
    """Object whose item access returns the requested key itself"""
    # a regular __getitem__ is used because __getattr__, __getattribute__,
    # and __class_getitem__ are all slower

    def __getitem__(self, key):
        return key
529

530

531
# shared instance behind the "_lit" global
_literal = Literal()

# formatter objects created by parse(), keyed by its argument tuple
_CACHE = {}

# delimiter between the arguments of a format specifier
_SEPARATOR = "/"

# formatter classes selectable with a "\f<type> " prefix in parse()
_FORMATTERS = {
    "E": ExpressionFormatter,
    "F": FStringFormatter,
    "J": JinjaFormatter,
    "M": ModuleFormatter,
    "S": StringFormatter,
    "T": TemplateFormatter,
    "TF": TemplateFStringFormatter,
    "FT": TemplateFStringFormatter,
    "TJ": TemplateJinjaFormatter,
    "JT": TemplateJinjaFormatter,
}

# field names resolved by calling a function instead of a kwdict lookup
_GLOBALS = {
    "_env": lambda: os.environ,
    "_lit": lambda: _literal,
    "_now": datetime.datetime.now,
    "_nul": lambda: util.NONE,
}

# conversion characters ("{field!x}" and the "C" format specifier)
_CONVERSIONS = {
    "l": str.lower,
    "u": str.upper,
    "c": str.capitalize,
    "C": string.capwords,
    "j": util.json_dumps,
    "t": str.strip,
    "n": len,
    "L": util.code_to_language,
    "T": util.datetime_to_timestamp_string,
    "d": text.parse_timestamp,
    "D": util.to_datetime,
    "U": text.unescape,
    "H": lambda s: text.unescape(text.remove_html(s)),
    "g": text.slugify,
    "R": text.re(r"https?://[^\s\"']+").findall,
    "W": text.sanitize_whitespace,
    "S": util.to_string,
    "s": str,
    "r": repr,
    "a": ascii,
    "i": int,
    "f": float,
}

# builders selected by the first character of a format specifier
_FORMAT_SPECIFIERS = {
    "?": _parse_optional,
    "[": _parse_slice,
    "A": _parse_arithmetic,
    "C": _parse_conversion,
    "D": _parse_datetime,
    "J": _parse_join,
    "L": _parse_maxlen,
    "M": _parse_map,
    "O": _parse_offset,
    "R": _parse_replace,
    "S": _parse_sort,
    "X": _parse_limit,
}
591

592
Product

Resources

Company