Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/formatter.py
5457 views
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2021-2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8
9
"""String formatters"""
10
11
import os
12
import sys
13
import time
14
import string
15
import _string
16
import datetime
17
import operator
18
from . import text, util
19
20
# Module-level alias for util.NONE; used as the default value of the
# 'default' argument of parse() and of the formatter classes below
NONE = util.NONE
21
22
23
def parse(format_string, default=NONE, fmt=format):
    """Return a formatter object for 'format_string', reusing cached ones

    A format string that starts with a form feed character selects its
    formatter class through the type name between that character and the
    first space; the text after that space becomes the actual format string.
    Unknown type names are logged as an error and handled by StringFormatter,
    as is every format string without such a prefix.
    """
    cache_key = (format_string, default, fmt)
    if cache_key in _CACHE:
        return _CACHE[cache_key]

    cls = StringFormatter
    if format_string and format_string[0] == "\f":
        prefix, _, format_string = format_string.partition(" ")
        type_name = prefix[1:]
        cls = _FORMATTERS.get(type_name)
        if cls is None:
            import logging
            logging.getLogger("formatter").error(
                "Invalid formatter type '%s'", type_name)
            cls = StringFormatter

    formatter = cls(format_string, default, fmt)
    _CACHE[cache_key] = formatter
    return formatter
45
46
47
class StringFormatter():
    """Custom, extended version of string.Formatter

    This string formatter implementation is a mostly performance-optimized
    variant of the original string.Formatter class. Unnecessary features have
    been removed (positional arguments, unused argument check) and new
    formatting options have been added.

    Extra Conversions:
    - "l": calls str.lower on the target value
    - "u": calls str.upper
    - "c": calls str.capitalize
    - "C": calls string.capwords
    - "g": calls text.slugify()
    - "j": calls util.json_dumps()
    - "t": calls str.strip
    - "T": calls util.datetime_to_timestamp_string()
    - "d": calls text.parse_timestamp
    - "s": calls str()
    - "S": calls util.to_string()
    - "U": calls text.unescape()
    - "r": calls repr()
    - "a": calls ascii()
    - Example: {f!l} -> "example"; {f!u} -> "EXAMPLE"

    # Go to _CONVERSIONS and _FORMAT_SPECIFIERS below to see all of them,
    # or read:
    # https://github.com/mikf/gallery-dl/blob/master/docs/formatting.md

    Extra Format Specifiers:
    - "?<before>/<after>/":
        Adds <before> and <after> to the actual value if it evaluates to True.
        Otherwise the whole replacement field becomes an empty string.
        Example: {f:?-+/+-/} -> "-+Example+-" (if "f" contains "Example")
                             -> ""            (if "f" is None, 0, "")

    - "L<maxlen>/<replacement>/":
        Replaces the output with <replacement> if its length (in characters)
        exceeds <maxlen>. Otherwise everything is left as is.
        Example: {f:L5/too long/} -> "foo"      (if "f" is "foo")
                                  -> "too long" (if "f" is "foobar")

    - "J<separator>/":
        Joins elements of a list (or string) using <separator>
        Example: {f:J - /} -> "a - b - c" (if "f" is ["a", "b", "c"])

    - "R<old>/<new>/":
        Replaces all occurrences of <old> with <new>
        Example: {f:R /_/} -> "f_o_o_b_a_r" (if "f" is "f o o b a r")
    """

    def __init__(self, format_string, default=NONE, fmt=format):
        """Parse 'format_string' into literal parts and field functions

        'default' is the value used for fields that cannot be resolved and
        'fmt' is the function applied to a value when no (or no further)
        format specifier is given.
        """
        self.default = default
        self.format = fmt
        # 'result' holds all literal text parts plus one placeholder per
        # replacement field; 'fields' maps placeholder indices to the
        # functions that compute their values
        self.result = []
        self.fields = []

        for literal_text, field_name, format_spec, conv in \
                _string.formatter_parser(format_string):
            if literal_text:
                self.result.append(literal_text)
            if field_name:
                self.fields.append((
                    len(self.result),
                    self._field_access(field_name, format_spec, conv),
                ))
                self.result.append("")

        if len(self.result) == 1:
            # fast paths for format strings with exactly one part
            if self.fields:
                self.format_map = self.fields[0][1]
            else:
                # return the parsed literal and not the raw format string,
                # so that escaped braces ("{{" and "}}") are unescaped
                literal = self.result[0]
                self.format_map = lambda _: literal
            del self.result, self.fields

    def format_map(self, kwdict):
        """Apply 'kwdict' to the initial format_string and return its result"""
        # the 'result' list is reused and overwritten on every call
        result = self.result
        for index, func in self.fields:
            result[index] = func(kwdict)
        return "".join(result)

    def _field_access(self, field_name, format_spec, conversion):
        """Return a function computing a replacement field's formatted value"""
        fmt = self._parse_format_spec(format_spec, conversion)

        if "|" in field_name:
            # alternatives: "a|b|c"
            return self._apply_list([
                parse_field_name(fn)
                for fn in field_name.split("|")
            ], fmt)
        else:
            key, funcs = parse_field_name(field_name)
            if key in _GLOBALS:
                return self._apply_globals(_GLOBALS[key], funcs, fmt)
            if funcs:
                return self._apply(key, funcs, fmt)
            return self._apply_simple(key, fmt)

    def _apply(self, key, funcs, fmt):
        """Field access with attribute/item lookups on kwdict[key]"""
        def wrap(kwdict):
            try:
                obj = kwdict[key]
                for func in funcs:
                    obj = func(obj)
            except Exception:
                # any failed lookup yields the default value
                obj = self.default
            return fmt(obj)
        return wrap

    def _apply_globals(self, gobj, funcs, fmt):
        """Field access on the object returned by calling 'gobj'"""
        def wrap(_):
            try:
                obj = gobj()
                for func in funcs:
                    obj = func(obj)
            except Exception:
                obj = self.default
            return fmt(obj)
        return wrap

    def _apply_simple(self, key, fmt):
        """Plain kwdict[key] access without further lookups"""
        def wrap(kwdict):
            return fmt(kwdict[key] if key in kwdict else self.default)
        return wrap

    def _apply_list(self, lst, fmt):
        """Use the first alternative in 'lst' that yields a true value"""
        def wrap(kwdict):
            for key, funcs in lst:
                try:
                    obj = _GLOBALS[key]() if key in _GLOBALS else kwdict[key]
                    for func in funcs:
                        obj = func(obj)
                    if obj:
                        break
                except Exception:
                    obj = None
            else:
                # no alternative was true; only replace the last value
                # with the default if its lookup failed
                if obj is None:
                    obj = self.default
            return fmt(obj)
        return wrap

    def _parse_format_spec(self, format_spec, conversion):
        """Combine format specifier and conversion into a single function

        The conversion is applied first, its result is then passed to the
        function built from 'format_spec'.
        Raises KeyError for an unknown conversion character.
        """
        fmt = _build_format_func(format_spec, self.format)
        if not conversion:
            return fmt

        conversion = _CONVERSIONS[conversion]
        if fmt is self.format:
            # no format specifier: the conversion result is used as is
            return conversion
        else:
            return lambda obj: fmt(conversion(obj))
198
199
200
class ExpressionFormatter():
    """Produce text by evaluating a Python expression"""

    def __init__(self, expression, default=NONE, fmt=None):
        # 'default' and 'fmt' are accepted for interface compatibility
        # with the other formatter classes, but not used
        compiled = util.compile_expression(expression)
        self.format_map = compiled
205
206
207
class FStringFormatter():
    """Produce text by evaluating an f-string literal"""

    def __init__(self, fstring, default=NONE, fmt=None):
        # wrap the given text in a triple-quoted f-string literal
        # and compile that literal as an expression
        source = 'f"""{}"""'.format(fstring)
        self.format_map = util.compile_expression(source)
212
213
214
def _init_jinja():
    """Create the Jinja environment shared by all JinjaFormatter objects

    Reads the "jinja" config value. Its "environment" entry supplies keyword
    arguments for jinja2.Environment, "policies" updates the environment's
    policies, and "filters" / "tests" name files to import whose contents
    (or whose "__filters__" / "__tests__" value, if present) are registered
    as additional filters / tests.
    """
    import jinja2
    from . import config

    options = config.get((), "jinja")
    if not options:
        JinjaFormatter.env = jinja2.Environment()
        return

    env_args = options.get("environment") or {}
    environment = jinja2.Environment(**env_args)
    JinjaFormatter.env = environment

    policies = options.get("policies")
    if policies:
        environment.policies.update(policies)

    # option name == name of the environment attribute to update
    for option, export_name in (
        ("filters", "__filters__"),
        ("tests", "__tests__"),
    ):
        path = options.get(option)
        if not path:
            continue
        namespace = util.import_file(path).__dict__
        getattr(environment, option).update(
            namespace.get(export_name, namespace))
237
238
239
class JinjaFormatter():
    """Produce text by rendering a Jinja template string"""
    # shared environment; created by _init_jinja() on first use
    env = None

    def __init__(self, source, default=NONE, fmt=None):
        if self.env is None:
            _init_jinja()
        template = self.env.from_string(source)
        self.format_map = template.render
247
248
249
class ModuleFormatter():
    """Produce text by calling a function from an external module"""

    def __init__(self, function_spec, default=NONE, fmt=None):
        # "<module>:<function>", split at the last colon
        parts = function_spec.rpartition(":")
        module = util.import_file(parts[0])
        self.format_map = getattr(module, parts[2])
256
257
258
class TemplateFormatter(StringFormatter):
    """StringFormatter that loads its format string from a file"""

    def __init__(self, path, default=NONE, fmt=format):
        template_path = util.expand_path(path)
        with open(template_path) as template_file:
            content = template_file.read()
        StringFormatter.__init__(self, content, default, fmt)
265
266
267
class TemplateFStringFormatter(FStringFormatter):
    """FStringFormatter that loads its f-string from a file"""

    def __init__(self, path, default=NONE, fmt=None):
        template_path = util.expand_path(path)
        with open(template_path) as template_file:
            content = template_file.read()
        FStringFormatter.__init__(self, content, default, fmt)
274
275
276
class TemplateJinjaFormatter(JinjaFormatter):
    """JinjaFormatter that loads its template from a file"""

    def __init__(self, path, default=NONE, fmt=None):
        template_path = util.expand_path(path)
        with open(template_path) as template_file:
            content = template_file.read()
        JinjaFormatter.__init__(self, content, default, fmt)
283
284
285
def parse_field_name(field_name):
    """Split 'field_name' into its first key and a list of access functions

    Returns a (key, funcs) tuple, where each function in 'funcs' performs
    one attribute lookup, item lookup, or slice operation.
    A name starting with a single quote is treated as a literal: its key
    is "_lit" and its only function is an itemgetter for the text between
    the first and last character of 'field_name'.
    """
    if field_name[0] == "'":
        literal = field_name[1:-1]
        return "_lit", (operator.itemgetter(literal),)

    first, rest = _string.formatter_field_name_split(field_name)
    accessors = []

    for is_attr, key in rest:
        if is_attr:
            accessors.append(operator.attrgetter(key))
            continue

        getter = operator.itemgetter
        if isinstance(key, str):  # integer keys are used as they are
            if ":" not in key:
                key = key.strip("\"'")
            elif key[0] == "b":
                # "[b<start>:<stop>]" slices the encoded bytes of a value
                getter = _bytesgetter
                key = _slice(key[1:])
            else:
                key = _slice(key)
        accessors.append(getter(key))

    return first, accessors
312
313
314
def _slice(indices):
315
start, _, stop = indices.partition(":")
316
stop, _, step = stop.partition(":")
317
return slice(
318
int(start) if start else None,
319
int(stop) if stop else None,
320
int(step) if step else None,
321
)
322
323
324
def _bytesgetter(slice, encoding=sys.getfilesystemencoding()):
325
326
def apply_slice_bytes(obj):
327
return obj.encode(encoding)[slice].decode(encoding, "ignore")
328
329
return apply_slice_bytes
330
331
332
def _build_format_func(format_spec, default):
333
if format_spec:
334
return _FORMAT_SPECIFIERS.get(
335
format_spec[0], _default_format)(format_spec, default)
336
return default
337
338
339
def _parse_optional(format_spec, default):
    """Build a format function for the "?<before>/<after>/" specifier"""
    head, after, remaining = format_spec.split(_SEPARATOR, 2)
    before = head[1:]
    fmt = _build_format_func(remaining, default)

    def optional(obj):
        if not obj:
            # false values make the whole field disappear
            return ""
        return "{}{}{}".format(before, fmt(obj), after)
    return optional
347
348
349
def _parse_slice(format_spec, default):
    """Build a format function for the "[<start>:<stop>]" specifier

    A "b" directly after the opening bracket selects slicing of the value's
    encoded bytes instead of the value itself.
    """
    indices, _, remaining = format_spec.partition("]")
    fmt = _build_format_func(remaining, default)

    if indices[1] == "b":
        bytes_getter = _bytesgetter(_slice(indices[2:]))

        def apply_bytes_slice(obj):
            return fmt(bytes_getter(obj))
        return apply_bytes_slice

    selection = _slice(indices[1:])

    def apply_slice(obj):
        return fmt(obj[selection])
    return apply_slice
366
367
368
def _parse_arithmetic(format_spec, default):
    """Build a format function for the "A<op><value>/" specifier

    Supported operators are "+", "-", and "*"; any other operator
    leaves the value unchanged.
    Raises ValueError if <value> is not a valid integer.
    """
    head, _, remaining = format_spec.partition(_SEPARATOR)
    fmt = _build_format_func(remaining, default)

    value = int(head[2:])
    symbol = head[1]

    operations = {
        "+": operator.add,
        "-": operator.sub,
        "*": operator.mul,
    }
    apply = operations.get(symbol)
    if apply is None:
        return fmt
    return lambda obj: fmt(apply(obj, value))
383
384
385
def _parse_conversion(format_spec, default):
    """Build a format function for the "C<conversions>/" specifier

    Every character after the leading "C" is looked up in _CONVERSIONS.
    The resulting functions are applied to the value from left to right,
    before the remaining format specifiers are applied.

    Raises KeyError for unknown conversion characters.
    """
    conversions, _, format_spec = format_spec.partition(_SEPARATOR)
    convs = [_CONVERSIONS[c] for c in conversions[1:]]
    fmt = _build_format_func(format_spec, default)

    if not convs:
        # no conversion characters ("C/"):
        # only apply the remaining format specifiers
        return fmt

    if len(convs) == 1:
        # avoid the loop overhead for the common single-conversion case
        conv = convs[0]

        def convert_one(obj):
            return fmt(conv(obj))
        return convert_one

    def convert_many(obj):
        for conv in convs:
            obj = conv(obj)
        return fmt(obj)
    return convert_many
403
404
405
def _parse_maxlen(format_spec, default):
    """Build a format function for the "L<maxlen>/<replacement>/" specifier"""
    head, replacement, remaining = format_spec.split(_SEPARATOR, 2)
    maxlen = text.parse_int(head[1:])
    fmt = _build_format_func(remaining, default)

    def mlen(obj):
        formatted = fmt(obj)
        if len(formatted) > maxlen:
            return replacement
        return formatted
    return mlen
414
415
416
def _parse_join(format_spec, default):
    """Build a format function for the "J<separator>/" specifier"""
    head, _, remaining = format_spec.partition(_SEPARATOR)
    join = head[1:].join
    fmt = _build_format_func(remaining, default)

    def apply_join(obj):
        # strings are passed on without joining their characters
        return fmt(obj if isinstance(obj, str) else join(obj))
    return apply_join
426
427
428
def _parse_map(format_spec, default):
    """Build a format function for the "M<key>/" specifier

    Replaces every dict in an iterable value with that dict's <key> entry.
    Non-dict elements are kept, and false values as well as strings are
    passed on unchanged.
    """
    head, _, remaining = format_spec.partition(_SEPARATOR)
    key = head[1:]
    fmt = _build_format_func(remaining, default)

    def pick(item):
        if not isinstance(item, dict):
            return item
        value = item.get(key, ...)
        # NOTE(review): 'default' is this function's second argument,
        # i.e. the fallback format function and not a default field value;
        # confirm that inserting it for missing keys is intended
        return default if value is ... else value

    def map_(obj):
        if not obj or isinstance(obj, str):
            return fmt(obj)
        return fmt([pick(item) for item in obj])

    return map_
447
448
449
def _parse_replace(format_spec, default):
    """Build a format function for the "R<old>/<new>/" specifier"""
    head, new, remaining = format_spec.split(_SEPARATOR, 2)
    old = head[1:]
    fmt = _build_format_func(remaining, default)

    def replace(obj):
        replaced = obj.replace(old, new)
        return fmt(replaced)
    return replace
457
458
459
def _parse_datetime(format_spec, default):
    """Build a format function for the "D<format>/" specifier

    The value is passed to text.parse_datetime() together with <format>.
    """
    head, _, remaining = format_spec.partition(_SEPARATOR)
    pattern = head[1:]
    fmt = _build_format_func(remaining, default)

    def dt(obj):
        parsed = text.parse_datetime(obj, pattern)
        return fmt(parsed)
    return dt
467
468
469
def _parse_offset(format_spec, default):
    """Build a format function for the "O<offset>/" specifier

    <offset> is either empty or "local", in which case the tm_gmtoff value
    of time.localtime() for the given datetime is added, or a fixed offset
    in the form "<hours>" or "<hours>:<minutes>" with an optional sign.

    Raises ValueError if <hours> or <minutes> is not a valid integer.
    """
    offset, _, format_spec = format_spec.partition(_SEPARATOR)
    offset = offset[1:]
    fmt = _build_format_func(format_spec, default)

    if not offset or offset == "local":
        def off(dt):
            local = time.localtime(util.datetime_to_timestamp(dt))
            return fmt(dt + datetime.timedelta(0, local.tm_gmtoff))
    else:
        hours, _, minutes = offset.partition(":")
        seconds = 3600 * int(hours)
        if minutes:
            # Take the sign from the text and not from the hour value:
            # int("-00") and int("+00") are both 0, so "+00:30" and "0:30"
            # must not be treated like "-00:30".
            sign = -1 if hours.lstrip().startswith("-") else 1
            seconds += sign * 60 * int(minutes)
        delta = datetime.timedelta(0, seconds)

        def off(obj):
            return fmt(obj + delta)
    return off
488
489
490
def _parse_sort(format_spec, default):
    """Build a format function for the "S<flags>/" specifier

    The value gets sorted in descending order if <flags> contains "d" or
    "r", and in ascending order otherwise.
    """
    flags, _, remaining = format_spec.partition(_SEPARATOR)
    fmt = _build_format_func(remaining, default)
    descending = "d" in flags or "r" in flags

    if descending:
        def sort_desc(obj):
            return fmt(sorted(obj, reverse=True))
        return sort_desc

    def sort_asc(obj):
        return fmt(sorted(obj))
    return sort_asc
502
503
504
def _parse_limit(format_spec, default):
    """Build a format function for the "X<limit>/<hint>/" specifier

    Values longer than <limit> are cut and get <hint> appended, so that
    the combined length equals <limit>.
    Raises ValueError if <limit> is not a valid integer.
    """
    head, hint, remaining = format_spec.split(_SEPARATOR, 2)
    limit = int(head[1:])
    # NOTE(review): negative when <hint> is longer than <limit>,
    # in which case the slice below counts from the end of the value
    keep = limit - len(hint)
    fmt = _build_format_func(remaining, default)

    def apply_limit(obj):
        return fmt((obj[:keep] + hint) if len(obj) > limit else obj)
    return apply_limit
515
516
517
def _default_format(format_spec, default):
518
def wrap(obj):
519
return format(obj, format_spec)
520
return wrap
521
522
523
class Literal():
    """Object whose item lookup returns the requested key itself"""
    # implemented with __getitem__, since __getattr__, __getattribute__,
    # and __class_getitem__ are all slower than regular __getitem__

    def __getitem__(self, key):
        return key
529
530
531
# shared Literal instance returned by the "_lit" global
_literal = Literal()

# formatter objects created by parse(),
# keyed by (format_string, default, fmt)
_CACHE = {}
# character that terminates the arguments of extended format specifiers
_SEPARATOR = "/"
# formatter classes selectable by the type name that follows
# a leading form feed character in a format string (see parse())
_FORMATTERS = {
    "E" : ExpressionFormatter,
    "F" : FStringFormatter,
    "J" : JinjaFormatter,
    "M" : ModuleFormatter,
    "S" : StringFormatter,
    "T" : TemplateFormatter,
    "TF": TemplateFStringFormatter,
    "FT": TemplateFStringFormatter,
    "TJ": TemplateJinjaFormatter,
    "JT": TemplateJinjaFormatter,
}
# field names that are resolved by calling the associated function
# instead of looking them up in the given kwdict
_GLOBALS = {
    "_env": lambda: os.environ,
    "_lit": lambda: _literal,
    "_now": datetime.datetime.now,
    "_nul": lambda: util.NONE,
}
# conversion characters, usable as "{field!<char>}"
# or as part of a "C<chars>/" format specifier
_CONVERSIONS = {
    "l": str.lower,
    "u": str.upper,
    "c": str.capitalize,
    "C": string.capwords,
    "j": util.json_dumps,
    "t": str.strip,
    "n": len,
    "L": util.code_to_language,
    "T": util.datetime_to_timestamp_string,
    "d": text.parse_timestamp,
    "D": util.to_datetime,
    "U": text.unescape,
    "H": lambda s: text.unescape(text.remove_html(s)),
    "g": text.slugify,
    "R": text.re(r"https?://[^\s\"']+").findall,
    "W": text.sanitize_whitespace,
    "S": util.to_string,
    "s": str,
    "r": repr,
    "a": ascii,
    "i": int,
    "f": float,
}
# builder functions for extended format specifiers, selected by the
# first character of a format specifier (see _build_format_func())
_FORMAT_SPECIFIERS = {
    "?": _parse_optional,
    "[": _parse_slice,
    "A": _parse_arithmetic,
    "C": _parse_conversion,
    "D": _parse_datetime,
    "J": _parse_join,
    "L": _parse_maxlen,
    "M": _parse_map,
    "O": _parse_offset,
    "R": _parse_replace,
    "S": _parse_sort,
    "X": _parse_limit,
}
591
592