Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Objects/stringlib/unicode_format.h
12 views
1
/*
2
unicode_format.h -- implementation of str.format().
3
*/
4
5
#include "pycore_floatobject.h" // _PyFloat_FormatAdvancedWriter()
6
7
/************************************************************************/
8
/*********** Global data structures and forward declarations *********/
9
/************************************************************************/
10
11
/*
12
A SubString consists of the characters between two string or
13
unicode pointers.
14
*/
15
typedef struct {
16
PyObject *str; /* borrowed reference */
17
Py_ssize_t start, end;
18
} SubString;
19
20
21
/* Keep track if we're auto-numbering fields */
typedef enum {
    ANS_INIT = 0,       /* no numeric or empty field name seen yet */
    ANS_AUTO = 1,       /* field names are being omitted: "{}" */
    ANS_MANUAL = 2      /* explicit numeric field names: "{0}" */
} AutoNumberState;

/* Keeps track of our auto-numbering state, and which number field we're on */
typedef struct {
    AutoNumberState an_state;
    int an_field_number;    /* index handed to the next omitted field name */
} AutoNumber;
32
33
34
/* forward declaration for recursion */
35
static PyObject *
36
build_string(SubString *input, PyObject *args, PyObject *kwargs,
37
int recursion_depth, AutoNumber *auto_number);
38
39
40
41
/************************************************************************/
42
/************************** Utility functions ************************/
43
/************************************************************************/
44
45
static void
46
AutoNumber_Init(AutoNumber *auto_number)
47
{
48
auto_number->an_state = ANS_INIT;
49
auto_number->an_field_number = 0;
50
}
51
52
/* fill in a SubString from a pointer and length */
53
Py_LOCAL_INLINE(void)
54
SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end)
55
{
56
str->str = s;
57
str->start = start;
58
str->end = end;
59
}
60
61
/* return a new string. if str->str is NULL, return None */
62
Py_LOCAL_INLINE(PyObject *)
63
SubString_new_object(SubString *str)
64
{
65
if (str->str == NULL)
66
Py_RETURN_NONE;
67
return PyUnicode_Substring(str->str, str->start, str->end);
68
}
69
70
/* return a new string. if str->str is NULL, return a new empty string */
71
Py_LOCAL_INLINE(PyObject *)
72
SubString_new_object_or_empty(SubString *str)
73
{
74
if (str->str == NULL) {
75
return PyUnicode_New(0, 0);
76
}
77
return SubString_new_object(str);
78
}
79
80
/* Return 1 if an error has been detected switching between automatic
81
field numbering and manual field specification, else return 0. Set
82
ValueError on error. */
83
static int
84
autonumber_state_error(AutoNumberState state, int field_name_is_empty)
85
{
86
if (state == ANS_MANUAL) {
87
if (field_name_is_empty) {
88
PyErr_SetString(PyExc_ValueError, "cannot switch from "
89
"manual field specification to "
90
"automatic field numbering");
91
return 1;
92
}
93
}
94
else {
95
if (!field_name_is_empty) {
96
PyErr_SetString(PyExc_ValueError, "cannot switch from "
97
"automatic field numbering to "
98
"manual field specification");
99
return 1;
100
}
101
}
102
return 0;
103
}
104
105
106
/************************************************************************/
107
/*********** Format string parsing -- integers and identifiers *********/
108
/************************************************************************/
109
110
static Py_ssize_t
111
get_integer(const SubString *str)
112
{
113
Py_ssize_t accumulator = 0;
114
Py_ssize_t digitval;
115
Py_ssize_t i;
116
117
/* empty string is an error */
118
if (str->start >= str->end)
119
return -1;
120
121
for (i = str->start; i < str->end; i++) {
122
digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));
123
if (digitval < 0)
124
return -1;
125
/*
126
Detect possible overflow before it happens:
127
128
accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
129
accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
130
*/
131
if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
132
PyErr_Format(PyExc_ValueError,
133
"Too many decimal digits in format string");
134
return -1;
135
}
136
accumulator = accumulator * 10 + digitval;
137
}
138
return accumulator;
139
}
140
141
/************************************************************************/
142
/******** Functions to get field objects and specification strings ******/
143
/************************************************************************/
144
145
/* do the equivalent of obj.name */
146
static PyObject *
147
getattr(PyObject *obj, SubString *name)
148
{
149
PyObject *newobj;
150
PyObject *str = SubString_new_object(name);
151
if (str == NULL)
152
return NULL;
153
newobj = PyObject_GetAttr(obj, str);
154
Py_DECREF(str);
155
return newobj;
156
}
157
158
/* do the equivalent of obj[idx], where obj is a sequence */
159
static PyObject *
160
getitem_sequence(PyObject *obj, Py_ssize_t idx)
161
{
162
return PySequence_GetItem(obj, idx);
163
}
164
165
/* do the equivalent of obj[idx], where obj is not a sequence */
166
static PyObject *
167
getitem_idx(PyObject *obj, Py_ssize_t idx)
168
{
169
PyObject *newobj;
170
PyObject *idx_obj = PyLong_FromSsize_t(idx);
171
if (idx_obj == NULL)
172
return NULL;
173
newobj = PyObject_GetItem(obj, idx_obj);
174
Py_DECREF(idx_obj);
175
return newobj;
176
}
177
178
/* do the equivalent of obj[name] */
179
static PyObject *
180
getitem_str(PyObject *obj, SubString *name)
181
{
182
PyObject *newobj;
183
PyObject *str = SubString_new_object(name);
184
if (str == NULL)
185
return NULL;
186
newobj = PyObject_GetItem(obj, str);
187
Py_DECREF(str);
188
return newobj;
189
}
190
191
typedef struct {
192
/* the entire string we're parsing. we assume that someone else
193
is managing its lifetime, and that it will exist for the
194
lifetime of the iterator. can be empty */
195
SubString str;
196
197
/* index to where we are inside field_name */
198
Py_ssize_t index;
199
} FieldNameIterator;
200
201
202
static int
203
FieldNameIterator_init(FieldNameIterator *self, PyObject *s,
204
Py_ssize_t start, Py_ssize_t end)
205
{
206
SubString_init(&self->str, s, start, end);
207
self->index = start;
208
return 1;
209
}
210
211
static int
212
_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
213
{
214
Py_UCS4 c;
215
216
name->str = self->str.str;
217
name->start = self->index;
218
219
/* return everything until '.' or '[' */
220
while (self->index < self->str.end) {
221
c = PyUnicode_READ_CHAR(self->str.str, self->index++);
222
switch (c) {
223
case '[':
224
case '.':
225
/* backup so that we this character will be seen next time */
226
self->index--;
227
break;
228
default:
229
continue;
230
}
231
break;
232
}
233
/* end of string is okay */
234
name->end = self->index;
235
return 1;
236
}
237
238
static int
239
_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
240
{
241
int bracket_seen = 0;
242
Py_UCS4 c;
243
244
name->str = self->str.str;
245
name->start = self->index;
246
247
/* return everything until ']' */
248
while (self->index < self->str.end) {
249
c = PyUnicode_READ_CHAR(self->str.str, self->index++);
250
switch (c) {
251
case ']':
252
bracket_seen = 1;
253
break;
254
default:
255
continue;
256
}
257
break;
258
}
259
/* make sure we ended with a ']' */
260
if (!bracket_seen) {
261
PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
262
return 0;
263
}
264
265
/* end of string is okay */
266
/* don't include the ']' */
267
name->end = self->index-1;
268
return 1;
269
}
270
271
/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
272
static int
273
FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
274
Py_ssize_t *name_idx, SubString *name)
275
{
276
/* check at end of input */
277
if (self->index >= self->str.end)
278
return 1;
279
280
switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {
281
case '.':
282
*is_attribute = 1;
283
if (_FieldNameIterator_attr(self, name) == 0)
284
return 0;
285
*name_idx = -1;
286
break;
287
case '[':
288
*is_attribute = 0;
289
if (_FieldNameIterator_item(self, name) == 0)
290
return 0;
291
*name_idx = get_integer(name);
292
if (*name_idx == -1 && PyErr_Occurred())
293
return 0;
294
break;
295
default:
296
/* Invalid character follows ']' */
297
PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
298
"follow ']' in format field specifier");
299
return 0;
300
}
301
302
/* empty string is an error */
303
if (name->start == name->end) {
304
PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
305
return 0;
306
}
307
308
return 2;
309
}
310
311
312
/* input: field_name
313
output: 'first' points to the part before the first '[' or '.'
314
'first_idx' is -1 if 'first' is not an integer, otherwise
315
it's the value of first converted to an integer
316
'rest' is an iterator to return the rest
317
*/
318
static int
319
field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,
320
Py_ssize_t *first_idx, FieldNameIterator *rest,
321
AutoNumber *auto_number)
322
{
323
Py_UCS4 c;
324
Py_ssize_t i = start;
325
int field_name_is_empty;
326
int using_numeric_index;
327
328
/* find the part up until the first '.' or '[' */
329
while (i < end) {
330
switch (c = PyUnicode_READ_CHAR(str, i++)) {
331
case '[':
332
case '.':
333
/* backup so that we this character is available to the
334
"rest" iterator */
335
i--;
336
break;
337
default:
338
continue;
339
}
340
break;
341
}
342
343
/* set up the return values */
344
SubString_init(first, str, start, i);
345
FieldNameIterator_init(rest, str, i, end);
346
347
/* see if "first" is an integer, in which case it's used as an index */
348
*first_idx = get_integer(first);
349
if (*first_idx == -1 && PyErr_Occurred())
350
return 0;
351
352
field_name_is_empty = first->start >= first->end;
353
354
/* If the field name is omitted or if we have a numeric index
355
specified, then we're doing numeric indexing into args. */
356
using_numeric_index = field_name_is_empty || *first_idx != -1;
357
358
/* We always get here exactly one time for each field we're
359
processing. And we get here in field order (counting by left
360
braces). So this is the perfect place to handle automatic field
361
numbering if the field name is omitted. */
362
363
/* Check if we need to do the auto-numbering. It's not needed if
364
we're called from string.Format routines, because it's handled
365
in that class by itself. */
366
if (auto_number) {
367
/* Initialize our auto numbering state if this is the first
368
time we're either auto-numbering or manually numbering. */
369
if (auto_number->an_state == ANS_INIT && using_numeric_index)
370
auto_number->an_state = field_name_is_empty ?
371
ANS_AUTO : ANS_MANUAL;
372
373
/* Make sure our state is consistent with what we're doing
374
this time through. Only check if we're using a numeric
375
index. */
376
if (using_numeric_index)
377
if (autonumber_state_error(auto_number->an_state,
378
field_name_is_empty))
379
return 0;
380
/* Zero length field means we want to do auto-numbering of the
381
fields. */
382
if (field_name_is_empty)
383
*first_idx = (auto_number->an_field_number)++;
384
}
385
386
return 1;
387
}
388
389
390
/*
391
get_field_object returns the object inside {}, before the
392
format_spec. It handles getindex and getattr lookups and consumes
393
the entire input string.
394
*/
395
static PyObject *
396
get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
397
AutoNumber *auto_number)
398
{
399
PyObject *obj = NULL;
400
int ok;
401
int is_attribute;
402
SubString name;
403
SubString first;
404
Py_ssize_t index;
405
FieldNameIterator rest;
406
407
if (!field_name_split(input->str, input->start, input->end, &first,
408
&index, &rest, auto_number)) {
409
goto error;
410
}
411
412
if (index == -1) {
413
/* look up in kwargs */
414
PyObject *key = SubString_new_object(&first);
415
if (key == NULL) {
416
goto error;
417
}
418
if (kwargs == NULL) {
419
PyErr_SetObject(PyExc_KeyError, key);
420
Py_DECREF(key);
421
goto error;
422
}
423
/* Use PyObject_GetItem instead of PyDict_GetItem because this
424
code is no longer just used with kwargs. It might be passed
425
a non-dict when called through format_map. */
426
obj = PyObject_GetItem(kwargs, key);
427
Py_DECREF(key);
428
if (obj == NULL) {
429
goto error;
430
}
431
}
432
else {
433
/* If args is NULL, we have a format string with a positional field
434
with only kwargs to retrieve it from. This can only happen when
435
used with format_map(), where positional arguments are not
436
allowed. */
437
if (args == NULL) {
438
PyErr_SetString(PyExc_ValueError, "Format string contains "
439
"positional fields");
440
goto error;
441
}
442
443
/* look up in args */
444
obj = PySequence_GetItem(args, index);
445
if (obj == NULL) {
446
PyErr_Format(PyExc_IndexError,
447
"Replacement index %zd out of range for positional "
448
"args tuple",
449
index);
450
goto error;
451
}
452
}
453
454
/* iterate over the rest of the field_name */
455
while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
456
&name)) == 2) {
457
PyObject *tmp;
458
459
if (is_attribute)
460
/* getattr lookup "." */
461
tmp = getattr(obj, &name);
462
else
463
/* getitem lookup "[]" */
464
if (index == -1)
465
tmp = getitem_str(obj, &name);
466
else
467
if (PySequence_Check(obj))
468
tmp = getitem_sequence(obj, index);
469
else
470
/* not a sequence */
471
tmp = getitem_idx(obj, index);
472
if (tmp == NULL)
473
goto error;
474
475
/* assign to obj */
476
Py_SETREF(obj, tmp);
477
}
478
/* end of iterator, this is the non-error case */
479
if (ok == 1)
480
return obj;
481
error:
482
Py_XDECREF(obj);
483
return NULL;
484
}
485
486
/************************************************************************/
487
/***************** Field rendering functions **************************/
488
/************************************************************************/
489
490
/*
491
render_field() is the main function in this section. It takes the
492
field object and field specification string generated by
493
get_field_and_spec, and renders the field into the output string.
494
495
render_field calls fieldobj.__format__(format_spec) method, and
496
appends to the output.
497
*/
498
static int
499
render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer)
500
{
501
int ok = 0;
502
PyObject *result = NULL;
503
PyObject *format_spec_object = NULL;
504
int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
505
int err;
506
507
/* If we know the type exactly, skip the lookup of __format__ and just
508
call the formatter directly. */
509
if (PyUnicode_CheckExact(fieldobj))
510
formatter = _PyUnicode_FormatAdvancedWriter;
511
else if (PyLong_CheckExact(fieldobj))
512
formatter = _PyLong_FormatAdvancedWriter;
513
else if (PyFloat_CheckExact(fieldobj))
514
formatter = _PyFloat_FormatAdvancedWriter;
515
else if (PyComplex_CheckExact(fieldobj))
516
formatter = _PyComplex_FormatAdvancedWriter;
517
518
if (formatter) {
519
/* we know exactly which formatter will be called when __format__ is
520
looked up, so call it directly, instead. */
521
err = formatter(writer, fieldobj, format_spec->str,
522
format_spec->start, format_spec->end);
523
return (err == 0);
524
}
525
else {
526
/* We need to create an object out of the pointers we have, because
527
__format__ takes a string/unicode object for format_spec. */
528
if (format_spec->str)
529
format_spec_object = PyUnicode_Substring(format_spec->str,
530
format_spec->start,
531
format_spec->end);
532
else
533
format_spec_object = PyUnicode_New(0, 0);
534
if (format_spec_object == NULL)
535
goto done;
536
537
result = PyObject_Format(fieldobj, format_spec_object);
538
}
539
if (result == NULL)
540
goto done;
541
542
if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
543
goto done;
544
ok = 1;
545
546
done:
547
Py_XDECREF(format_spec_object);
548
Py_XDECREF(result);
549
return ok;
550
}
551
552
static int
553
parse_field(SubString *str, SubString *field_name, SubString *format_spec,
554
int *format_spec_needs_expanding, Py_UCS4 *conversion)
555
{
556
/* Note this function works if the field name is zero length,
557
which is good. Zero length field names are handled later, in
558
field_name_split. */
559
560
Py_UCS4 c = 0;
561
562
/* initialize these, as they may be empty */
563
*conversion = '\0';
564
SubString_init(format_spec, NULL, 0, 0);
565
566
/* Search for the field name. it's terminated by the end of
567
the string, or a ':' or '!' */
568
field_name->str = str->str;
569
field_name->start = str->start;
570
while (str->start < str->end) {
571
switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
572
case '{':
573
PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");
574
return 0;
575
case '[':
576
for (; str->start < str->end; str->start++)
577
if (PyUnicode_READ_CHAR(str->str, str->start) == ']')
578
break;
579
continue;
580
case '}':
581
case ':':
582
case '!':
583
break;
584
default:
585
continue;
586
}
587
break;
588
}
589
590
field_name->end = str->start - 1;
591
if (c == '!' || c == ':') {
592
Py_ssize_t count;
593
/* we have a format specifier and/or a conversion */
594
/* don't include the last character */
595
596
/* see if there's a conversion specifier */
597
if (c == '!') {
598
/* there must be another character present */
599
if (str->start >= str->end) {
600
PyErr_SetString(PyExc_ValueError,
601
"end of string while looking for conversion "
602
"specifier");
603
return 0;
604
}
605
*conversion = PyUnicode_READ_CHAR(str->str, str->start++);
606
607
if (str->start < str->end) {
608
c = PyUnicode_READ_CHAR(str->str, str->start++);
609
if (c == '}')
610
return 1;
611
if (c != ':') {
612
PyErr_SetString(PyExc_ValueError,
613
"expected ':' after conversion specifier");
614
return 0;
615
}
616
}
617
}
618
format_spec->str = str->str;
619
format_spec->start = str->start;
620
count = 1;
621
while (str->start < str->end) {
622
switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
623
case '{':
624
*format_spec_needs_expanding = 1;
625
count++;
626
break;
627
case '}':
628
count--;
629
if (count == 0) {
630
format_spec->end = str->start - 1;
631
return 1;
632
}
633
break;
634
default:
635
break;
636
}
637
}
638
639
PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");
640
return 0;
641
}
642
else if (c != '}') {
643
PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");
644
return 0;
645
}
646
647
return 1;
648
}
649
650
/************************************************************************/
651
/******* Output string allocation and escape-to-markup processing ******/
652
/************************************************************************/
653
654
/* MarkupIterator breaks the string into pieces of either literal
655
text, or things inside {} that need to be marked up. it is
656
designed to make it easy to wrap a Python iterator around it, for
657
use with the Formatter class */
658
659
typedef struct {
660
SubString str;
661
} MarkupIterator;
662
663
static int
664
MarkupIterator_init(MarkupIterator *self, PyObject *str,
665
Py_ssize_t start, Py_ssize_t end)
666
{
667
SubString_init(&self->str, str, start, end);
668
return 1;
669
}
670
671
/* returns 0 on error, 1 on non-error termination, and 2 if it got a
672
string (or something to be expanded) */
673
static int
674
MarkupIterator_next(MarkupIterator *self, SubString *literal,
675
int *field_present, SubString *field_name,
676
SubString *format_spec, Py_UCS4 *conversion,
677
int *format_spec_needs_expanding)
678
{
679
int at_end;
680
Py_UCS4 c = 0;
681
Py_ssize_t start;
682
Py_ssize_t len;
683
int markup_follows = 0;
684
685
/* initialize all of the output variables */
686
SubString_init(literal, NULL, 0, 0);
687
SubString_init(field_name, NULL, 0, 0);
688
SubString_init(format_spec, NULL, 0, 0);
689
*conversion = '\0';
690
*format_spec_needs_expanding = 0;
691
*field_present = 0;
692
693
/* No more input, end of iterator. This is the normal exit
694
path. */
695
if (self->str.start >= self->str.end)
696
return 1;
697
698
start = self->str.start;
699
700
/* First read any literal text. Read until the end of string, an
701
escaped '{' or '}', or an unescaped '{'. In order to never
702
allocate memory and so I can just pass pointers around, if
703
there's an escaped '{' or '}' then we'll return the literal
704
including the brace, but no format object. The next time
705
through, we'll return the rest of the literal, skipping past
706
the second consecutive brace. */
707
while (self->str.start < self->str.end) {
708
switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
709
case '{':
710
case '}':
711
markup_follows = 1;
712
break;
713
default:
714
continue;
715
}
716
break;
717
}
718
719
at_end = self->str.start >= self->str.end;
720
len = self->str.start - start;
721
722
if ((c == '}') && (at_end ||
723
(c != PyUnicode_READ_CHAR(self->str.str,
724
self->str.start)))) {
725
PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
726
"in format string");
727
return 0;
728
}
729
if (at_end && c == '{') {
730
PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
731
"in format string");
732
return 0;
733
}
734
if (!at_end) {
735
if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
736
/* escaped } or {, skip it in the input. there is no
737
markup object following us, just this literal text */
738
self->str.start++;
739
markup_follows = 0;
740
}
741
else
742
len--;
743
}
744
745
/* record the literal text */
746
literal->str = self->str.str;
747
literal->start = start;
748
literal->end = start + len;
749
750
if (!markup_follows)
751
return 2;
752
753
/* this is markup; parse the field */
754
*field_present = 1;
755
if (!parse_field(&self->str, field_name, format_spec,
756
format_spec_needs_expanding, conversion))
757
return 0;
758
return 2;
759
}
760
761
762
/* do the !r or !s conversion on obj */
763
static PyObject *
764
do_conversion(PyObject *obj, Py_UCS4 conversion)
765
{
766
/* XXX in pre-3.0, do we need to convert this to unicode, since it
767
might have returned a string? */
768
switch (conversion) {
769
case 'r':
770
return PyObject_Repr(obj);
771
case 's':
772
return PyObject_Str(obj);
773
case 'a':
774
return PyObject_ASCII(obj);
775
default:
776
if (conversion > 32 && conversion < 127) {
777
/* It's the ASCII subrange; casting to char is safe
778
(assuming the execution character set is an ASCII
779
superset). */
780
PyErr_Format(PyExc_ValueError,
781
"Unknown conversion specifier %c",
782
(char)conversion);
783
} else
784
PyErr_Format(PyExc_ValueError,
785
"Unknown conversion specifier \\x%x",
786
(unsigned int)conversion);
787
return NULL;
788
}
789
}
790
791
/* given:
792
793
{field_name!conversion:format_spec}
794
795
compute the result and write it to output.
796
format_spec_needs_expanding is an optimization. if it's false,
797
just output the string directly, otherwise recursively expand the
798
format_spec string.
799
800
field_name is allowed to be zero length, in which case we
801
are doing auto field numbering.
802
*/
803
804
static int
805
output_markup(SubString *field_name, SubString *format_spec,
806
int format_spec_needs_expanding, Py_UCS4 conversion,
807
_PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs,
808
int recursion_depth, AutoNumber *auto_number)
809
{
810
PyObject *tmp = NULL;
811
PyObject *fieldobj = NULL;
812
SubString expanded_format_spec;
813
SubString *actual_format_spec;
814
int result = 0;
815
816
/* convert field_name to an object */
817
fieldobj = get_field_object(field_name, args, kwargs, auto_number);
818
if (fieldobj == NULL)
819
goto done;
820
821
if (conversion != '\0') {
822
tmp = do_conversion(fieldobj, conversion);
823
if (tmp == NULL)
824
goto done;
825
826
/* do the assignment, transferring ownership: fieldobj = tmp */
827
Py_SETREF(fieldobj, tmp);
828
tmp = NULL;
829
}
830
831
/* if needed, recursively compute the format_spec */
832
if (format_spec_needs_expanding) {
833
tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
834
auto_number);
835
if (tmp == NULL)
836
goto done;
837
838
/* note that in the case we're expanding the format string,
839
tmp must be kept around until after the call to
840
render_field. */
841
SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp));
842
actual_format_spec = &expanded_format_spec;
843
}
844
else
845
actual_format_spec = format_spec;
846
847
if (render_field(fieldobj, actual_format_spec, writer) == 0)
848
goto done;
849
850
result = 1;
851
852
done:
853
Py_XDECREF(fieldobj);
854
Py_XDECREF(tmp);
855
856
return result;
857
}
858
859
/*
860
do_markup is the top-level loop for the format() method. It
861
searches through the format string for escapes to markup codes, and
862
calls other functions to move non-markup text to the output,
863
and to perform the markup to the output.
864
*/
865
static int
866
do_markup(SubString *input, PyObject *args, PyObject *kwargs,
867
_PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number)
868
{
869
MarkupIterator iter;
870
int format_spec_needs_expanding;
871
int result;
872
int field_present;
873
SubString literal;
874
SubString field_name;
875
SubString format_spec;
876
Py_UCS4 conversion;
877
878
MarkupIterator_init(&iter, input->str, input->start, input->end);
879
while ((result = MarkupIterator_next(&iter, &literal, &field_present,
880
&field_name, &format_spec,
881
&conversion,
882
&format_spec_needs_expanding)) == 2) {
883
if (literal.end != literal.start) {
884
if (!field_present && iter.str.start == iter.str.end)
885
writer->overallocate = 0;
886
if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
887
literal.start, literal.end) < 0)
888
return 0;
889
}
890
891
if (field_present) {
892
if (iter.str.start == iter.str.end)
893
writer->overallocate = 0;
894
if (!output_markup(&field_name, &format_spec,
895
format_spec_needs_expanding, conversion, writer,
896
args, kwargs, recursion_depth, auto_number))
897
return 0;
898
}
899
}
900
return result;
901
}
902
903
904
/*
905
build_string allocates the output string and then
906
calls do_markup to do the heavy lifting.
907
*/
908
static PyObject *
909
build_string(SubString *input, PyObject *args, PyObject *kwargs,
910
int recursion_depth, AutoNumber *auto_number)
911
{
912
_PyUnicodeWriter writer;
913
914
/* check the recursion level */
915
if (recursion_depth <= 0) {
916
PyErr_SetString(PyExc_ValueError,
917
"Max string recursion exceeded");
918
return NULL;
919
}
920
921
_PyUnicodeWriter_Init(&writer);
922
writer.overallocate = 1;
923
writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100;
924
925
if (!do_markup(input, args, kwargs, &writer, recursion_depth,
926
auto_number)) {
927
_PyUnicodeWriter_Dealloc(&writer);
928
return NULL;
929
}
930
931
return _PyUnicodeWriter_Finish(&writer);
932
}
933
934
/************************************************************************/
935
/*********** main routine ***********************************************/
936
/************************************************************************/
937
938
/* this is the main entry point */
939
static PyObject *
940
do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
941
{
942
SubString input;
943
944
/* PEP 3101 says only 2 levels, so that
945
"{0:{1}}".format('abc', 's') # works
946
"{0:{1:{2}}}".format('abc', 's', '') # fails
947
*/
948
int recursion_depth = 2;
949
950
AutoNumber auto_number;
951
AutoNumber_Init(&auto_number);
952
SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self));
953
return build_string(&input, args, kwargs, recursion_depth, &auto_number);
954
}
955
956
/* Format self taking every field from the single mapping obj: there are
   no positional arguments (args is NULL) and obj plays the role of
   kwargs. */
static PyObject *
do_string_format_map(PyObject *self, PyObject *obj)
{
    PyObject *no_positional_args = NULL;

    return do_string_format(self, no_positional_args, obj);
}
961
962
963
/************************************************************************/
964
/*********** formatteriterator ******************************************/
965
/************************************************************************/
966
967
/* This is used to implement string.Formatter.vparse(). It exists so
968
Formatter can share code with the built in unicode.format() method.
969
It's really just a wrapper around MarkupIterator that is callable
970
from Python. */
971
972
typedef struct {
973
PyObject_HEAD
974
PyObject *str;
975
MarkupIterator it_markup;
976
} formatteriterobject;
977
978
static void
979
formatteriter_dealloc(formatteriterobject *it)
980
{
981
Py_XDECREF(it->str);
982
PyObject_Free(it);
983
}
984
985
/* returns a tuple:
986
(literal, field_name, format_spec, conversion)
987
988
literal is any literal text to output. might be zero length
989
field_name is the string before the ':'. might be None
990
format_spec is the string after the ':'. mibht be None
991
conversion is either None, or the string after the '!'
992
*/
993
static PyObject *
994
formatteriter_next(formatteriterobject *it)
995
{
996
SubString literal;
997
SubString field_name;
998
SubString format_spec;
999
Py_UCS4 conversion;
1000
int format_spec_needs_expanding;
1001
int field_present;
1002
int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1003
&field_name, &format_spec, &conversion,
1004
&format_spec_needs_expanding);
1005
1006
/* all of the SubString objects point into it->str, so no
1007
memory management needs to be done on them */
1008
assert(0 <= result && result <= 2);
1009
if (result == 0 || result == 1)
1010
/* if 0, error has already been set, if 1, iterator is empty */
1011
return NULL;
1012
else {
1013
PyObject *literal_str = NULL;
1014
PyObject *field_name_str = NULL;
1015
PyObject *format_spec_str = NULL;
1016
PyObject *conversion_str = NULL;
1017
PyObject *tuple = NULL;
1018
1019
literal_str = SubString_new_object(&literal);
1020
if (literal_str == NULL)
1021
goto done;
1022
1023
field_name_str = SubString_new_object(&field_name);
1024
if (field_name_str == NULL)
1025
goto done;
1026
1027
/* if field_name is non-zero length, return a string for
1028
format_spec (even if zero length), else return None */
1029
format_spec_str = (field_present ?
1030
SubString_new_object_or_empty :
1031
SubString_new_object)(&format_spec);
1032
if (format_spec_str == NULL)
1033
goto done;
1034
1035
/* if the conversion is not specified, return a None,
1036
otherwise create a one length string with the conversion
1037
character */
1038
if (conversion == '\0') {
1039
conversion_str = Py_NewRef(Py_None);
1040
}
1041
else
1042
conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1043
&conversion, 1);
1044
if (conversion_str == NULL)
1045
goto done;
1046
1047
tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
1048
conversion_str);
1049
done:
1050
Py_XDECREF(literal_str);
1051
Py_XDECREF(field_name_str);
1052
Py_XDECREF(format_spec_str);
1053
Py_XDECREF(conversion_str);
1054
return tuple;
1055
}
1056
}
1057
1058
static PyMethodDef formatteriter_methods[] = {
1059
{NULL, NULL} /* sentinel */
1060
};
1061
1062
static PyTypeObject PyFormatterIter_Type = {
1063
PyVarObject_HEAD_INIT(&PyType_Type, 0)
1064
"formatteriterator", /* tp_name */
1065
sizeof(formatteriterobject), /* tp_basicsize */
1066
0, /* tp_itemsize */
1067
/* methods */
1068
(destructor)formatteriter_dealloc, /* tp_dealloc */
1069
0, /* tp_vectorcall_offset */
1070
0, /* tp_getattr */
1071
0, /* tp_setattr */
1072
0, /* tp_as_async */
1073
0, /* tp_repr */
1074
0, /* tp_as_number */
1075
0, /* tp_as_sequence */
1076
0, /* tp_as_mapping */
1077
0, /* tp_hash */
1078
0, /* tp_call */
1079
0, /* tp_str */
1080
PyObject_GenericGetAttr, /* tp_getattro */
1081
0, /* tp_setattro */
1082
0, /* tp_as_buffer */
1083
Py_TPFLAGS_DEFAULT, /* tp_flags */
1084
0, /* tp_doc */
1085
0, /* tp_traverse */
1086
0, /* tp_clear */
1087
0, /* tp_richcompare */
1088
0, /* tp_weaklistoffset */
1089
PyObject_SelfIter, /* tp_iter */
1090
(iternextfunc)formatteriter_next, /* tp_iternext */
1091
formatteriter_methods, /* tp_methods */
1092
0,
1093
};
1094
1095
/* unicode_formatter_parser is used to implement
1096
string.Formatter.vformat. it parses a string and returns tuples
1097
describing the parsed elements. It's a wrapper around
1098
stringlib/string_format.h's MarkupIterator */
1099
static PyObject *
1100
formatter_parser(PyObject *ignored, PyObject *self)
1101
{
1102
formatteriterobject *it;
1103
1104
if (!PyUnicode_Check(self)) {
1105
PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1106
return NULL;
1107
}
1108
1109
it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1110
if (it == NULL)
1111
return NULL;
1112
1113
/* take ownership, give the object to the iterator */
1114
it->str = Py_NewRef(self);
1115
1116
/* initialize the contained MarkupIterator */
1117
MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self));
1118
return (PyObject *)it;
1119
}
1120
1121
1122
/************************************************************************/
1123
/*********** fieldnameiterator ******************************************/
1124
/************************************************************************/
1125
1126
1127
/* This is used to implement string.Formatter.vparse(). It parses the
1128
field name into attribute and item values. It's a Python-callable
1129
wrapper around FieldNameIterator */
1130
1131
typedef struct {
1132
PyObject_HEAD
1133
PyObject *str;
1134
FieldNameIterator it_field;
1135
} fieldnameiterobject;
1136
1137
static void
1138
fieldnameiter_dealloc(fieldnameiterobject *it)
1139
{
1140
Py_XDECREF(it->str);
1141
PyObject_Free(it);
1142
}
1143
1144
/* returns a tuple:
1145
(is_attr, value)
1146
is_attr is true if we used attribute syntax (e.g., '.foo')
1147
false if we used index syntax (e.g., '[foo]')
1148
value is an integer or string
1149
*/
1150
static PyObject *
1151
fieldnameiter_next(fieldnameiterobject *it)
1152
{
1153
int result;
1154
int is_attr;
1155
Py_ssize_t idx;
1156
SubString name;
1157
1158
result = FieldNameIterator_next(&it->it_field, &is_attr,
1159
&idx, &name);
1160
if (result == 0 || result == 1)
1161
/* if 0, error has already been set, if 1, iterator is empty */
1162
return NULL;
1163
else {
1164
PyObject* result = NULL;
1165
PyObject* is_attr_obj = NULL;
1166
PyObject* obj = NULL;
1167
1168
is_attr_obj = PyBool_FromLong(is_attr);
1169
if (is_attr_obj == NULL)
1170
goto done;
1171
1172
/* either an integer or a string */
1173
if (idx != -1)
1174
obj = PyLong_FromSsize_t(idx);
1175
else
1176
obj = SubString_new_object(&name);
1177
if (obj == NULL)
1178
goto done;
1179
1180
/* return a tuple of values */
1181
result = PyTuple_Pack(2, is_attr_obj, obj);
1182
1183
done:
1184
Py_XDECREF(is_attr_obj);
1185
Py_XDECREF(obj);
1186
return result;
1187
}
1188
}
1189
1190
/* Method table for the field-name iterator type.  The table contains no
   real entries, only the terminating sentinel. */
static PyMethodDef fieldnameiter_methods[] = {
    {NULL, NULL, 0, NULL},      /* sentinel */
};
1193
1194
static PyTypeObject PyFieldNameIter_Type = {
1195
PyVarObject_HEAD_INIT(&PyType_Type, 0)
1196
"fieldnameiterator", /* tp_name */
1197
sizeof(fieldnameiterobject), /* tp_basicsize */
1198
0, /* tp_itemsize */
1199
/* methods */
1200
(destructor)fieldnameiter_dealloc, /* tp_dealloc */
1201
0, /* tp_vectorcall_offset */
1202
0, /* tp_getattr */
1203
0, /* tp_setattr */
1204
0, /* tp_as_async */
1205
0, /* tp_repr */
1206
0, /* tp_as_number */
1207
0, /* tp_as_sequence */
1208
0, /* tp_as_mapping */
1209
0, /* tp_hash */
1210
0, /* tp_call */
1211
0, /* tp_str */
1212
PyObject_GenericGetAttr, /* tp_getattro */
1213
0, /* tp_setattro */
1214
0, /* tp_as_buffer */
1215
Py_TPFLAGS_DEFAULT, /* tp_flags */
1216
0, /* tp_doc */
1217
0, /* tp_traverse */
1218
0, /* tp_clear */
1219
0, /* tp_richcompare */
1220
0, /* tp_weaklistoffset */
1221
PyObject_SelfIter, /* tp_iter */
1222
(iternextfunc)fieldnameiter_next, /* tp_iternext */
1223
fieldnameiter_methods, /* tp_methods */
1224
0};
1225
1226
/* unicode_formatter_field_name_split is used to implement
1227
string.Formatter.vformat. it takes a PEP 3101 "field name", and
1228
returns a tuple of (first, rest): "first", the part before the
1229
first '.' or '['; and "rest", an iterator for the rest of the field
1230
name. it's a wrapper around stringlib/string_format.h's
1231
field_name_split. The iterator it returns is a
1232
FieldNameIterator */
1233
static PyObject *
1234
formatter_field_name_split(PyObject *ignored, PyObject *self)
1235
{
1236
SubString first;
1237
Py_ssize_t first_idx;
1238
fieldnameiterobject *it;
1239
1240
PyObject *first_obj = NULL;
1241
PyObject *result = NULL;
1242
1243
if (!PyUnicode_Check(self)) {
1244
PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1245
return NULL;
1246
}
1247
1248
it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1249
if (it == NULL)
1250
return NULL;
1251
1252
/* take ownership, give the object to the iterator. this is
1253
just to keep the field_name alive */
1254
it->str = Py_NewRef(self);
1255
1256
/* Pass in auto_number = NULL. We'll return an empty string for
1257
first_obj in that case. */
1258
if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self),
1259
&first, &first_idx, &it->it_field, NULL))
1260
goto done;
1261
1262
/* first becomes an integer, if possible; else a string */
1263
if (first_idx != -1)
1264
first_obj = PyLong_FromSsize_t(first_idx);
1265
else
1266
/* convert "first" into a string object */
1267
first_obj = SubString_new_object(&first);
1268
if (first_obj == NULL)
1269
goto done;
1270
1271
/* return a tuple of values */
1272
result = PyTuple_Pack(2, first_obj, it);
1273
1274
done:
1275
Py_XDECREF(it);
1276
Py_XDECREF(first_obj);
1277
return result;
1278
}
1279
1280