CoCalc -- unicode

GitHub Repository: allendowney/cpython
Path: blob/main/Objects/stringlib/unicode_format.h
¹² views
1
/*
    unicode_format.h -- implementation of str.format().
*/
4

5
#include "pycore_floatobject.h"   // _PyFloat_FormatAdvancedWriter()
6

7
/************************************************************************/
/***********   Global data structures and forward declarations  *********/
/************************************************************************/
10

11
/*
12
   A SubString consists of the characters between two string or
13
   unicode pointers.
14
*/
15
typedef struct {
16
    PyObject *str; /* borrowed reference */
17
    Py_ssize_t start, end;
18
} SubString;
19

20

21
/* Keep track if we're auto-numbering fields */
typedef enum {
    ANS_INIT,     /* no numerically indexed field has been seen yet */
    ANS_AUTO,     /* fields are numbered automatically ("{}") */
    ANS_MANUAL    /* fields carry explicit numbers ("{0}") */
} AutoNumberState;
26

27
/* Keeps track of our auto-numbering state, and which number field we're on */
28
typedef struct {
29
    AutoNumberState an_state;
30
    int an_field_number;
31
} AutoNumber;
32

33

34
/* forward declaration for recursion */
35
static PyObject *
36
build_string(SubString *input, PyObject *args, PyObject *kwargs,
37
             int recursion_depth, AutoNumber *auto_number);
38

39

40

41
/************************************************************************/
/**************************  Utility  functions  ************************/
/************************************************************************/
44

45
static void
46
AutoNumber_Init(AutoNumber *auto_number)
47
{
48
    auto_number->an_state = ANS_INIT;
49
    auto_number->an_field_number = 0;
50
}
51

52
/* fill in a SubString from a pointer and length */
53
Py_LOCAL_INLINE(void)
54
SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end)
55
{
56
    str->str = s;
57
    str->start = start;
58
    str->end = end;
59
}
60

61
/* return a new string.  if str->str is NULL, return None */
62
Py_LOCAL_INLINE(PyObject *)
63
SubString_new_object(SubString *str)
64
{
65
    if (str->str == NULL)
66
        Py_RETURN_NONE;
67
    return PyUnicode_Substring(str->str, str->start, str->end);
68
}
69

70
/* return a new string.  if str->str is NULL, return a new empty string */
71
Py_LOCAL_INLINE(PyObject *)
72
SubString_new_object_or_empty(SubString *str)
73
{
74
    if (str->str == NULL) {
75
        return PyUnicode_New(0, 0);
76
    }
77
    return SubString_new_object(str);
78
}
79

80
/* Return 1 if an error has been detected switching between automatic
81
   field numbering and manual field specification, else return 0. Set
82
   ValueError on error. */
83
static int
84
autonumber_state_error(AutoNumberState state, int field_name_is_empty)
85
{
86
    if (state == ANS_MANUAL) {
87
        if (field_name_is_empty) {
88
            PyErr_SetString(PyExc_ValueError, "cannot switch from "
89
                            "manual field specification to "
90
                            "automatic field numbering");
91
            return 1;
92
        }
93
    }
94
    else {
95
        if (!field_name_is_empty) {
96
            PyErr_SetString(PyExc_ValueError, "cannot switch from "
97
                            "automatic field numbering to "
98
                            "manual field specification");
99
            return 1;
100
        }
101
    }
102
    return 0;
103
}
104

105

106
/************************************************************************/
/***********  Format string parsing -- integers and identifiers *********/
/************************************************************************/
109

110
static Py_ssize_t
111
get_integer(const SubString *str)
112
{
113
    Py_ssize_t accumulator = 0;
114
    Py_ssize_t digitval;
115
    Py_ssize_t i;
116

117
    /* empty string is an error */
118
    if (str->start >= str->end)
119
        return -1;
120

121
    for (i = str->start; i < str->end; i++) {
122
        digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));
123
        if (digitval < 0)
124
            return -1;
125
        /*
126
           Detect possible overflow before it happens:
127

128
              accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
129
              accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
130
        */
131
        if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
132
            PyErr_Format(PyExc_ValueError,
133
                         "Too many decimal digits in format string");
134
            return -1;
135
        }
136
        accumulator = accumulator * 10 + digitval;
137
    }
138
    return accumulator;
139
}
140

141
/************************************************************************/
/******** Functions to get field objects and specification strings ******/
/************************************************************************/
144

145
/* do the equivalent of obj.name */
146
static PyObject *
147
getattr(PyObject *obj, SubString *name)
148
{
149
    PyObject *newobj;
150
    PyObject *str = SubString_new_object(name);
151
    if (str == NULL)
152
        return NULL;
153
    newobj = PyObject_GetAttr(obj, str);
154
    Py_DECREF(str);
155
    return newobj;
156
}
157

158
/* do the equivalent of obj[idx], where obj is a sequence */
159
static PyObject *
160
getitem_sequence(PyObject *obj, Py_ssize_t idx)
161
{
162
    return PySequence_GetItem(obj, idx);
163
}
164

165
/* do the equivalent of obj[idx], where obj is not a sequence */
166
static PyObject *
167
getitem_idx(PyObject *obj, Py_ssize_t idx)
168
{
169
    PyObject *newobj;
170
    PyObject *idx_obj = PyLong_FromSsize_t(idx);
171
    if (idx_obj == NULL)
172
        return NULL;
173
    newobj = PyObject_GetItem(obj, idx_obj);
174
    Py_DECREF(idx_obj);
175
    return newobj;
176
}
177

178
/* do the equivalent of obj[name] */
179
static PyObject *
180
getitem_str(PyObject *obj, SubString *name)
181
{
182
    PyObject *newobj;
183
    PyObject *str = SubString_new_object(name);
184
    if (str == NULL)
185
        return NULL;
186
    newobj = PyObject_GetItem(obj, str);
187
    Py_DECREF(str);
188
    return newobj;
189
}
190

191
typedef struct {
192
    /* the entire string we're parsing.  we assume that someone else
193
       is managing its lifetime, and that it will exist for the
194
       lifetime of the iterator.  can be empty */
195
    SubString str;
196

197
    /* index to where we are inside field_name */
198
    Py_ssize_t index;
199
} FieldNameIterator;
200

201

202
static int
203
FieldNameIterator_init(FieldNameIterator *self, PyObject *s,
204
                       Py_ssize_t start, Py_ssize_t end)
205
{
206
    SubString_init(&self->str, s, start, end);
207
    self->index = start;
208
    return 1;
209
}
210

211
static int
212
_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
213
{
214
    Py_UCS4 c;
215

216
    name->str = self->str.str;
217
    name->start = self->index;
218

219
    /* return everything until '.' or '[' */
220
    while (self->index < self->str.end) {
221
        c = PyUnicode_READ_CHAR(self->str.str, self->index++);
222
        switch (c) {
223
        case '[':
224
        case '.':
225
            /* backup so that we this character will be seen next time */
226
            self->index--;
227
            break;
228
        default:
229
            continue;
230
        }
231
        break;
232
    }
233
    /* end of string is okay */
234
    name->end = self->index;
235
    return 1;
236
}
237

238
static int
239
_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
240
{
241
    int bracket_seen = 0;
242
    Py_UCS4 c;
243

244
    name->str = self->str.str;
245
    name->start = self->index;
246

247
    /* return everything until ']' */
248
    while (self->index < self->str.end) {
249
        c = PyUnicode_READ_CHAR(self->str.str, self->index++);
250
        switch (c) {
251
        case ']':
252
            bracket_seen = 1;
253
            break;
254
        default:
255
            continue;
256
        }
257
        break;
258
    }
259
    /* make sure we ended with a ']' */
260
    if (!bracket_seen) {
261
        PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
262
        return 0;
263
    }
264

265
    /* end of string is okay */
266
    /* don't include the ']' */
267
    name->end = self->index-1;
268
    return 1;
269
}
270

271
/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
272
static int
273
FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
274
                       Py_ssize_t *name_idx, SubString *name)
275
{
276
    /* check at end of input */
277
    if (self->index >= self->str.end)
278
        return 1;
279

280
    switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {
281
    case '.':
282
        *is_attribute = 1;
283
        if (_FieldNameIterator_attr(self, name) == 0)
284
            return 0;
285
        *name_idx = -1;
286
        break;
287
    case '[':
288
        *is_attribute = 0;
289
        if (_FieldNameIterator_item(self, name) == 0)
290
            return 0;
291
        *name_idx = get_integer(name);
292
        if (*name_idx == -1 && PyErr_Occurred())
293
            return 0;
294
        break;
295
    default:
296
        /* Invalid character follows ']' */
297
        PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
298
                        "follow ']' in format field specifier");
299
        return 0;
300
    }
301

302
    /* empty string is an error */
303
    if (name->start == name->end) {
304
        PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
305
        return 0;
306
    }
307

308
    return 2;
309
}
310

311

312
/* input: field_name
313
   output: 'first' points to the part before the first '[' or '.'
314
           'first_idx' is -1 if 'first' is not an integer, otherwise
315
                       it's the value of first converted to an integer
316
           'rest' is an iterator to return the rest
317
*/
318
static int
319
field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,
320
                 Py_ssize_t *first_idx, FieldNameIterator *rest,
321
                 AutoNumber *auto_number)
322
{
323
    Py_UCS4 c;
324
    Py_ssize_t i = start;
325
    int field_name_is_empty;
326
    int using_numeric_index;
327

328
    /* find the part up until the first '.' or '[' */
329
    while (i < end) {
330
        switch (c = PyUnicode_READ_CHAR(str, i++)) {
331
        case '[':
332
        case '.':
333
            /* backup so that we this character is available to the
334
               "rest" iterator */
335
            i--;
336
            break;
337
        default:
338
            continue;
339
        }
340
        break;
341
    }
342

343
    /* set up the return values */
344
    SubString_init(first, str, start, i);
345
    FieldNameIterator_init(rest, str, i, end);
346

347
    /* see if "first" is an integer, in which case it's used as an index */
348
    *first_idx = get_integer(first);
349
    if (*first_idx == -1 && PyErr_Occurred())
350
        return 0;
351

352
    field_name_is_empty = first->start >= first->end;
353

354
    /* If the field name is omitted or if we have a numeric index
355
       specified, then we're doing numeric indexing into args. */
356
    using_numeric_index = field_name_is_empty || *first_idx != -1;
357

358
    /* We always get here exactly one time for each field we're
359
       processing. And we get here in field order (counting by left
360
       braces). So this is the perfect place to handle automatic field
361
       numbering if the field name is omitted. */
362

363
    /* Check if we need to do the auto-numbering. It's not needed if
364
       we're called from string.Format routines, because it's handled
365
       in that class by itself. */
366
    if (auto_number) {
367
        /* Initialize our auto numbering state if this is the first
368
           time we're either auto-numbering or manually numbering. */
369
        if (auto_number->an_state == ANS_INIT && using_numeric_index)
370
            auto_number->an_state = field_name_is_empty ?
371
                ANS_AUTO : ANS_MANUAL;
372

373
        /* Make sure our state is consistent with what we're doing
374
           this time through. Only check if we're using a numeric
375
           index. */
376
        if (using_numeric_index)
377
            if (autonumber_state_error(auto_number->an_state,
378
                                       field_name_is_empty))
379
                return 0;
380
        /* Zero length field means we want to do auto-numbering of the
381
           fields. */
382
        if (field_name_is_empty)
383
            *first_idx = (auto_number->an_field_number)++;
384
    }
385

386
    return 1;
387
}
388

389

390
/*
391
    get_field_object returns the object inside {}, before the
392
    format_spec.  It handles getindex and getattr lookups and consumes
393
    the entire input string.
394
*/
395
static PyObject *
396
get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
397
                 AutoNumber *auto_number)
398
{
399
    PyObject *obj = NULL;
400
    int ok;
401
    int is_attribute;
402
    SubString name;
403
    SubString first;
404
    Py_ssize_t index;
405
    FieldNameIterator rest;
406

407
    if (!field_name_split(input->str, input->start, input->end, &first,
408
                          &index, &rest, auto_number)) {
409
        goto error;
410
    }
411

412
    if (index == -1) {
413
        /* look up in kwargs */
414
        PyObject *key = SubString_new_object(&first);
415
        if (key == NULL) {
416
            goto error;
417
        }
418
        if (kwargs == NULL) {
419
            PyErr_SetObject(PyExc_KeyError, key);
420
            Py_DECREF(key);
421
            goto error;
422
        }
423
        /* Use PyObject_GetItem instead of PyDict_GetItem because this
424
           code is no longer just used with kwargs. It might be passed
425
           a non-dict when called through format_map. */
426
        obj = PyObject_GetItem(kwargs, key);
427
        Py_DECREF(key);
428
        if (obj == NULL) {
429
            goto error;
430
        }
431
    }
432
    else {
433
        /* If args is NULL, we have a format string with a positional field
434
           with only kwargs to retrieve it from. This can only happen when
435
           used with format_map(), where positional arguments are not
436
           allowed. */
437
        if (args == NULL) {
438
            PyErr_SetString(PyExc_ValueError, "Format string contains "
439
                            "positional fields");
440
            goto error;
441
        }
442

443
        /* look up in args */
444
        obj = PySequence_GetItem(args, index);
445
        if (obj == NULL) {
446
            PyErr_Format(PyExc_IndexError,
447
                         "Replacement index %zd out of range for positional "
448
                         "args tuple",
449
                         index);
450
             goto error;
451
        }
452
    }
453

454
    /* iterate over the rest of the field_name */
455
    while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
456
                                        &name)) == 2) {
457
        PyObject *tmp;
458

459
        if (is_attribute)
460
            /* getattr lookup "." */
461
            tmp = getattr(obj, &name);
462
        else
463
            /* getitem lookup "[]" */
464
            if (index == -1)
465
                tmp = getitem_str(obj, &name);
466
            else
467
                if (PySequence_Check(obj))
468
                    tmp = getitem_sequence(obj, index);
469
                else
470
                    /* not a sequence */
471
                    tmp = getitem_idx(obj, index);
472
        if (tmp == NULL)
473
            goto error;
474

475
        /* assign to obj */
476
        Py_SETREF(obj, tmp);
477
    }
478
    /* end of iterator, this is the non-error case */
479
    if (ok == 1)
480
        return obj;
481
error:
482
    Py_XDECREF(obj);
483
    return NULL;
484
}
485

486
/************************************************************************/
/*****************  Field rendering functions  **************************/
/************************************************************************/
489

490
/*
491
    render_field() is the main function in this section.  It takes the
492
    field object and field specification string generated by
493
    get_field_and_spec, and renders the field into the output string.
494

495
    render_field calls fieldobj.__format__(format_spec) method, and
496
    appends to the output.
497
*/
498
static int
499
render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer)
500
{
501
    int ok = 0;
502
    PyObject *result = NULL;
503
    PyObject *format_spec_object = NULL;
504
    int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
505
    int err;
506

507
    /* If we know the type exactly, skip the lookup of __format__ and just
508
       call the formatter directly. */
509
    if (PyUnicode_CheckExact(fieldobj))
510
        formatter = _PyUnicode_FormatAdvancedWriter;
511
    else if (PyLong_CheckExact(fieldobj))
512
        formatter = _PyLong_FormatAdvancedWriter;
513
    else if (PyFloat_CheckExact(fieldobj))
514
        formatter = _PyFloat_FormatAdvancedWriter;
515
    else if (PyComplex_CheckExact(fieldobj))
516
        formatter = _PyComplex_FormatAdvancedWriter;
517

518
    if (formatter) {
519
        /* we know exactly which formatter will be called when __format__ is
520
           looked up, so call it directly, instead. */
521
        err = formatter(writer, fieldobj, format_spec->str,
522
                        format_spec->start, format_spec->end);
523
        return (err == 0);
524
    }
525
    else {
526
        /* We need to create an object out of the pointers we have, because
527
           __format__ takes a string/unicode object for format_spec. */
528
        if (format_spec->str)
529
            format_spec_object = PyUnicode_Substring(format_spec->str,
530
                                                     format_spec->start,
531
                                                     format_spec->end);
532
        else
533
            format_spec_object = PyUnicode_New(0, 0);
534
        if (format_spec_object == NULL)
535
            goto done;
536

537
        result = PyObject_Format(fieldobj, format_spec_object);
538
    }
539
    if (result == NULL)
540
        goto done;
541

542
    if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
543
        goto done;
544
    ok = 1;
545

546
done:
547
    Py_XDECREF(format_spec_object);
548
    Py_XDECREF(result);
549
    return ok;
550
}
551

552
static int
553
parse_field(SubString *str, SubString *field_name, SubString *format_spec,
554
            int *format_spec_needs_expanding, Py_UCS4 *conversion)
555
{
556
    /* Note this function works if the field name is zero length,
557
       which is good.  Zero length field names are handled later, in
558
       field_name_split. */
559

560
    Py_UCS4 c = 0;
561

562
    /* initialize these, as they may be empty */
563
    *conversion = '\0';
564
    SubString_init(format_spec, NULL, 0, 0);
565

566
    /* Search for the field name.  it's terminated by the end of
567
       the string, or a ':' or '!' */
568
    field_name->str = str->str;
569
    field_name->start = str->start;
570
    while (str->start < str->end) {
571
        switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
572
        case '{':
573
            PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");
574
            return 0;
575
        case '[':
576
            for (; str->start < str->end; str->start++)
577
                if (PyUnicode_READ_CHAR(str->str, str->start) == ']')
578
                    break;
579
            continue;
580
        case '}':
581
        case ':':
582
        case '!':
583
            break;
584
        default:
585
            continue;
586
        }
587
        break;
588
    }
589

590
    field_name->end = str->start - 1;
591
    if (c == '!' || c == ':') {
592
        Py_ssize_t count;
593
        /* we have a format specifier and/or a conversion */
594
        /* don't include the last character */
595

596
        /* see if there's a conversion specifier */
597
        if (c == '!') {
598
            /* there must be another character present */
599
            if (str->start >= str->end) {
600
                PyErr_SetString(PyExc_ValueError,
601
                                "end of string while looking for conversion "
602
                                "specifier");
603
                return 0;
604
            }
605
            *conversion = PyUnicode_READ_CHAR(str->str, str->start++);
606

607
            if (str->start < str->end) {
608
                c = PyUnicode_READ_CHAR(str->str, str->start++);
609
                if (c == '}')
610
                    return 1;
611
                if (c != ':') {
612
                    PyErr_SetString(PyExc_ValueError,
613
                                    "expected ':' after conversion specifier");
614
                    return 0;
615
                }
616
            }
617
        }
618
        format_spec->str = str->str;
619
        format_spec->start = str->start;
620
        count = 1;
621
        while (str->start < str->end) {
622
            switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
623
            case '{':
624
                *format_spec_needs_expanding = 1;
625
                count++;
626
                break;
627
            case '}':
628
                count--;
629
                if (count == 0) {
630
                    format_spec->end = str->start - 1;
631
                    return 1;
632
                }
633
                break;
634
            default:
635
                break;
636
            }
637
        }
638

639
        PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");
640
        return 0;
641
    }
642
    else if (c != '}') {
643
        PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");
644
        return 0;
645
    }
646

647
    return 1;
648
}
649

650
/************************************************************************/
/******* Output string allocation and escape-to-markup processing  ******/
/************************************************************************/
653

654
/* MarkupIterator breaks the string into pieces of either literal
655
   text, or things inside {} that need to be marked up.  it is
656
   designed to make it easy to wrap a Python iterator around it, for
657
   use with the Formatter class */
658

659
typedef struct {
660
    SubString str;
661
} MarkupIterator;
662

663
static int
664
MarkupIterator_init(MarkupIterator *self, PyObject *str,
665
                    Py_ssize_t start, Py_ssize_t end)
666
{
667
    SubString_init(&self->str, str, start, end);
668
    return 1;
669
}
670

671
/* returns 0 on error, 1 on non-error termination, and 2 if it got a
672
   string (or something to be expanded) */
673
static int
674
MarkupIterator_next(MarkupIterator *self, SubString *literal,
675
                    int *field_present, SubString *field_name,
676
                    SubString *format_spec, Py_UCS4 *conversion,
677
                    int *format_spec_needs_expanding)
678
{
679
    int at_end;
680
    Py_UCS4 c = 0;
681
    Py_ssize_t start;
682
    Py_ssize_t len;
683
    int markup_follows = 0;
684

685
    /* initialize all of the output variables */
686
    SubString_init(literal, NULL, 0, 0);
687
    SubString_init(field_name, NULL, 0, 0);
688
    SubString_init(format_spec, NULL, 0, 0);
689
    *conversion = '\0';
690
    *format_spec_needs_expanding = 0;
691
    *field_present = 0;
692

693
    /* No more input, end of iterator.  This is the normal exit
694
       path. */
695
    if (self->str.start >= self->str.end)
696
        return 1;
697

698
    start = self->str.start;
699

700
    /* First read any literal text. Read until the end of string, an
701
       escaped '{' or '}', or an unescaped '{'.  In order to never
702
       allocate memory and so I can just pass pointers around, if
703
       there's an escaped '{' or '}' then we'll return the literal
704
       including the brace, but no format object.  The next time
705
       through, we'll return the rest of the literal, skipping past
706
       the second consecutive brace. */
707
    while (self->str.start < self->str.end) {
708
        switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
709
        case '{':
710
        case '}':
711
            markup_follows = 1;
712
            break;
713
        default:
714
            continue;
715
        }
716
        break;
717
    }
718

719
    at_end = self->str.start >= self->str.end;
720
    len = self->str.start - start;
721

722
    if ((c == '}') && (at_end ||
723
                       (c != PyUnicode_READ_CHAR(self->str.str,
724
                                                 self->str.start)))) {
725
        PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
726
                        "in format string");
727
        return 0;
728
    }
729
    if (at_end && c == '{') {
730
        PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
731
                        "in format string");
732
        return 0;
733
    }
734
    if (!at_end) {
735
        if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
736
            /* escaped } or {, skip it in the input.  there is no
737
               markup object following us, just this literal text */
738
            self->str.start++;
739
            markup_follows = 0;
740
        }
741
        else
742
            len--;
743
    }
744

745
    /* record the literal text */
746
    literal->str = self->str.str;
747
    literal->start = start;
748
    literal->end = start + len;
749

750
    if (!markup_follows)
751
        return 2;
752

753
    /* this is markup; parse the field */
754
    *field_present = 1;
755
    if (!parse_field(&self->str, field_name, format_spec,
756
                     format_spec_needs_expanding, conversion))
757
        return 0;
758
    return 2;
759
}
760

761

762
/* do the !r or !s conversion on obj */
763
static PyObject *
764
do_conversion(PyObject *obj, Py_UCS4 conversion)
765
{
766
    /* XXX in pre-3.0, do we need to convert this to unicode, since it
767
       might have returned a string? */
768
    switch (conversion) {
769
    case 'r':
770
        return PyObject_Repr(obj);
771
    case 's':
772
        return PyObject_Str(obj);
773
    case 'a':
774
        return PyObject_ASCII(obj);
775
    default:
776
        if (conversion > 32 && conversion < 127) {
777
                /* It's the ASCII subrange; casting to char is safe
778
                   (assuming the execution character set is an ASCII
779
                   superset). */
780
                PyErr_Format(PyExc_ValueError,
781
                     "Unknown conversion specifier %c",
782
                     (char)conversion);
783
        } else
784
                PyErr_Format(PyExc_ValueError,
785
                     "Unknown conversion specifier \\x%x",
786
                     (unsigned int)conversion);
787
        return NULL;
788
    }
789
}
790

791
/* given:
792

793
   {field_name!conversion:format_spec}
794

795
   compute the result and write it to output.
796
   format_spec_needs_expanding is an optimization.  if it's false,
797
   just output the string directly, otherwise recursively expand the
798
   format_spec string.
799

800
   field_name is allowed to be zero length, in which case we
801
   are doing auto field numbering.
802
*/
803

804
static int
805
output_markup(SubString *field_name, SubString *format_spec,
806
              int format_spec_needs_expanding, Py_UCS4 conversion,
807
              _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs,
808
              int recursion_depth, AutoNumber *auto_number)
809
{
810
    PyObject *tmp = NULL;
811
    PyObject *fieldobj = NULL;
812
    SubString expanded_format_spec;
813
    SubString *actual_format_spec;
814
    int result = 0;
815

816
    /* convert field_name to an object */
817
    fieldobj = get_field_object(field_name, args, kwargs, auto_number);
818
    if (fieldobj == NULL)
819
        goto done;
820

821
    if (conversion != '\0') {
822
        tmp = do_conversion(fieldobj, conversion);
823
        if (tmp == NULL)
824
            goto done;
825

826
        /* do the assignment, transferring ownership: fieldobj = tmp */
827
        Py_SETREF(fieldobj, tmp);
828
        tmp = NULL;
829
    }
830

831
    /* if needed, recursively compute the format_spec */
832
    if (format_spec_needs_expanding) {
833
        tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
834
                           auto_number);
835
        if (tmp == NULL)
836
            goto done;
837

838
        /* note that in the case we're expanding the format string,
839
           tmp must be kept around until after the call to
840
           render_field. */
841
        SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp));
842
        actual_format_spec = &expanded_format_spec;
843
    }
844
    else
845
        actual_format_spec = format_spec;
846

847
    if (render_field(fieldobj, actual_format_spec, writer) == 0)
848
        goto done;
849

850
    result = 1;
851

852
done:
853
    Py_XDECREF(fieldobj);
854
    Py_XDECREF(tmp);
855

856
    return result;
857
}
858

859
/*
860
    do_markup is the top-level loop for the format() method.  It
861
    searches through the format string for escapes to markup codes, and
862
    calls other functions to move non-markup text to the output,
863
    and to perform the markup to the output.
864
*/
865
static int
866
do_markup(SubString *input, PyObject *args, PyObject *kwargs,
867
          _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number)
868
{
869
    MarkupIterator iter;
870
    int format_spec_needs_expanding;
871
    int result;
872
    int field_present;
873
    SubString literal;
874
    SubString field_name;
875
    SubString format_spec;
876
    Py_UCS4 conversion;
877

878
    MarkupIterator_init(&iter, input->str, input->start, input->end);
879
    while ((result = MarkupIterator_next(&iter, &literal, &field_present,
880
                                         &field_name, &format_spec,
881
                                         &conversion,
882
                                         &format_spec_needs_expanding)) == 2) {
883
        if (literal.end != literal.start) {
884
            if (!field_present && iter.str.start == iter.str.end)
885
                writer->overallocate = 0;
886
            if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
887
                                                literal.start, literal.end) < 0)
888
                return 0;
889
        }
890

891
        if (field_present) {
892
            if (iter.str.start == iter.str.end)
893
                writer->overallocate = 0;
894
            if (!output_markup(&field_name, &format_spec,
895
                               format_spec_needs_expanding, conversion, writer,
896
                               args, kwargs, recursion_depth, auto_number))
897
                return 0;
898
        }
899
    }
900
    return result;
901
}
902

903

904
/*
905
    build_string allocates the output string and then
906
    calls do_markup to do the heavy lifting.
907
*/
908
static PyObject *
909
build_string(SubString *input, PyObject *args, PyObject *kwargs,
910
             int recursion_depth, AutoNumber *auto_number)
911
{
912
    _PyUnicodeWriter writer;
913

914
    /* check the recursion level */
915
    if (recursion_depth <= 0) {
916
        PyErr_SetString(PyExc_ValueError,
917
                        "Max string recursion exceeded");
918
        return NULL;
919
    }
920

921
    _PyUnicodeWriter_Init(&writer);
922
    writer.overallocate = 1;
923
    writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100;
924

925
    if (!do_markup(input, args, kwargs, &writer, recursion_depth,
926
                   auto_number)) {
927
        _PyUnicodeWriter_Dealloc(&writer);
928
        return NULL;
929
    }
930

931
    return _PyUnicodeWriter_Finish(&writer);
932
}
933

934
/************************************************************************/
935
/*********** main routine ***********************************************/
936
/************************************************************************/
937

938
/* this is the main entry point */
939
static PyObject *
940
do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
941
{
942
    SubString input;
943

944
    /* PEP 3101 says only 2 levels, so that
945
       "{0:{1}}".format('abc', 's')            # works
946
       "{0:{1:{2}}}".format('abc', 's', '')    # fails
947
    */
948
    int recursion_depth = 2;
949

950
    AutoNumber auto_number;
951
    AutoNumber_Init(&auto_number);
952
    SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self));
953
    return build_string(&input, args, kwargs, recursion_depth, &auto_number);
954
}
955

956
/* Format `self` with no positional arguments: `obj` is passed to
   do_string_format() in the kwargs position and args is NULL. */
static PyObject *
do_string_format_map(PyObject *self, PyObject *obj)
{
    PyObject *no_args = NULL;

    return do_string_format(self, no_args, obj);
}
961

962

963
/************************************************************************/
964
/*********** formatteriterator ******************************************/
965
/************************************************************************/
966

967
/* This is used to implement string.Formatter.vparse().  It exists so
968
   Formatter can share code with the built in unicode.format() method.
969
   It's really just a wrapper around MarkupIterator that is callable
970
   from Python. */
971

972
typedef struct {
973
    PyObject_HEAD
974
    PyObject *str;
975
    MarkupIterator it_markup;
976
} formatteriterobject;
977

978
static void
979
formatteriter_dealloc(formatteriterobject *it)
980
{
981
    Py_XDECREF(it->str);
982
    PyObject_Free(it);
983
}
984

985
/* returns a tuple:
986
   (literal, field_name, format_spec, conversion)
987

988
   literal is any literal text to output.  might be zero length
989
   field_name is the string before the ':'.  might be None
990
   format_spec is the string after the ':'.  mibht be None
991
   conversion is either None, or the string after the '!'
992
*/
993
static PyObject *
994
formatteriter_next(formatteriterobject *it)
995
{
996
    SubString literal;
997
    SubString field_name;
998
    SubString format_spec;
999
    Py_UCS4 conversion;
1000
    int format_spec_needs_expanding;
1001
    int field_present;
1002
    int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1003
                                     &field_name, &format_spec, &conversion,
1004
                                     &format_spec_needs_expanding);
1005

1006
    /* all of the SubString objects point into it->str, so no
1007
       memory management needs to be done on them */
1008
    assert(0 <= result && result <= 2);
1009
    if (result == 0 || result == 1)
1010
        /* if 0, error has already been set, if 1, iterator is empty */
1011
        return NULL;
1012
    else {
1013
        PyObject *literal_str = NULL;
1014
        PyObject *field_name_str = NULL;
1015
        PyObject *format_spec_str = NULL;
1016
        PyObject *conversion_str = NULL;
1017
        PyObject *tuple = NULL;
1018

1019
        literal_str = SubString_new_object(&literal);
1020
        if (literal_str == NULL)
1021
            goto done;
1022

1023
        field_name_str = SubString_new_object(&field_name);
1024
        if (field_name_str == NULL)
1025
            goto done;
1026

1027
        /* if field_name is non-zero length, return a string for
1028
           format_spec (even if zero length), else return None */
1029
        format_spec_str = (field_present ?
1030
                           SubString_new_object_or_empty :
1031
                           SubString_new_object)(&format_spec);
1032
        if (format_spec_str == NULL)
1033
            goto done;
1034

1035
        /* if the conversion is not specified, return a None,
1036
           otherwise create a one length string with the conversion
1037
           character */
1038
        if (conversion == '\0') {
1039
            conversion_str = Py_NewRef(Py_None);
1040
        }
1041
        else
1042
            conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1043
                                                       &conversion, 1);
1044
        if (conversion_str == NULL)
1045
            goto done;
1046

1047
        tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
1048
                             conversion_str);
1049
    done:
1050
        Py_XDECREF(literal_str);
1051
        Py_XDECREF(field_name_str);
1052
        Py_XDECREF(format_spec_str);
1053
        Py_XDECREF(conversion_str);
1054
        return tuple;
1055
    }
1056
}
1057

1058
static PyMethodDef formatteriter_methods[] = {
1059
    {NULL,              NULL}           /* sentinel */
1060
};
1061

1062
static PyTypeObject PyFormatterIter_Type = {
1063
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
1064
    "formatteriterator",                /* tp_name */
1065
    sizeof(formatteriterobject),        /* tp_basicsize */
1066
    0,                                  /* tp_itemsize */
1067
    /* methods */
1068
    (destructor)formatteriter_dealloc,  /* tp_dealloc */
1069
    0,                                  /* tp_vectorcall_offset */
1070
    0,                                  /* tp_getattr */
1071
    0,                                  /* tp_setattr */
1072
    0,                                  /* tp_as_async */
1073
    0,                                  /* tp_repr */
1074
    0,                                  /* tp_as_number */
1075
    0,                                  /* tp_as_sequence */
1076
    0,                                  /* tp_as_mapping */
1077
    0,                                  /* tp_hash */
1078
    0,                                  /* tp_call */
1079
    0,                                  /* tp_str */
1080
    PyObject_GenericGetAttr,            /* tp_getattro */
1081
    0,                                  /* tp_setattro */
1082
    0,                                  /* tp_as_buffer */
1083
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
1084
    0,                                  /* tp_doc */
1085
    0,                                  /* tp_traverse */
1086
    0,                                  /* tp_clear */
1087
    0,                                  /* tp_richcompare */
1088
    0,                                  /* tp_weaklistoffset */
1089
    PyObject_SelfIter,                  /* tp_iter */
1090
    (iternextfunc)formatteriter_next,   /* tp_iternext */
1091
    formatteriter_methods,              /* tp_methods */
1092
    0,
1093
};
1094

1095
/* unicode_formatter_parser is used to implement
1096
   string.Formatter.vformat.  it parses a string and returns tuples
1097
   describing the parsed elements.  It's a wrapper around
1098
   stringlib/string_format.h's MarkupIterator */
1099
static PyObject *
1100
formatter_parser(PyObject *ignored, PyObject *self)
1101
{
1102
    formatteriterobject *it;
1103

1104
    if (!PyUnicode_Check(self)) {
1105
        PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1106
        return NULL;
1107
    }
1108

1109
    it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1110
    if (it == NULL)
1111
        return NULL;
1112

1113
    /* take ownership, give the object to the iterator */
1114
    it->str = Py_NewRef(self);
1115

1116
    /* initialize the contained MarkupIterator */
1117
    MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self));
1118
    return (PyObject *)it;
1119
}
1120

1121

1122
/************************************************************************/
1123
/*********** fieldnameiterator ******************************************/
1124
/************************************************************************/
1125

1126

1127
/* This is used to implement string.Formatter.vparse().  It parses the
1128
   field name into attribute and item values.  It's a Python-callable
1129
   wrapper around FieldNameIterator */
1130

1131
typedef struct {
1132
    PyObject_HEAD
1133
    PyObject *str;
1134
    FieldNameIterator it_field;
1135
} fieldnameiterobject;
1136

1137
static void
1138
fieldnameiter_dealloc(fieldnameiterobject *it)
1139
{
1140
    Py_XDECREF(it->str);
1141
    PyObject_Free(it);
1142
}
1143

1144
/* returns a tuple:
1145
   (is_attr, value)
1146
   is_attr is true if we used attribute syntax (e.g., '.foo')
1147
              false if we used index syntax (e.g., '[foo]')
1148
   value is an integer or string
1149
*/
1150
static PyObject *
1151
fieldnameiter_next(fieldnameiterobject *it)
1152
{
1153
    int result;
1154
    int is_attr;
1155
    Py_ssize_t idx;
1156
    SubString name;
1157

1158
    result = FieldNameIterator_next(&it->it_field, &is_attr,
1159
                                    &idx, &name);
1160
    if (result == 0 || result == 1)
1161
        /* if 0, error has already been set, if 1, iterator is empty */
1162
        return NULL;
1163
    else {
1164
        PyObject* result = NULL;
1165
        PyObject* is_attr_obj = NULL;
1166
        PyObject* obj = NULL;
1167

1168
        is_attr_obj = PyBool_FromLong(is_attr);
1169
        if (is_attr_obj == NULL)
1170
            goto done;
1171

1172
        /* either an integer or a string */
1173
        if (idx != -1)
1174
            obj = PyLong_FromSsize_t(idx);
1175
        else
1176
            obj = SubString_new_object(&name);
1177
        if (obj == NULL)
1178
            goto done;
1179

1180
        /* return a tuple of values */
1181
        result = PyTuple_Pack(2, is_attr_obj, obj);
1182

1183
    done:
1184
        Py_XDECREF(is_attr_obj);
1185
        Py_XDECREF(obj);
1186
        return result;
1187
    }
1188
}
1189

1190
static PyMethodDef fieldnameiter_methods[] = {
1191
    {NULL,              NULL}           /* sentinel */
1192
};
1193

1194
static PyTypeObject PyFieldNameIter_Type = {
1195
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
1196
    "fieldnameiterator",                /* tp_name */
1197
    sizeof(fieldnameiterobject),        /* tp_basicsize */
1198
    0,                                  /* tp_itemsize */
1199
    /* methods */
1200
    (destructor)fieldnameiter_dealloc,  /* tp_dealloc */
1201
    0,                                  /* tp_vectorcall_offset */
1202
    0,                                  /* tp_getattr */
1203
    0,                                  /* tp_setattr */
1204
    0,                                  /* tp_as_async */
1205
    0,                                  /* tp_repr */
1206
    0,                                  /* tp_as_number */
1207
    0,                                  /* tp_as_sequence */
1208
    0,                                  /* tp_as_mapping */
1209
    0,                                  /* tp_hash */
1210
    0,                                  /* tp_call */
1211
    0,                                  /* tp_str */
1212
    PyObject_GenericGetAttr,            /* tp_getattro */
1213
    0,                                  /* tp_setattro */
1214
    0,                                  /* tp_as_buffer */
1215
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
1216
    0,                                  /* tp_doc */
1217
    0,                                  /* tp_traverse */
1218
    0,                                  /* tp_clear */
1219
    0,                                  /* tp_richcompare */
1220
    0,                                  /* tp_weaklistoffset */
1221
    PyObject_SelfIter,                  /* tp_iter */
1222
    (iternextfunc)fieldnameiter_next,   /* tp_iternext */
1223
    fieldnameiter_methods,              /* tp_methods */
1224
    0};
1225

1226
/* unicode_formatter_field_name_split is used to implement
1227
   string.Formatter.vformat.  it takes a PEP 3101 "field name", and
1228
   returns a tuple of (first, rest): "first", the part before the
1229
   first '.' or '['; and "rest", an iterator for the rest of the field
1230
   name.  it's a wrapper around stringlib/string_format.h's
1231
   field_name_split.  The iterator it returns is a
1232
   FieldNameIterator */
1233
static PyObject *
1234
formatter_field_name_split(PyObject *ignored, PyObject *self)
1235
{
1236
    SubString first;
1237
    Py_ssize_t first_idx;
1238
    fieldnameiterobject *it;
1239

1240
    PyObject *first_obj = NULL;
1241
    PyObject *result = NULL;
1242

1243
    if (!PyUnicode_Check(self)) {
1244
        PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1245
        return NULL;
1246
    }
1247

1248
    it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1249
    if (it == NULL)
1250
        return NULL;
1251

1252
    /* take ownership, give the object to the iterator.  this is
1253
       just to keep the field_name alive */
1254
    it->str = Py_NewRef(self);
1255

1256
    /* Pass in auto_number = NULL. We'll return an empty string for
1257
       first_obj in that case. */
1258
    if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self),
1259
                          &first, &first_idx, &it->it_field, NULL))
1260
        goto done;
1261

1262
    /* first becomes an integer, if possible; else a string */
1263
    if (first_idx != -1)
1264
        first_obj = PyLong_FromSsize_t(first_idx);
1265
    else
1266
        /* convert "first" into a string object */
1267
        first_obj = SubString_new_object(&first);
1268
    if (first_obj == NULL)
1269
        goto done;
1270

1271
    /* return a tuple of values */
1272
    result = PyTuple_Pack(2, first_obj, it);
1273

1274
done:
1275
    Py_XDECREF(it);
1276
    Py_XDECREF(first_obj);
1277
    return result;
1278
}
1279

1280
Product

Resources

Company