CoCalc -- formatter

GitHub Repository: allendowney/cpython
Path: blob/main/Python/formatter_unicode.c
¹² views
1
/* Implements the unicode (as opposed to string) versions of the
   built-in formatters for string, int, and float -- that is, the
   implementations of int.__format__, etc., that take and return
   unicode objects. */
4

5
#include "Python.h"
6
#include "pycore_fileutils.h"     // _Py_GetLocaleconvNumeric()
7
#include "pycore_long.h"          // _PyLong_FormatWriter()
8
#include <locale.h>
9

10
/* Raises an exception about an unknown presentation type for this
11
 * type. */
12

13
static void
14
unknown_presentation_type(Py_UCS4 presentation_type,
15
                          const char* type_name)
16
{
17
    /* %c might be out-of-range, hence the two cases. */
18
    if (presentation_type > 32 && presentation_type < 128)
19
        PyErr_Format(PyExc_ValueError,
20
                     "Unknown format code '%c' "
21
                     "for object of type '%.200s'",
22
                     (char)presentation_type,
23
                     type_name);
24
    else
25
        PyErr_Format(PyExc_ValueError,
26
                     "Unknown format code '\\x%x' "
27
                     "for object of type '%.200s'",
28
                     (unsigned int)presentation_type,
29
                     type_name);
30
}
31

32
static void
33
invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type)
34
{
35
    assert(specifier == ',' || specifier == '_');
36
    if (presentation_type > 32 && presentation_type < 128)
37
        PyErr_Format(PyExc_ValueError,
38
                     "Cannot specify '%c' with '%c'.",
39
                     specifier, (char)presentation_type);
40
    else
41
        PyErr_Format(PyExc_ValueError,
42
                     "Cannot specify '%c' with '\\x%x'.",
43
                     specifier, (unsigned int)presentation_type);
44
}
45

46
static void
47
invalid_comma_and_underscore(void)
48
{
49
    PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
50
}
51

52
/*
53
    get_integer consumes 0 or more decimal digit characters from an
54
    input string, updates *result with the corresponding positive
55
    integer, and returns the number of digits consumed.
56

57
    returns -1 on error.
58
*/
59
static int
60
get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
61
                  Py_ssize_t *result)
62
{
63
    Py_ssize_t accumulator, digitval, pos = *ppos;
64
    int numdigits;
65
    int kind = PyUnicode_KIND(str);
66
    const void *data = PyUnicode_DATA(str);
67

68
    accumulator = numdigits = 0;
69
    for (; pos < end; pos++, numdigits++) {
70
        digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
71
        if (digitval < 0)
72
            break;
73
        /*
74
           Detect possible overflow before it happens:
75

76
              accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
77
              accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
78
        */
79
        if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
80
            PyErr_Format(PyExc_ValueError,
81
                         "Too many decimal digits in format string");
82
            *ppos = pos;
83
            return -1;
84
        }
85
        accumulator = accumulator * 10 + digitval;
86
    }
87
    *ppos = pos;
88
    *result = accumulator;
89
    return numdigits;
90
}
91

92
/************************************************************************/
93
/*********** standard format specifier parsing **************************/
94
/************************************************************************/
95

96
/* returns true if this character is a specifier alignment token */
97
Py_LOCAL_INLINE(int)
98
is_alignment_token(Py_UCS4 c)
99
{
100
    switch (c) {
101
    case '<': case '>': case '=': case '^':
102
        return 1;
103
    default:
104
        return 0;
105
    }
106
}
107

108
/* returns true if this character is a sign element */
109
Py_LOCAL_INLINE(int)
110
is_sign_element(Py_UCS4 c)
111
{
112
    switch (c) {
113
    case ' ': case '+': case '-':
114
        return 1;
115
    default:
116
        return 0;
117
    }
118
}
119

120
/* Selects where the decimal point, thousands separator and grouping
   come from (see get_locale_info()).  LT_NO_LOCALE must stay zero so
   that the value can be tested for truth.  The two separator-based
   codes use the separator character itself as their value. */
enum LocaleType {
    /* No thousands separator at all. */
    LT_NO_LOCALE = 0,
    /* ',' separator. */
    LT_DEFAULT_LOCALE = ',',
    /* '_' separator. */
    LT_UNDERSCORE_LOCALE = '_',
    /* '_' separator, grouped every four digits (bin/oct/hex). */
    LT_UNDER_FOUR_LOCALE,
    /* Whatever localeconv() reports for the current locale. */
    LT_CURRENT_LOCALE
};
128

129
typedef struct {
130
    Py_UCS4 fill_char;
131
    Py_UCS4 align;
132
    int alternate;
133
    int no_neg_0;
134
    Py_UCS4 sign;
135
    Py_ssize_t width;
136
    enum LocaleType thousands_separators;
137
    Py_ssize_t precision;
138
    Py_UCS4 type;
139
} InternalFormatSpec;
140

141

142
/*
143
  ptr points to the start of the format_spec, end points just past its end.
144
  fills in format with the parsed information.
145
  returns 1 on success, 0 on failure.
146
  if failure, sets the exception
147
*/
148
static int
149
parse_internal_render_format_spec(PyObject *obj,
150
                                  PyObject *format_spec,
151
                                  Py_ssize_t start, Py_ssize_t end,
152
                                  InternalFormatSpec *format,
153
                                  char default_type,
154
                                  char default_align)
155
{
156
    Py_ssize_t pos = start;
157
    int kind = PyUnicode_KIND(format_spec);
158
    const void *data = PyUnicode_DATA(format_spec);
159
    /* end-pos is used throughout this code to specify the length of
160
       the input string */
161
#define READ_spec(index) PyUnicode_READ(kind, data, index)
162

163
    Py_ssize_t consumed;
164
    int align_specified = 0;
165
    int fill_char_specified = 0;
166

167
    format->fill_char = ' ';
168
    format->align = default_align;
169
    format->alternate = 0;
170
    format->no_neg_0 = 0;
171
    format->sign = '\0';
172
    format->width = -1;
173
    format->thousands_separators = LT_NO_LOCALE;
174
    format->precision = -1;
175
    format->type = default_type;
176

177
    /* If the second char is an alignment token,
178
       then parse the fill char */
179
    if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
180
        format->align = READ_spec(pos+1);
181
        format->fill_char = READ_spec(pos);
182
        fill_char_specified = 1;
183
        align_specified = 1;
184
        pos += 2;
185
    }
186
    else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
187
        format->align = READ_spec(pos);
188
        align_specified = 1;
189
        ++pos;
190
    }
191

192
    /* Parse the various sign options */
193
    if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
194
        format->sign = READ_spec(pos);
195
        ++pos;
196
    }
197

198
    /* If the next character is z, request coercion of negative 0.
199
       Applies only to floats. */
200
    if (end-pos >= 1 && READ_spec(pos) == 'z') {
201
        format->no_neg_0 = 1;
202
        ++pos;
203
    }
204

205
    /* If the next character is #, we're in alternate mode.  This only
206
       applies to integers. */
207
    if (end-pos >= 1 && READ_spec(pos) == '#') {
208
        format->alternate = 1;
209
        ++pos;
210
    }
211

212
    /* The special case for 0-padding (backwards compat) */
213
    if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
214
        format->fill_char = '0';
215
        if (!align_specified && default_align == '>') {
216
            format->align = '=';
217
        }
218
        ++pos;
219
    }
220

221
    consumed = get_integer(format_spec, &pos, end, &format->width);
222
    if (consumed == -1)
223
        /* Overflow error. Exception already set. */
224
        return 0;
225

226
    /* If consumed is 0, we didn't consume any characters for the
227
       width. In that case, reset the width to -1, because
228
       get_integer() will have set it to zero. -1 is how we record
229
       that the width wasn't specified. */
230
    if (consumed == 0)
231
        format->width = -1;
232

233
    /* Comma signifies add thousands separators */
234
    if (end-pos && READ_spec(pos) == ',') {
235
        format->thousands_separators = LT_DEFAULT_LOCALE;
236
        ++pos;
237
    }
238
    /* Underscore signifies add thousands separators */
239
    if (end-pos && READ_spec(pos) == '_') {
240
        if (format->thousands_separators != LT_NO_LOCALE) {
241
            invalid_comma_and_underscore();
242
            return 0;
243
        }
244
        format->thousands_separators = LT_UNDERSCORE_LOCALE;
245
        ++pos;
246
    }
247
    if (end-pos && READ_spec(pos) == ',') {
248
        if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
249
            invalid_comma_and_underscore();
250
            return 0;
251
        }
252
    }
253

254
    /* Parse field precision */
255
    if (end-pos && READ_spec(pos) == '.') {
256
        ++pos;
257

258
        consumed = get_integer(format_spec, &pos, end, &format->precision);
259
        if (consumed == -1)
260
            /* Overflow error. Exception already set. */
261
            return 0;
262

263
        /* Not having a precision after a dot is an error. */
264
        if (consumed == 0) {
265
            PyErr_Format(PyExc_ValueError,
266
                         "Format specifier missing precision");
267
            return 0;
268
        }
269

270
    }
271

272
    /* Finally, parse the type field. */
273

274
    if (end-pos > 1) {
275
        /* More than one char remains, so this is an invalid format
276
           specifier. */
277
        /* Create a temporary object that contains the format spec we're
278
           operating on.  It's format_spec[start:end] (in Python syntax). */
279
        PyObject* actual_format_spec = PyUnicode_FromKindAndData(kind,
280
                                         (char*)data + kind*start,
281
                                         end-start);
282
        if (actual_format_spec != NULL) {
283
            PyErr_Format(PyExc_ValueError,
284
                "Invalid format specifier '%U' for object of type '%.200s'",
285
                actual_format_spec, Py_TYPE(obj)->tp_name);
286
            Py_DECREF(actual_format_spec);
287
        }
288
        return 0;
289
    }
290

291
    if (end-pos == 1) {
292
        format->type = READ_spec(pos);
293
        ++pos;
294
    }
295

296
    /* Do as much validating as we can, just by looking at the format
297
       specifier.  Do not take into account what type of formatting
298
       we're doing (int, float, string). */
299

300
    if (format->thousands_separators) {
301
        switch (format->type) {
302
        case 'd':
303
        case 'e':
304
        case 'f':
305
        case 'g':
306
        case 'E':
307
        case 'G':
308
        case '%':
309
        case 'F':
310
        case '\0':
311
            /* These are allowed. See PEP 378.*/
312
            break;
313
        case 'b':
314
        case 'o':
315
        case 'x':
316
        case 'X':
317
            /* Underscores are allowed in bin/oct/hex. See PEP 515. */
318
            if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
319
                /* Every four digits, not every three, in bin/oct/hex. */
320
                format->thousands_separators = LT_UNDER_FOUR_LOCALE;
321
                break;
322
            }
323
            /* fall through */
324
        default:
325
            invalid_thousands_separator_type(format->thousands_separators, format->type);
326
            return 0;
327
        }
328
    }
329

330
    assert (format->align <= 127);
331
    assert (format->sign <= 127);
332
    return 1;
333
}
334

335
/* Calculate the padding needed. */
336
static void
337
calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
338
             Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
339
             Py_ssize_t *n_total)
340
{
341
    if (width >= 0) {
342
        if (nchars > width)
343
            *n_total = nchars;
344
        else
345
            *n_total = width;
346
    }
347
    else {
348
        /* not specified, use all of the chars and no more */
349
        *n_total = nchars;
350
    }
351

352
    /* Figure out how much leading space we need, based on the
353
       aligning */
354
    if (align == '>')
355
        *n_lpadding = *n_total - nchars;
356
    else if (align == '^')
357
        *n_lpadding = (*n_total - nchars) / 2;
358
    else if (align == '<' || align == '=')
359
        *n_lpadding = 0;
360
    else {
361
        /* We should never have an unspecified alignment. */
362
        Py_UNREACHABLE();
363
    }
364

365
    *n_rpadding = *n_total - nchars - *n_lpadding;
366
}
367

368
/* Do the padding, and return a pointer to where the caller-supplied
369
   content goes. */
370
static int
371
fill_padding(_PyUnicodeWriter *writer,
372
             Py_ssize_t nchars,
373
             Py_UCS4 fill_char, Py_ssize_t n_lpadding,
374
             Py_ssize_t n_rpadding)
375
{
376
    Py_ssize_t pos;
377

378
    /* Pad on left. */
379
    if (n_lpadding) {
380
        pos = writer->pos;
381
        _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
382
    }
383

384
    /* Pad on right. */
385
    if (n_rpadding) {
386
        pos = writer->pos + nchars + n_lpadding;
387
        _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
388
    }
389

390
    /* Pointer to the user content. */
391
    writer->pos += n_lpadding;
392
    return 0;
393
}
394

395
/************************************************************************/
396
/*********** common routines for numeric formatting *********************/
397
/************************************************************************/
398

399
/* Locale info needed for formatting integers and the part of floats
400
   before and including the decimal. Note that locales only support
401
   8-bit chars, not unicode. */
402
typedef struct {
403
    PyObject *decimal_point;
404
    PyObject *thousands_sep;
405
    const char *grouping;
406
    char *grouping_buffer;
407
} LocaleInfo;
408

409
#define LocaleInfo_STATIC_INIT {0, 0, 0, 0}
410

411
/* describes the layout for an integer, see the comment in
412
   calc_number_widths() for details */
413
typedef struct {
414
    Py_ssize_t n_lpadding;
415
    Py_ssize_t n_prefix;
416
    Py_ssize_t n_spadding;
417
    Py_ssize_t n_rpadding;
418
    char sign;
419
    Py_ssize_t n_sign;      /* number of digits needed for sign (0/1) */
420
    Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
421
                                    any grouping chars. */
422
    Py_ssize_t n_decimal;   /* 0 if only an integer */
423
    Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
424
                               excluding the decimal itself, if
425
                               present. */
426

427
    /* These 2 are not the widths of fields, but are needed by
428
       STRINGLIB_GROUPING. */
429
    Py_ssize_t n_digits;    /* The number of digits before a decimal
430
                               or exponent. */
431
    Py_ssize_t n_min_width; /* The min_width we used when we computed
432
                               the n_grouped_digits width. */
433
} NumberFieldWidths;
434

435

436
/* Given a number of the form:
437
   digits[remainder]
438
   where ptr points to the start and end points to the end, find where
439
    the integer part ends. This could be a decimal, an exponent, both,
440
    or neither.
441
   If a decimal point is present, set *has_decimal and increment
442
    remainder beyond it.
443
   Results are undefined (but shouldn't crash) for improperly
444
    formatted strings.
445
*/
446
static void
447
parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
448
             Py_ssize_t *n_remainder, int *has_decimal)
449
{
450
    Py_ssize_t remainder;
451
    int kind = PyUnicode_KIND(s);
452
    const void *data = PyUnicode_DATA(s);
453

454
    while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
455
        ++pos;
456
    remainder = pos;
457

458
    /* Does remainder start with a decimal point? */
459
    *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
460

461
    /* Skip the decimal point. */
462
    if (*has_decimal)
463
        remainder++;
464

465
    *n_remainder = end - remainder;
466
}
467

468
/* not all fields of format are used.  for example, precision is
469
   unused.  should this take discrete params in order to be more clear
470
   about what it does?  or is passing a single format parameter easier
471
   and more efficient enough to justify a little obfuscation?
472
   Return -1 on error. */
473
static Py_ssize_t
474
calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
475
                   Py_UCS4 sign_char, Py_ssize_t n_start,
476
                   Py_ssize_t n_end, Py_ssize_t n_remainder,
477
                   int has_decimal, const LocaleInfo *locale,
478
                   const InternalFormatSpec *format, Py_UCS4 *maxchar)
479
{
480
    Py_ssize_t n_non_digit_non_padding;
481
    Py_ssize_t n_padding;
482

483
    spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
484
    spec->n_lpadding = 0;
485
    spec->n_prefix = n_prefix;
486
    spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
487
    spec->n_remainder = n_remainder;
488
    spec->n_spadding = 0;
489
    spec->n_rpadding = 0;
490
    spec->sign = '\0';
491
    spec->n_sign = 0;
492

493
    /* the output will look like:
494
       |                                                                                         |
495
       | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
496
       |                                                                                         |
497

498
       sign is computed from format->sign and the actual
499
       sign of the number
500

501
       prefix is given (it's for the '0x' prefix)
502

503
       digits is already known
504

505
       the total width is either given, or computed from the
506
       actual digits
507

508
       only one of lpadding, spadding, and rpadding can be non-zero,
509
       and it's calculated from the width and other fields
510
    */
511

512
    /* compute the various parts we're going to write */
513
    switch (format->sign) {
514
    case '+':
515
        /* always put a + or - */
516
        spec->n_sign = 1;
517
        spec->sign = (sign_char == '-' ? '-' : '+');
518
        break;
519
    case ' ':
520
        spec->n_sign = 1;
521
        spec->sign = (sign_char == '-' ? '-' : ' ');
522
        break;
523
    default:
524
        /* Not specified, or the default (-) */
525
        if (sign_char == '-') {
526
            spec->n_sign = 1;
527
            spec->sign = '-';
528
        }
529
    }
530

531
    /* The number of chars used for non-digits and non-padding. */
532
    n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
533
        spec->n_remainder;
534

535
    /* min_width can go negative, that's okay. format->width == -1 means
536
       we don't care. */
537
    if (format->fill_char == '0' && format->align == '=')
538
        spec->n_min_width = format->width - n_non_digit_non_padding;
539
    else
540
        spec->n_min_width = 0;
541

542
    if (spec->n_digits == 0)
543
        /* This case only occurs when using 'c' formatting, we need
544
           to special case it because the grouping code always wants
545
           to have at least one character. */
546
        spec->n_grouped_digits = 0;
547
    else {
548
        Py_UCS4 grouping_maxchar;
549
        spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
550
            NULL, 0,
551
            NULL, 0, spec->n_digits,
552
            spec->n_min_width,
553
            locale->grouping, locale->thousands_sep, &grouping_maxchar);
554
        if (spec->n_grouped_digits == -1) {
555
            return -1;
556
        }
557
        *maxchar = Py_MAX(*maxchar, grouping_maxchar);
558
    }
559

560
    /* Given the desired width and the total of digit and non-digit
561
       space we consume, see if we need any padding. format->width can
562
       be negative (meaning no padding), but this code still works in
563
       that case. */
564
    n_padding = format->width -
565
                        (n_non_digit_non_padding + spec->n_grouped_digits);
566
    if (n_padding > 0) {
567
        /* Some padding is needed. Determine if it's left, space, or right. */
568
        switch (format->align) {
569
        case '<':
570
            spec->n_rpadding = n_padding;
571
            break;
572
        case '^':
573
            spec->n_lpadding = n_padding / 2;
574
            spec->n_rpadding = n_padding - spec->n_lpadding;
575
            break;
576
        case '=':
577
            spec->n_spadding = n_padding;
578
            break;
579
        case '>':
580
            spec->n_lpadding = n_padding;
581
            break;
582
        default:
583
            /* Shouldn't get here */
584
            Py_UNREACHABLE();
585
        }
586
    }
587

588
    if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
589
        *maxchar = Py_MAX(*maxchar, format->fill_char);
590

591
    if (spec->n_decimal)
592
        *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
593

594
    return spec->n_lpadding + spec->n_sign + spec->n_prefix +
595
        spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
596
        spec->n_remainder + spec->n_rpadding;
597
}
598

599
/* Fill in the digit parts of a number's string representation,
600
   as determined in calc_number_widths().
601
   Return -1 on error, or 0 on success. */
602
static int
603
fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
604
            PyObject *digits, Py_ssize_t d_start,
605
            PyObject *prefix, Py_ssize_t p_start,
606
            Py_UCS4 fill_char,
607
            LocaleInfo *locale, int toupper)
608
{
609
    /* Used to keep track of digits, decimal, and remainder. */
610
    Py_ssize_t d_pos = d_start;
611
    const int kind = writer->kind;
612
    const void *data = writer->data;
613
    Py_ssize_t r;
614

615
    if (spec->n_lpadding) {
616
        _PyUnicode_FastFill(writer->buffer,
617
                            writer->pos, spec->n_lpadding, fill_char);
618
        writer->pos += spec->n_lpadding;
619
    }
620
    if (spec->n_sign == 1) {
621
        PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
622
        writer->pos++;
623
    }
624
    if (spec->n_prefix) {
625
        _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
626
                                      prefix, p_start,
627
                                      spec->n_prefix);
628
        if (toupper) {
629
            Py_ssize_t t;
630
            for (t = 0; t < spec->n_prefix; t++) {
631
                Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
632
                c = Py_TOUPPER(c);
633
                assert (c <= 127);
634
                PyUnicode_WRITE(kind, data, writer->pos + t, c);
635
            }
636
        }
637
        writer->pos += spec->n_prefix;
638
    }
639
    if (spec->n_spadding) {
640
        _PyUnicode_FastFill(writer->buffer,
641
                            writer->pos, spec->n_spadding, fill_char);
642
        writer->pos += spec->n_spadding;
643
    }
644

645
    /* Only for type 'c' special case, it has no digits. */
646
    if (spec->n_digits != 0) {
647
        /* Fill the digits with InsertThousandsGrouping. */
648
        r = _PyUnicode_InsertThousandsGrouping(
649
                writer, spec->n_grouped_digits,
650
                digits, d_pos, spec->n_digits,
651
                spec->n_min_width,
652
                locale->grouping, locale->thousands_sep, NULL);
653
        if (r == -1)
654
            return -1;
655
        assert(r == spec->n_grouped_digits);
656
        d_pos += spec->n_digits;
657
    }
658
    if (toupper) {
659
        Py_ssize_t t;
660
        for (t = 0; t < spec->n_grouped_digits; t++) {
661
            Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
662
            c = Py_TOUPPER(c);
663
            if (c > 127) {
664
                PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
665
                return -1;
666
            }
667
            PyUnicode_WRITE(kind, data, writer->pos + t, c);
668
        }
669
    }
670
    writer->pos += spec->n_grouped_digits;
671

672
    if (spec->n_decimal) {
673
        _PyUnicode_FastCopyCharacters(
674
            writer->buffer, writer->pos,
675
            locale->decimal_point, 0, spec->n_decimal);
676
        writer->pos += spec->n_decimal;
677
        d_pos += 1;
678
    }
679

680
    if (spec->n_remainder) {
681
        _PyUnicode_FastCopyCharacters(
682
            writer->buffer, writer->pos,
683
            digits, d_pos, spec->n_remainder);
684
        writer->pos += spec->n_remainder;
685
        /* d_pos += spec->n_remainder; */
686
    }
687

688
    if (spec->n_rpadding) {
689
        _PyUnicode_FastFill(writer->buffer,
690
                            writer->pos, spec->n_rpadding,
691
                            fill_char);
692
        writer->pos += spec->n_rpadding;
693
    }
694
    return 0;
695
}
696

697
static const char no_grouping[1] = {CHAR_MAX};
698

699
/* Find the decimal point character(s?), thousands_separator(s?), and
700
   grouping description, either for the current locale if type is
701
   LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
702
   LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
703
static int
704
get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
705
{
706
    switch (type) {
707
    case LT_CURRENT_LOCALE: {
708
        struct lconv *lc = localeconv();
709
        if (_Py_GetLocaleconvNumeric(lc,
710
                                     &locale_info->decimal_point,
711
                                     &locale_info->thousands_sep) < 0) {
712
            return -1;
713
        }
714

715
        /* localeconv() grouping can become a dangling pointer or point
716
           to a different string if another thread calls localeconv() during
717
           the string formatting. Copy the string to avoid this risk. */
718
        locale_info->grouping_buffer = _PyMem_Strdup(lc->grouping);
719
        if (locale_info->grouping_buffer == NULL) {
720
            PyErr_NoMemory();
721
            return -1;
722
        }
723
        locale_info->grouping = locale_info->grouping_buffer;
724
        break;
725
    }
726
    case LT_DEFAULT_LOCALE:
727
    case LT_UNDERSCORE_LOCALE:
728
    case LT_UNDER_FOUR_LOCALE:
729
        locale_info->decimal_point = PyUnicode_FromOrdinal('.');
730
        locale_info->thousands_sep = PyUnicode_FromOrdinal(
731
            type == LT_DEFAULT_LOCALE ? ',' : '_');
732
        if (!locale_info->decimal_point || !locale_info->thousands_sep)
733
            return -1;
734
        if (type != LT_UNDER_FOUR_LOCALE)
735
            locale_info->grouping = "\3"; /* Group every 3 characters.  The
736
                                         (implicit) trailing 0 means repeat
737
                                         infinitely. */
738
        else
739
            locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
740
        break;
741
    case LT_NO_LOCALE:
742
        locale_info->decimal_point = PyUnicode_FromOrdinal('.');
743
        locale_info->thousands_sep = PyUnicode_New(0, 0);
744
        if (!locale_info->decimal_point || !locale_info->thousands_sep)
745
            return -1;
746
        locale_info->grouping = no_grouping;
747
        break;
748
    }
749
    return 0;
750
}
751

752
static void
753
free_locale_info(LocaleInfo *locale_info)
754
{
755
    Py_XDECREF(locale_info->decimal_point);
756
    Py_XDECREF(locale_info->thousands_sep);
757
    PyMem_Free(locale_info->grouping_buffer);
758
}
759

760
/************************************************************************/
761
/*********** string formatting ******************************************/
762
/************************************************************************/
763

764
static int
765
format_string_internal(PyObject *value, const InternalFormatSpec *format,
766
                       _PyUnicodeWriter *writer)
767
{
768
    Py_ssize_t lpad;
769
    Py_ssize_t rpad;
770
    Py_ssize_t total;
771
    Py_ssize_t len;
772
    int result = -1;
773
    Py_UCS4 maxchar;
774

775
    assert(PyUnicode_IS_READY(value));
776
    len = PyUnicode_GET_LENGTH(value);
777

778
    /* sign is not allowed on strings */
779
    if (format->sign != '\0') {
780
        if (format->sign == ' ') {
781
            PyErr_SetString(PyExc_ValueError,
782
                "Space not allowed in string format specifier");
783
        }
784
        else {
785
            PyErr_SetString(PyExc_ValueError,
786
                "Sign not allowed in string format specifier");
787
        }
788
        goto done;
789
    }
790

791
    /* negative 0 coercion is not allowed on strings */
792
    if (format->no_neg_0) {
793
        PyErr_SetString(PyExc_ValueError,
794
                        "Negative zero coercion (z) not allowed in string format "
795
                        "specifier");
796
        goto done;
797
    }
798

799
    /* alternate is not allowed on strings */
800
    if (format->alternate) {
801
        PyErr_SetString(PyExc_ValueError,
802
                        "Alternate form (#) not allowed in string format "
803
                        "specifier");
804
        goto done;
805
    }
806

807
    /* '=' alignment not allowed on strings */
808
    if (format->align == '=') {
809
        PyErr_SetString(PyExc_ValueError,
810
                        "'=' alignment not allowed "
811
                        "in string format specifier");
812
        goto done;
813
    }
814

815
    if ((format->width == -1 || format->width <= len)
816
        && (format->precision == -1 || format->precision >= len)) {
817
        /* Fast path */
818
        return _PyUnicodeWriter_WriteStr(writer, value);
819
    }
820

821
    /* if precision is specified, output no more that format.precision
822
       characters */
823
    if (format->precision >= 0 && len >= format->precision) {
824
        len = format->precision;
825
    }
826

827
    calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
828

829
    maxchar = writer->maxchar;
830
    if (lpad != 0 || rpad != 0)
831
        maxchar = Py_MAX(maxchar, format->fill_char);
832
    if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
833
        Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
834
        maxchar = Py_MAX(maxchar, valmaxchar);
835
    }
836

837
    /* allocate the resulting string */
838
    if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
839
        goto done;
840

841
    /* Write into that space. First the padding. */
842
    result = fill_padding(writer, len, format->fill_char, lpad, rpad);
843
    if (result == -1)
844
        goto done;
845

846
    /* Then the source string. */
847
    if (len) {
848
        _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
849
                                      value, 0, len);
850
    }
851
    writer->pos += (len + rpad);
852
    result = 0;
853

854
done:
855
    return result;
856
}
857

858

859
/************************************************************************/
860
/*********** long formatting ********************************************/
861
/************************************************************************/
862

863
static int
864
format_long_internal(PyObject *value, const InternalFormatSpec *format,
865
                     _PyUnicodeWriter *writer)
866
{
867
    int result = -1;
868
    Py_UCS4 maxchar = 127;
869
    PyObject *tmp = NULL;
870
    Py_ssize_t inumeric_chars;
871
    Py_UCS4 sign_char = '\0';
872
    Py_ssize_t n_digits;       /* count of digits need from the computed
873
                                  string */
874
    Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
875
                                   produces non-digits */
876
    Py_ssize_t n_prefix = 0;   /* Count of prefix chars, (e.g., '0x') */
877
    Py_ssize_t n_total;
878
    Py_ssize_t prefix = 0;
879
    NumberFieldWidths spec;
880
    long x;
881

882
    /* Locale settings, either from the actual locale or
883
       from a hard-code pseudo-locale */
884
    LocaleInfo locale = LocaleInfo_STATIC_INIT;
885

886
    /* no precision allowed on integers */
887
    if (format->precision != -1) {
888
        PyErr_SetString(PyExc_ValueError,
889
                        "Precision not allowed in integer format specifier");
890
        goto done;
891
    }
892
    /* no negative zero coercion on integers */
893
    if (format->no_neg_0) {
894
        PyErr_SetString(PyExc_ValueError,
895
                        "Negative zero coercion (z) not allowed in integer"
896
                        " format specifier");
897
        goto done;
898
    }
899

900
    /* special case for character formatting */
901
    if (format->type == 'c') {
902
        /* error to specify a sign */
903
        if (format->sign != '\0') {
904
            PyErr_SetString(PyExc_ValueError,
905
                            "Sign not allowed with integer"
906
                            " format specifier 'c'");
907
            goto done;
908
        }
909
        /* error to request alternate format */
910
        if (format->alternate) {
911
            PyErr_SetString(PyExc_ValueError,
912
                            "Alternate form (#) not allowed with integer"
913
                            " format specifier 'c'");
914
            goto done;
915
        }
916

917
        /* taken from unicodeobject.c formatchar() */
918
        /* Integer input truncated to a character */
919
        x = PyLong_AsLong(value);
920
        if (x == -1 && PyErr_Occurred())
921
            goto done;
922
        if (x < 0 || x > 0x10ffff) {
923
            PyErr_SetString(PyExc_OverflowError,
924
                            "%c arg not in range(0x110000)");
925
            goto done;
926
        }
927
        tmp = PyUnicode_FromOrdinal(x);
928
        inumeric_chars = 0;
929
        n_digits = 1;
930
        maxchar = Py_MAX(maxchar, (Py_UCS4)x);
931

932
        /* As a sort-of hack, we tell calc_number_widths that we only
933
           have "remainder" characters. calc_number_widths thinks
934
           these are characters that don't get formatted, only copied
935
           into the output string. We do this for 'c' formatting,
936
           because the characters are likely to be non-digits. */
937
        n_remainder = 1;
938
    }
939
    else {
940
        int base;
941
        int leading_chars_to_skip = 0;  /* Number of characters added by
942
                                           PyNumber_ToBase that we want to
943
                                           skip over. */
944

945
        /* Compute the base and how many characters will be added by
946
           PyNumber_ToBase */
947
        switch (format->type) {
948
        case 'b':
949
            base = 2;
950
            leading_chars_to_skip = 2; /* 0b */
951
            break;
952
        case 'o':
953
            base = 8;
954
            leading_chars_to_skip = 2; /* 0o */
955
            break;
956
        case 'x':
957
        case 'X':
958
            base = 16;
959
            leading_chars_to_skip = 2; /* 0x */
960
            break;
961
        default:  /* shouldn't be needed, but stops a compiler warning */
962
        case 'd':
963
        case 'n':
964
            base = 10;
965
            break;
966
        }
967

968
        if (format->sign != '+' && format->sign != ' '
969
            && format->width == -1
970
            && format->type != 'X' && format->type != 'n'
971
            && !format->thousands_separators
972
            && PyLong_CheckExact(value))
973
        {
974
            /* Fast path */
975
            return _PyLong_FormatWriter(writer, value, base, format->alternate);
976
        }
977

978
        /* The number of prefix chars is the same as the leading
979
           chars to skip */
980
        if (format->alternate)
981
            n_prefix = leading_chars_to_skip;
982

983
        /* Do the hard part, converting to a string in a given base */
984
        tmp = _PyLong_Format(value, base);
985
        if (tmp == NULL)
986
            goto done;
987

988
        inumeric_chars = 0;
989
        n_digits = PyUnicode_GET_LENGTH(tmp);
990

991
        prefix = inumeric_chars;
992

993
        /* Is a sign character present in the output?  If so, remember it
994
           and skip it */
995
        if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
996
            sign_char = '-';
997
            ++prefix;
998
            ++leading_chars_to_skip;
999
        }
1000

1001
        /* Skip over the leading chars (0x, 0b, etc.) */
1002
        n_digits -= leading_chars_to_skip;
1003
        inumeric_chars += leading_chars_to_skip;
1004
    }
1005

1006
    /* Determine the grouping, separator, and decimal point, if any. */
1007
    if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1008
                        format->thousands_separators,
1009
                        &locale) == -1)
1010
        goto done;
1011

1012
    /* Calculate how much memory we'll need. */
1013
    n_total = calc_number_widths(&spec, n_prefix, sign_char, inumeric_chars,
1014
                                 inumeric_chars + n_digits, n_remainder, 0,
1015
                                 &locale, format, &maxchar);
1016
    if (n_total == -1) {
1017
        goto done;
1018
    }
1019

1020
    /* Allocate the memory. */
1021
    if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1022
        goto done;
1023

1024
    /* Populate the memory. */
1025
    result = fill_number(writer, &spec,
1026
                         tmp, inumeric_chars,
1027
                         tmp, prefix, format->fill_char,
1028
                         &locale, format->type == 'X');
1029

1030
done:
1031
    Py_XDECREF(tmp);
1032
    free_locale_info(&locale);
1033
    return result;
1034
}
1035

1036
/************************************************************************/
1037
/*********** float formatting *******************************************/
1038
/************************************************************************/
1039

1040
/* much of this is taken from unicodeobject.c */
1041
static int
1042
format_float_internal(PyObject *value,
1043
                      const InternalFormatSpec *format,
1044
                      _PyUnicodeWriter *writer)
1045
{
1046
    char *buf = NULL;       /* buffer returned from PyOS_double_to_string */
1047
    Py_ssize_t n_digits;
1048
    Py_ssize_t n_remainder;
1049
    Py_ssize_t n_total;
1050
    int has_decimal;
1051
    double val;
1052
    int precision, default_precision = 6;
1053
    Py_UCS4 type = format->type;
1054
    int add_pct = 0;
1055
    Py_ssize_t index;
1056
    NumberFieldWidths spec;
1057
    int flags = 0;
1058
    int result = -1;
1059
    Py_UCS4 maxchar = 127;
1060
    Py_UCS4 sign_char = '\0';
1061
    int float_type; /* Used to see if we have a nan, inf, or regular float. */
1062
    PyObject *unicode_tmp = NULL;
1063

1064
    /* Locale settings, either from the actual locale or
1065
       from a hard-code pseudo-locale */
1066
    LocaleInfo locale = LocaleInfo_STATIC_INIT;
1067

1068
    if (format->precision > INT_MAX) {
1069
        PyErr_SetString(PyExc_ValueError, "precision too big");
1070
        goto done;
1071
    }
1072
    precision = (int)format->precision;
1073

1074
    if (format->alternate)
1075
        flags |= Py_DTSF_ALT;
1076
    if (format->no_neg_0)
1077
        flags |= Py_DTSF_NO_NEG_0;
1078

1079
    if (type == '\0') {
1080
        /* Omitted type specifier.  Behaves in the same way as repr(x)
1081
           and str(x) if no precision is given, else like 'g', but with
1082
           at least one digit after the decimal point. */
1083
        flags |= Py_DTSF_ADD_DOT_0;
1084
        type = 'r';
1085
        default_precision = 0;
1086
    }
1087

1088
    if (type == 'n')
1089
        /* 'n' is the same as 'g', except for the locale used to
1090
           format the result. We take care of that later. */
1091
        type = 'g';
1092

1093
    val = PyFloat_AsDouble(value);
1094
    if (val == -1.0 && PyErr_Occurred())
1095
        goto done;
1096

1097
    if (type == '%') {
1098
        type = 'f';
1099
        val *= 100;
1100
        add_pct = 1;
1101
    }
1102

1103
    if (precision < 0)
1104
        precision = default_precision;
1105
    else if (type == 'r')
1106
        type = 'g';
1107

1108
    /* Cast "type", because if we're in unicode we need to pass an
1109
       8-bit char. This is safe, because we've restricted what "type"
1110
       can be. */
1111
    buf = PyOS_double_to_string(val, (char)type, precision, flags,
1112
                                &float_type);
1113
    if (buf == NULL)
1114
        goto done;
1115
    n_digits = strlen(buf);
1116

1117
    if (add_pct) {
1118
        /* We know that buf has a trailing zero (since we just called
1119
           strlen() on it), and we don't use that fact any more. So we
1120
           can just write over the trailing zero. */
1121
        buf[n_digits] = '%';
1122
        n_digits += 1;
1123
    }
1124

1125
    if (format->sign != '+' && format->sign != ' '
1126
        && format->width == -1
1127
        && format->type != 'n'
1128
        && !format->thousands_separators)
1129
    {
1130
        /* Fast path */
1131
        result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1132
        PyMem_Free(buf);
1133
        return result;
1134
    }
1135

1136
    /* Since there is no unicode version of PyOS_double_to_string,
1137
       just use the 8 bit version and then convert to unicode. */
1138
    unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1139
    PyMem_Free(buf);
1140
    if (unicode_tmp == NULL)
1141
        goto done;
1142

1143
    /* Is a sign character present in the output?  If so, remember it
1144
       and skip it */
1145
    index = 0;
1146
    if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1147
        sign_char = '-';
1148
        ++index;
1149
        --n_digits;
1150
    }
1151

1152
    /* Determine if we have any "remainder" (after the digits, might include
1153
       decimal or exponent or both (or neither)) */
1154
    parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1155

1156
    /* Determine the grouping, separator, and decimal point, if any. */
1157
    if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1158
                        format->thousands_separators,
1159
                        &locale) == -1)
1160
        goto done;
1161

1162
    /* Calculate how much memory we'll need. */
1163
    n_total = calc_number_widths(&spec, 0, sign_char, index,
1164
                                 index + n_digits, n_remainder, has_decimal,
1165
                                 &locale, format, &maxchar);
1166
    if (n_total == -1) {
1167
        goto done;
1168
    }
1169

1170
    /* Allocate the memory. */
1171
    if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1172
        goto done;
1173

1174
    /* Populate the memory. */
1175
    result = fill_number(writer, &spec,
1176
                         unicode_tmp, index,
1177
                         NULL, 0, format->fill_char,
1178
                         &locale, 0);
1179

1180
done:
1181
    Py_XDECREF(unicode_tmp);
1182
    free_locale_info(&locale);
1183
    return result;
1184
}
1185

1186
/************************************************************************/
1187
/*********** complex formatting *****************************************/
1188
/************************************************************************/
1189

1190
static int
1191
format_complex_internal(PyObject *value,
1192
                        const InternalFormatSpec *format,
1193
                        _PyUnicodeWriter *writer)
1194
{
1195
    double re;
1196
    double im;
1197
    char *re_buf = NULL;       /* buffer returned from PyOS_double_to_string */
1198
    char *im_buf = NULL;       /* buffer returned from PyOS_double_to_string */
1199

1200
    InternalFormatSpec tmp_format = *format;
1201
    Py_ssize_t n_re_digits;
1202
    Py_ssize_t n_im_digits;
1203
    Py_ssize_t n_re_remainder;
1204
    Py_ssize_t n_im_remainder;
1205
    Py_ssize_t n_re_total;
1206
    Py_ssize_t n_im_total;
1207
    int re_has_decimal;
1208
    int im_has_decimal;
1209
    int precision, default_precision = 6;
1210
    Py_UCS4 type = format->type;
1211
    Py_ssize_t i_re;
1212
    Py_ssize_t i_im;
1213
    NumberFieldWidths re_spec;
1214
    NumberFieldWidths im_spec;
1215
    int flags = 0;
1216
    int result = -1;
1217
    Py_UCS4 maxchar = 127;
1218
    int rkind;
1219
    void *rdata;
1220
    Py_UCS4 re_sign_char = '\0';
1221
    Py_UCS4 im_sign_char = '\0';
1222
    int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1223
    int im_float_type;
1224
    int add_parens = 0;
1225
    int skip_re = 0;
1226
    Py_ssize_t lpad;
1227
    Py_ssize_t rpad;
1228
    Py_ssize_t total;
1229
    PyObject *re_unicode_tmp = NULL;
1230
    PyObject *im_unicode_tmp = NULL;
1231

1232
    /* Locale settings, either from the actual locale or
1233
       from a hard-code pseudo-locale */
1234
    LocaleInfo locale = LocaleInfo_STATIC_INIT;
1235

1236
    if (format->precision > INT_MAX) {
1237
        PyErr_SetString(PyExc_ValueError, "precision too big");
1238
        goto done;
1239
    }
1240
    precision = (int)format->precision;
1241

1242
    /* Zero padding is not allowed. */
1243
    if (format->fill_char == '0') {
1244
        PyErr_SetString(PyExc_ValueError,
1245
                        "Zero padding is not allowed in complex format "
1246
                        "specifier");
1247
        goto done;
1248
    }
1249

1250
    /* Neither is '=' alignment . */
1251
    if (format->align == '=') {
1252
        PyErr_SetString(PyExc_ValueError,
1253
                        "'=' alignment flag is not allowed in complex format "
1254
                        "specifier");
1255
        goto done;
1256
    }
1257

1258
    re = PyComplex_RealAsDouble(value);
1259
    if (re == -1.0 && PyErr_Occurred())
1260
        goto done;
1261
    im = PyComplex_ImagAsDouble(value);
1262
    if (im == -1.0 && PyErr_Occurred())
1263
        goto done;
1264

1265
    if (format->alternate)
1266
        flags |= Py_DTSF_ALT;
1267
    if (format->no_neg_0)
1268
        flags |= Py_DTSF_NO_NEG_0;
1269

1270
    if (type == '\0') {
1271
        /* Omitted type specifier. Should be like str(self). */
1272
        type = 'r';
1273
        default_precision = 0;
1274
        if (re == 0.0 && copysign(1.0, re) == 1.0)
1275
            skip_re = 1;
1276
        else
1277
            add_parens = 1;
1278
    }
1279

1280
    if (type == 'n')
1281
        /* 'n' is the same as 'g', except for the locale used to
1282
           format the result. We take care of that later. */
1283
        type = 'g';
1284

1285
    if (precision < 0)
1286
        precision = default_precision;
1287
    else if (type == 'r')
1288
        type = 'g';
1289

1290
    /* Cast "type", because if we're in unicode we need to pass an
1291
       8-bit char. This is safe, because we've restricted what "type"
1292
       can be. */
1293
    re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1294
                                   &re_float_type);
1295
    if (re_buf == NULL)
1296
        goto done;
1297
    im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1298
                                   &im_float_type);
1299
    if (im_buf == NULL)
1300
        goto done;
1301

1302
    n_re_digits = strlen(re_buf);
1303
    n_im_digits = strlen(im_buf);
1304

1305
    /* Since there is no unicode version of PyOS_double_to_string,
1306
       just use the 8 bit version and then convert to unicode. */
1307
    re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
1308
    if (re_unicode_tmp == NULL)
1309
        goto done;
1310
    i_re = 0;
1311

1312
    im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
1313
    if (im_unicode_tmp == NULL)
1314
        goto done;
1315
    i_im = 0;
1316

1317
    /* Is a sign character present in the output?  If so, remember it
1318
       and skip it */
1319
    if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1320
        re_sign_char = '-';
1321
        ++i_re;
1322
        --n_re_digits;
1323
    }
1324
    if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1325
        im_sign_char = '-';
1326
        ++i_im;
1327
        --n_im_digits;
1328
    }
1329

1330
    /* Determine if we have any "remainder" (after the digits, might include
1331
       decimal or exponent or both (or neither)) */
1332
    parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
1333
                 &n_re_remainder, &re_has_decimal);
1334
    parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
1335
                 &n_im_remainder, &im_has_decimal);
1336

1337
    /* Determine the grouping, separator, and decimal point, if any. */
1338
    if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1339
                        format->thousands_separators,
1340
                        &locale) == -1)
1341
        goto done;
1342

1343
    /* Turn off any padding. We'll do it later after we've composed
1344
       the numbers without padding. */
1345
    tmp_format.fill_char = '\0';
1346
    tmp_format.align = '<';
1347
    tmp_format.width = -1;
1348

1349
    /* Calculate how much memory we'll need. */
1350
    n_re_total = calc_number_widths(&re_spec, 0, re_sign_char,
1351
                                    i_re, i_re + n_re_digits, n_re_remainder,
1352
                                    re_has_decimal, &locale, &tmp_format,
1353
                                    &maxchar);
1354
    if (n_re_total == -1) {
1355
        goto done;
1356
    }
1357

1358
    /* Same formatting, but always include a sign, unless the real part is
1359
     * going to be omitted, in which case we use whatever sign convention was
1360
     * requested by the original format. */
1361
    if (!skip_re)
1362
        tmp_format.sign = '+';
1363
    n_im_total = calc_number_widths(&im_spec, 0, im_sign_char,
1364
                                    i_im, i_im + n_im_digits, n_im_remainder,
1365
                                    im_has_decimal, &locale, &tmp_format,
1366
                                    &maxchar);
1367
    if (n_im_total == -1) {
1368
        goto done;
1369
    }
1370

1371
    if (skip_re)
1372
        n_re_total = 0;
1373

1374
    /* Add 1 for the 'j', and optionally 2 for parens. */
1375
    calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1376
                 format->width, format->align, &lpad, &rpad, &total);
1377

1378
    if (lpad || rpad)
1379
        maxchar = Py_MAX(maxchar, format->fill_char);
1380

1381
    if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
1382
        goto done;
1383
    rkind = writer->kind;
1384
    rdata = writer->data;
1385

1386
    /* Populate the memory. First, the padding. */
1387
    result = fill_padding(writer,
1388
                          n_re_total + n_im_total + 1 + add_parens * 2,
1389
                          format->fill_char, lpad, rpad);
1390
    if (result == -1)
1391
        goto done;
1392

1393
    if (add_parens) {
1394
        PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1395
        writer->pos++;
1396
    }
1397

1398
    if (!skip_re) {
1399
        result = fill_number(writer, &re_spec,
1400
                             re_unicode_tmp, i_re,
1401
                             NULL, 0,
1402
                             0,
1403
                             &locale, 0);
1404
        if (result == -1)
1405
            goto done;
1406
    }
1407
    result = fill_number(writer, &im_spec,
1408
                         im_unicode_tmp, i_im,
1409
                         NULL, 0,
1410
                         0,
1411
                         &locale, 0);
1412
    if (result == -1)
1413
        goto done;
1414
    PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1415
    writer->pos++;
1416

1417
    if (add_parens) {
1418
        PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1419
        writer->pos++;
1420
    }
1421

1422
    writer->pos += rpad;
1423

1424
done:
1425
    PyMem_Free(re_buf);
1426
    PyMem_Free(im_buf);
1427
    Py_XDECREF(re_unicode_tmp);
1428
    Py_XDECREF(im_unicode_tmp);
1429
    free_locale_info(&locale);
1430
    return result;
1431
}
1432

1433
/************************************************************************/
1434
/*********** built in formatters ****************************************/
1435
/************************************************************************/
1436
static int
1437
format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1438
{
1439
    PyObject *str;
1440
    int err;
1441

1442
    str = PyObject_Str(obj);
1443
    if (str == NULL)
1444
        return -1;
1445
    err = _PyUnicodeWriter_WriteStr(writer, str);
1446
    Py_DECREF(str);
1447
    return err;
1448
}
1449

1450
int
1451
_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1452
                                PyObject *obj,
1453
                                PyObject *format_spec,
1454
                                Py_ssize_t start, Py_ssize_t end)
1455
{
1456
    InternalFormatSpec format;
1457

1458
    assert(PyUnicode_Check(obj));
1459

1460
    /* check for the special case of zero length format spec, make
1461
       it equivalent to str(obj) */
1462
    if (start == end) {
1463
        if (PyUnicode_CheckExact(obj))
1464
            return _PyUnicodeWriter_WriteStr(writer, obj);
1465
        else
1466
            return format_obj(obj, writer);
1467
    }
1468

1469
    /* parse the format_spec */
1470
    if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1471
                                           &format, 's', '<'))
1472
        return -1;
1473

1474
    /* type conversion? */
1475
    switch (format.type) {
1476
    case 's':
1477
        /* no type conversion needed, already a string.  do the formatting */
1478
        return format_string_internal(obj, &format, writer);
1479
    default:
1480
        /* unknown */
1481
        unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1482
        return -1;
1483
    }
1484
}
1485

1486
int
1487
_PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1488
                             PyObject *obj,
1489
                             PyObject *format_spec,
1490
                             Py_ssize_t start, Py_ssize_t end)
1491
{
1492
    PyObject *tmp = NULL;
1493
    InternalFormatSpec format;
1494
    int result = -1;
1495

1496
    /* check for the special case of zero length format spec, make
1497
       it equivalent to str(obj) */
1498
    if (start == end) {
1499
        if (PyLong_CheckExact(obj))
1500
            return _PyLong_FormatWriter(writer, obj, 10, 0);
1501
        else
1502
            return format_obj(obj, writer);
1503
    }
1504

1505
    /* parse the format_spec */
1506
    if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1507
                                           &format, 'd', '>'))
1508
        goto done;
1509

1510
    /* type conversion? */
1511
    switch (format.type) {
1512
    case 'b':
1513
    case 'c':
1514
    case 'd':
1515
    case 'o':
1516
    case 'x':
1517
    case 'X':
1518
    case 'n':
1519
        /* no type conversion needed, already an int.  do the formatting */
1520
        result = format_long_internal(obj, &format, writer);
1521
        break;
1522

1523
    case 'e':
1524
    case 'E':
1525
    case 'f':
1526
    case 'F':
1527
    case 'g':
1528
    case 'G':
1529
    case '%':
1530
        /* convert to float */
1531
        tmp = PyNumber_Float(obj);
1532
        if (tmp == NULL)
1533
            goto done;
1534
        result = format_float_internal(tmp, &format, writer);
1535
        break;
1536

1537
    default:
1538
        /* unknown */
1539
        unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1540
        goto done;
1541
    }
1542

1543
done:
1544
    Py_XDECREF(tmp);
1545
    return result;
1546
}
1547

1548
int
1549
_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1550
                              PyObject *obj,
1551
                              PyObject *format_spec,
1552
                              Py_ssize_t start, Py_ssize_t end)
1553
{
1554
    InternalFormatSpec format;
1555

1556
    /* check for the special case of zero length format spec, make
1557
       it equivalent to str(obj) */
1558
    if (start == end)
1559
        return format_obj(obj, writer);
1560

1561
    /* parse the format_spec */
1562
    if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1563
                                           &format, '\0', '>'))
1564
        return -1;
1565

1566
    /* type conversion? */
1567
    switch (format.type) {
1568
    case '\0': /* No format code: like 'g', but with at least one decimal. */
1569
    case 'e':
1570
    case 'E':
1571
    case 'f':
1572
    case 'F':
1573
    case 'g':
1574
    case 'G':
1575
    case 'n':
1576
    case '%':
1577
        /* no conversion, already a float.  do the formatting */
1578
        return format_float_internal(obj, &format, writer);
1579

1580
    default:
1581
        /* unknown */
1582
        unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1583
        return -1;
1584
    }
1585
}
1586

1587
int
1588
_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1589
                                PyObject *obj,
1590
                                PyObject *format_spec,
1591
                                Py_ssize_t start, Py_ssize_t end)
1592
{
1593
    InternalFormatSpec format;
1594

1595
    /* check for the special case of zero length format spec, make
1596
       it equivalent to str(obj) */
1597
    if (start == end)
1598
        return format_obj(obj, writer);
1599

1600
    /* parse the format_spec */
1601
    if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1602
                                           &format, '\0', '>'))
1603
        return -1;
1604

1605
    /* type conversion? */
1606
    switch (format.type) {
1607
    case '\0': /* No format code: like 'g', but with at least one decimal. */
1608
    case 'e':
1609
    case 'E':
1610
    case 'f':
1611
    case 'F':
1612
    case 'g':
1613
    case 'G':
1614
    case 'n':
1615
        /* no conversion, already a complex.  do the formatting */
1616
        return format_complex_internal(obj, &format, writer);
1617

1618
    default:
1619
        /* unknown */
1620
        unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1621
        return -1;
1622
    }
1623
}
1624

1625
Product

Resources

Company