Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Python/formatter_unicode.c
12 views
1
/* implements the unicode (as opposed to string) version of the
2
built-in formatters for string, int, float. that is, the versions
3
of int.__format__, etc., that take and return unicode objects */
4
5
#include "Python.h"
6
#include "pycore_fileutils.h" // _Py_GetLocaleconvNumeric()
7
#include "pycore_long.h" // _PyLong_FormatWriter()
8
#include <locale.h>
9
10
/* Raises an exception about an unknown presentation type for this
11
* type. */
12
13
static void
14
unknown_presentation_type(Py_UCS4 presentation_type,
15
const char* type_name)
16
{
17
/* %c might be out-of-range, hence the two cases. */
18
if (presentation_type > 32 && presentation_type < 128)
19
PyErr_Format(PyExc_ValueError,
20
"Unknown format code '%c' "
21
"for object of type '%.200s'",
22
(char)presentation_type,
23
type_name);
24
else
25
PyErr_Format(PyExc_ValueError,
26
"Unknown format code '\\x%x' "
27
"for object of type '%.200s'",
28
(unsigned int)presentation_type,
29
type_name);
30
}
31
32
static void
33
invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type)
34
{
35
assert(specifier == ',' || specifier == '_');
36
if (presentation_type > 32 && presentation_type < 128)
37
PyErr_Format(PyExc_ValueError,
38
"Cannot specify '%c' with '%c'.",
39
specifier, (char)presentation_type);
40
else
41
PyErr_Format(PyExc_ValueError,
42
"Cannot specify '%c' with '\\x%x'.",
43
specifier, (unsigned int)presentation_type);
44
}
45
46
static void
47
invalid_comma_and_underscore(void)
48
{
49
PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
50
}
51
52
/*
53
get_integer consumes 0 or more decimal digit characters from an
54
input string, updates *result with the corresponding positive
55
integer, and returns the number of digits consumed.
56
57
returns -1 on error.
58
*/
59
static int
60
get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
61
Py_ssize_t *result)
62
{
63
Py_ssize_t accumulator, digitval, pos = *ppos;
64
int numdigits;
65
int kind = PyUnicode_KIND(str);
66
const void *data = PyUnicode_DATA(str);
67
68
accumulator = numdigits = 0;
69
for (; pos < end; pos++, numdigits++) {
70
digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
71
if (digitval < 0)
72
break;
73
/*
74
Detect possible overflow before it happens:
75
76
accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
77
accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
78
*/
79
if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
80
PyErr_Format(PyExc_ValueError,
81
"Too many decimal digits in format string");
82
*ppos = pos;
83
return -1;
84
}
85
accumulator = accumulator * 10 + digitval;
86
}
87
*ppos = pos;
88
*result = accumulator;
89
return numdigits;
90
}
91
92
/************************************************************************/
93
/*********** standard format specifier parsing **************************/
94
/************************************************************************/
95
96
/* returns true if this character is a specifier alignment token */
97
Py_LOCAL_INLINE(int)
98
is_alignment_token(Py_UCS4 c)
99
{
100
switch (c) {
101
case '<': case '>': case '=': case '^':
102
return 1;
103
default:
104
return 0;
105
}
106
}
107
108
/* returns true if this character is a sign element */
109
Py_LOCAL_INLINE(int)
110
is_sign_element(Py_UCS4 c)
111
{
112
switch (c) {
113
case ' ': case '+': case '-':
114
return 1;
115
default:
116
return 0;
117
}
118
}
119
120
/* Locale type codes.

   LT_NO_LOCALE must be zero, because the field holding one of these
   codes is tested for truth to decide whether grouping was requested.
   The codes for ',' and '_' are the characters themselves, so they can
   be reported directly in error messages. */
enum LocaleType {
    /* No grouping requested. */
    LT_NO_LOCALE = 0,
    /* ',' in the specifier. */
    LT_DEFAULT_LOCALE = ',',
    /* '_' in the specifier. */
    LT_UNDERSCORE_LOCALE = '_',
    /* '_' combined with a bin/oct/hex type: groups of four digits. */
    LT_UNDER_FOUR_LOCALE,
    /* Grouping information taken from the current C locale. */
    LT_CURRENT_LOCALE
};
128
129
typedef struct {
130
Py_UCS4 fill_char;
131
Py_UCS4 align;
132
int alternate;
133
int no_neg_0;
134
Py_UCS4 sign;
135
Py_ssize_t width;
136
enum LocaleType thousands_separators;
137
Py_ssize_t precision;
138
Py_UCS4 type;
139
} InternalFormatSpec;
140
141
142
/*
143
ptr points to the start of the format_spec, end points just past its end.
144
fills in format with the parsed information.
145
returns 1 on success, 0 on failure.
146
if failure, sets the exception
147
*/
148
static int
149
parse_internal_render_format_spec(PyObject *obj,
150
PyObject *format_spec,
151
Py_ssize_t start, Py_ssize_t end,
152
InternalFormatSpec *format,
153
char default_type,
154
char default_align)
155
{
156
Py_ssize_t pos = start;
157
int kind = PyUnicode_KIND(format_spec);
158
const void *data = PyUnicode_DATA(format_spec);
159
/* end-pos is used throughout this code to specify the length of
160
the input string */
161
#define READ_spec(index) PyUnicode_READ(kind, data, index)
162
163
Py_ssize_t consumed;
164
int align_specified = 0;
165
int fill_char_specified = 0;
166
167
format->fill_char = ' ';
168
format->align = default_align;
169
format->alternate = 0;
170
format->no_neg_0 = 0;
171
format->sign = '\0';
172
format->width = -1;
173
format->thousands_separators = LT_NO_LOCALE;
174
format->precision = -1;
175
format->type = default_type;
176
177
/* If the second char is an alignment token,
178
then parse the fill char */
179
if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
180
format->align = READ_spec(pos+1);
181
format->fill_char = READ_spec(pos);
182
fill_char_specified = 1;
183
align_specified = 1;
184
pos += 2;
185
}
186
else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
187
format->align = READ_spec(pos);
188
align_specified = 1;
189
++pos;
190
}
191
192
/* Parse the various sign options */
193
if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
194
format->sign = READ_spec(pos);
195
++pos;
196
}
197
198
/* If the next character is z, request coercion of negative 0.
199
Applies only to floats. */
200
if (end-pos >= 1 && READ_spec(pos) == 'z') {
201
format->no_neg_0 = 1;
202
++pos;
203
}
204
205
/* If the next character is #, we're in alternate mode. This only
206
applies to integers. */
207
if (end-pos >= 1 && READ_spec(pos) == '#') {
208
format->alternate = 1;
209
++pos;
210
}
211
212
/* The special case for 0-padding (backwards compat) */
213
if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
214
format->fill_char = '0';
215
if (!align_specified && default_align == '>') {
216
format->align = '=';
217
}
218
++pos;
219
}
220
221
consumed = get_integer(format_spec, &pos, end, &format->width);
222
if (consumed == -1)
223
/* Overflow error. Exception already set. */
224
return 0;
225
226
/* If consumed is 0, we didn't consume any characters for the
227
width. In that case, reset the width to -1, because
228
get_integer() will have set it to zero. -1 is how we record
229
that the width wasn't specified. */
230
if (consumed == 0)
231
format->width = -1;
232
233
/* Comma signifies add thousands separators */
234
if (end-pos && READ_spec(pos) == ',') {
235
format->thousands_separators = LT_DEFAULT_LOCALE;
236
++pos;
237
}
238
/* Underscore signifies add thousands separators */
239
if (end-pos && READ_spec(pos) == '_') {
240
if (format->thousands_separators != LT_NO_LOCALE) {
241
invalid_comma_and_underscore();
242
return 0;
243
}
244
format->thousands_separators = LT_UNDERSCORE_LOCALE;
245
++pos;
246
}
247
if (end-pos && READ_spec(pos) == ',') {
248
if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
249
invalid_comma_and_underscore();
250
return 0;
251
}
252
}
253
254
/* Parse field precision */
255
if (end-pos && READ_spec(pos) == '.') {
256
++pos;
257
258
consumed = get_integer(format_spec, &pos, end, &format->precision);
259
if (consumed == -1)
260
/* Overflow error. Exception already set. */
261
return 0;
262
263
/* Not having a precision after a dot is an error. */
264
if (consumed == 0) {
265
PyErr_Format(PyExc_ValueError,
266
"Format specifier missing precision");
267
return 0;
268
}
269
270
}
271
272
/* Finally, parse the type field. */
273
274
if (end-pos > 1) {
275
/* More than one char remains, so this is an invalid format
276
specifier. */
277
/* Create a temporary object that contains the format spec we're
278
operating on. It's format_spec[start:end] (in Python syntax). */
279
PyObject* actual_format_spec = PyUnicode_FromKindAndData(kind,
280
(char*)data + kind*start,
281
end-start);
282
if (actual_format_spec != NULL) {
283
PyErr_Format(PyExc_ValueError,
284
"Invalid format specifier '%U' for object of type '%.200s'",
285
actual_format_spec, Py_TYPE(obj)->tp_name);
286
Py_DECREF(actual_format_spec);
287
}
288
return 0;
289
}
290
291
if (end-pos == 1) {
292
format->type = READ_spec(pos);
293
++pos;
294
}
295
296
/* Do as much validating as we can, just by looking at the format
297
specifier. Do not take into account what type of formatting
298
we're doing (int, float, string). */
299
300
if (format->thousands_separators) {
301
switch (format->type) {
302
case 'd':
303
case 'e':
304
case 'f':
305
case 'g':
306
case 'E':
307
case 'G':
308
case '%':
309
case 'F':
310
case '\0':
311
/* These are allowed. See PEP 378.*/
312
break;
313
case 'b':
314
case 'o':
315
case 'x':
316
case 'X':
317
/* Underscores are allowed in bin/oct/hex. See PEP 515. */
318
if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
319
/* Every four digits, not every three, in bin/oct/hex. */
320
format->thousands_separators = LT_UNDER_FOUR_LOCALE;
321
break;
322
}
323
/* fall through */
324
default:
325
invalid_thousands_separator_type(format->thousands_separators, format->type);
326
return 0;
327
}
328
}
329
330
assert (format->align <= 127);
331
assert (format->sign <= 127);
332
return 1;
333
}
334
335
/* Calculate the padding needed. */
336
static void
337
calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
338
Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
339
Py_ssize_t *n_total)
340
{
341
if (width >= 0) {
342
if (nchars > width)
343
*n_total = nchars;
344
else
345
*n_total = width;
346
}
347
else {
348
/* not specified, use all of the chars and no more */
349
*n_total = nchars;
350
}
351
352
/* Figure out how much leading space we need, based on the
353
aligning */
354
if (align == '>')
355
*n_lpadding = *n_total - nchars;
356
else if (align == '^')
357
*n_lpadding = (*n_total - nchars) / 2;
358
else if (align == '<' || align == '=')
359
*n_lpadding = 0;
360
else {
361
/* We should never have an unspecified alignment. */
362
Py_UNREACHABLE();
363
}
364
365
*n_rpadding = *n_total - nchars - *n_lpadding;
366
}
367
368
/* Do the padding, and return a pointer to where the caller-supplied
369
content goes. */
370
static int
371
fill_padding(_PyUnicodeWriter *writer,
372
Py_ssize_t nchars,
373
Py_UCS4 fill_char, Py_ssize_t n_lpadding,
374
Py_ssize_t n_rpadding)
375
{
376
Py_ssize_t pos;
377
378
/* Pad on left. */
379
if (n_lpadding) {
380
pos = writer->pos;
381
_PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
382
}
383
384
/* Pad on right. */
385
if (n_rpadding) {
386
pos = writer->pos + nchars + n_lpadding;
387
_PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
388
}
389
390
/* Pointer to the user content. */
391
writer->pos += n_lpadding;
392
return 0;
393
}
394
395
/************************************************************************/
396
/*********** common routines for numeric formatting *********************/
397
/************************************************************************/
398
399
/* Locale info needed for formatting integers and the part of floats
400
before and including the decimal. Note that locales only support
401
8-bit chars, not unicode. */
402
typedef struct {
403
PyObject *decimal_point;
404
PyObject *thousands_sep;
405
const char *grouping;
406
char *grouping_buffer;
407
} LocaleInfo;
408
409
#define LocaleInfo_STATIC_INIT {0, 0, 0, 0}
410
411
/* describes the layout for an integer, see the comment in
412
calc_number_widths() for details */
413
typedef struct {
414
Py_ssize_t n_lpadding;
415
Py_ssize_t n_prefix;
416
Py_ssize_t n_spadding;
417
Py_ssize_t n_rpadding;
418
char sign;
419
Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
420
Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
421
any grouping chars. */
422
Py_ssize_t n_decimal; /* 0 if only an integer */
423
Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
424
excluding the decimal itself, if
425
present. */
426
427
/* These 2 are not the widths of fields, but are needed by
428
STRINGLIB_GROUPING. */
429
Py_ssize_t n_digits; /* The number of digits before a decimal
430
or exponent. */
431
Py_ssize_t n_min_width; /* The min_width we used when we computed
432
the n_grouped_digits width. */
433
} NumberFieldWidths;
434
435
436
/* Given a number of the form:
437
digits[remainder]
438
where ptr points to the start and end points to the end, find where
439
the integer part ends. This could be a decimal, an exponent, both,
440
or neither.
441
If a decimal point is present, set *has_decimal and increment
442
remainder beyond it.
443
Results are undefined (but shouldn't crash) for improperly
444
formatted strings.
445
*/
446
static void
447
parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
448
Py_ssize_t *n_remainder, int *has_decimal)
449
{
450
Py_ssize_t remainder;
451
int kind = PyUnicode_KIND(s);
452
const void *data = PyUnicode_DATA(s);
453
454
while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
455
++pos;
456
remainder = pos;
457
458
/* Does remainder start with a decimal point? */
459
*has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
460
461
/* Skip the decimal point. */
462
if (*has_decimal)
463
remainder++;
464
465
*n_remainder = end - remainder;
466
}
467
468
/* not all fields of format are used. for example, precision is
469
unused. should this take discrete params in order to be more clear
470
about what it does? or is passing a single format parameter easier
471
and more efficient enough to justify a little obfuscation?
472
Return -1 on error. */
473
static Py_ssize_t
474
calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
475
Py_UCS4 sign_char, Py_ssize_t n_start,
476
Py_ssize_t n_end, Py_ssize_t n_remainder,
477
int has_decimal, const LocaleInfo *locale,
478
const InternalFormatSpec *format, Py_UCS4 *maxchar)
479
{
480
Py_ssize_t n_non_digit_non_padding;
481
Py_ssize_t n_padding;
482
483
spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
484
spec->n_lpadding = 0;
485
spec->n_prefix = n_prefix;
486
spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
487
spec->n_remainder = n_remainder;
488
spec->n_spadding = 0;
489
spec->n_rpadding = 0;
490
spec->sign = '\0';
491
spec->n_sign = 0;
492
493
/* the output will look like:
494
| |
495
| <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
496
| |
497
498
sign is computed from format->sign and the actual
499
sign of the number
500
501
prefix is given (it's for the '0x' prefix)
502
503
digits is already known
504
505
the total width is either given, or computed from the
506
actual digits
507
508
only one of lpadding, spadding, and rpadding can be non-zero,
509
and it's calculated from the width and other fields
510
*/
511
512
/* compute the various parts we're going to write */
513
switch (format->sign) {
514
case '+':
515
/* always put a + or - */
516
spec->n_sign = 1;
517
spec->sign = (sign_char == '-' ? '-' : '+');
518
break;
519
case ' ':
520
spec->n_sign = 1;
521
spec->sign = (sign_char == '-' ? '-' : ' ');
522
break;
523
default:
524
/* Not specified, or the default (-) */
525
if (sign_char == '-') {
526
spec->n_sign = 1;
527
spec->sign = '-';
528
}
529
}
530
531
/* The number of chars used for non-digits and non-padding. */
532
n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
533
spec->n_remainder;
534
535
/* min_width can go negative, that's okay. format->width == -1 means
536
we don't care. */
537
if (format->fill_char == '0' && format->align == '=')
538
spec->n_min_width = format->width - n_non_digit_non_padding;
539
else
540
spec->n_min_width = 0;
541
542
if (spec->n_digits == 0)
543
/* This case only occurs when using 'c' formatting, we need
544
to special case it because the grouping code always wants
545
to have at least one character. */
546
spec->n_grouped_digits = 0;
547
else {
548
Py_UCS4 grouping_maxchar;
549
spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
550
NULL, 0,
551
NULL, 0, spec->n_digits,
552
spec->n_min_width,
553
locale->grouping, locale->thousands_sep, &grouping_maxchar);
554
if (spec->n_grouped_digits == -1) {
555
return -1;
556
}
557
*maxchar = Py_MAX(*maxchar, grouping_maxchar);
558
}
559
560
/* Given the desired width and the total of digit and non-digit
561
space we consume, see if we need any padding. format->width can
562
be negative (meaning no padding), but this code still works in
563
that case. */
564
n_padding = format->width -
565
(n_non_digit_non_padding + spec->n_grouped_digits);
566
if (n_padding > 0) {
567
/* Some padding is needed. Determine if it's left, space, or right. */
568
switch (format->align) {
569
case '<':
570
spec->n_rpadding = n_padding;
571
break;
572
case '^':
573
spec->n_lpadding = n_padding / 2;
574
spec->n_rpadding = n_padding - spec->n_lpadding;
575
break;
576
case '=':
577
spec->n_spadding = n_padding;
578
break;
579
case '>':
580
spec->n_lpadding = n_padding;
581
break;
582
default:
583
/* Shouldn't get here */
584
Py_UNREACHABLE();
585
}
586
}
587
588
if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
589
*maxchar = Py_MAX(*maxchar, format->fill_char);
590
591
if (spec->n_decimal)
592
*maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
593
594
return spec->n_lpadding + spec->n_sign + spec->n_prefix +
595
spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
596
spec->n_remainder + spec->n_rpadding;
597
}
598
599
/* Fill in the digit parts of a number's string representation,
600
as determined in calc_number_widths().
601
Return -1 on error, or 0 on success. */
602
static int
603
fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
604
PyObject *digits, Py_ssize_t d_start,
605
PyObject *prefix, Py_ssize_t p_start,
606
Py_UCS4 fill_char,
607
LocaleInfo *locale, int toupper)
608
{
609
/* Used to keep track of digits, decimal, and remainder. */
610
Py_ssize_t d_pos = d_start;
611
const int kind = writer->kind;
612
const void *data = writer->data;
613
Py_ssize_t r;
614
615
if (spec->n_lpadding) {
616
_PyUnicode_FastFill(writer->buffer,
617
writer->pos, spec->n_lpadding, fill_char);
618
writer->pos += spec->n_lpadding;
619
}
620
if (spec->n_sign == 1) {
621
PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
622
writer->pos++;
623
}
624
if (spec->n_prefix) {
625
_PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
626
prefix, p_start,
627
spec->n_prefix);
628
if (toupper) {
629
Py_ssize_t t;
630
for (t = 0; t < spec->n_prefix; t++) {
631
Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
632
c = Py_TOUPPER(c);
633
assert (c <= 127);
634
PyUnicode_WRITE(kind, data, writer->pos + t, c);
635
}
636
}
637
writer->pos += spec->n_prefix;
638
}
639
if (spec->n_spadding) {
640
_PyUnicode_FastFill(writer->buffer,
641
writer->pos, spec->n_spadding, fill_char);
642
writer->pos += spec->n_spadding;
643
}
644
645
/* Only for type 'c' special case, it has no digits. */
646
if (spec->n_digits != 0) {
647
/* Fill the digits with InsertThousandsGrouping. */
648
r = _PyUnicode_InsertThousandsGrouping(
649
writer, spec->n_grouped_digits,
650
digits, d_pos, spec->n_digits,
651
spec->n_min_width,
652
locale->grouping, locale->thousands_sep, NULL);
653
if (r == -1)
654
return -1;
655
assert(r == spec->n_grouped_digits);
656
d_pos += spec->n_digits;
657
}
658
if (toupper) {
659
Py_ssize_t t;
660
for (t = 0; t < spec->n_grouped_digits; t++) {
661
Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
662
c = Py_TOUPPER(c);
663
if (c > 127) {
664
PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
665
return -1;
666
}
667
PyUnicode_WRITE(kind, data, writer->pos + t, c);
668
}
669
}
670
writer->pos += spec->n_grouped_digits;
671
672
if (spec->n_decimal) {
673
_PyUnicode_FastCopyCharacters(
674
writer->buffer, writer->pos,
675
locale->decimal_point, 0, spec->n_decimal);
676
writer->pos += spec->n_decimal;
677
d_pos += 1;
678
}
679
680
if (spec->n_remainder) {
681
_PyUnicode_FastCopyCharacters(
682
writer->buffer, writer->pos,
683
digits, d_pos, spec->n_remainder);
684
writer->pos += spec->n_remainder;
685
/* d_pos += spec->n_remainder; */
686
}
687
688
if (spec->n_rpadding) {
689
_PyUnicode_FastFill(writer->buffer,
690
writer->pos, spec->n_rpadding,
691
fill_char);
692
writer->pos += spec->n_rpadding;
693
}
694
return 0;
695
}
696
697
/* Grouping description used when no grouping is requested: a single
   CHAR_MAX entry. */
static const char no_grouping[] = { CHAR_MAX };
698
699
/* Find the decimal point character(s?), thousands_separator(s?), and
700
grouping description, either for the current locale if type is
701
LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
702
LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
703
static int
704
get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
705
{
706
switch (type) {
707
case LT_CURRENT_LOCALE: {
708
struct lconv *lc = localeconv();
709
if (_Py_GetLocaleconvNumeric(lc,
710
&locale_info->decimal_point,
711
&locale_info->thousands_sep) < 0) {
712
return -1;
713
}
714
715
/* localeconv() grouping can become a dangling pointer or point
716
to a different string if another thread calls localeconv() during
717
the string formatting. Copy the string to avoid this risk. */
718
locale_info->grouping_buffer = _PyMem_Strdup(lc->grouping);
719
if (locale_info->grouping_buffer == NULL) {
720
PyErr_NoMemory();
721
return -1;
722
}
723
locale_info->grouping = locale_info->grouping_buffer;
724
break;
725
}
726
case LT_DEFAULT_LOCALE:
727
case LT_UNDERSCORE_LOCALE:
728
case LT_UNDER_FOUR_LOCALE:
729
locale_info->decimal_point = PyUnicode_FromOrdinal('.');
730
locale_info->thousands_sep = PyUnicode_FromOrdinal(
731
type == LT_DEFAULT_LOCALE ? ',' : '_');
732
if (!locale_info->decimal_point || !locale_info->thousands_sep)
733
return -1;
734
if (type != LT_UNDER_FOUR_LOCALE)
735
locale_info->grouping = "\3"; /* Group every 3 characters. The
736
(implicit) trailing 0 means repeat
737
infinitely. */
738
else
739
locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
740
break;
741
case LT_NO_LOCALE:
742
locale_info->decimal_point = PyUnicode_FromOrdinal('.');
743
locale_info->thousands_sep = PyUnicode_New(0, 0);
744
if (!locale_info->decimal_point || !locale_info->thousands_sep)
745
return -1;
746
locale_info->grouping = no_grouping;
747
break;
748
}
749
return 0;
750
}
751
752
static void
753
free_locale_info(LocaleInfo *locale_info)
754
{
755
Py_XDECREF(locale_info->decimal_point);
756
Py_XDECREF(locale_info->thousands_sep);
757
PyMem_Free(locale_info->grouping_buffer);
758
}
759
760
/************************************************************************/
761
/*********** string formatting ******************************************/
762
/************************************************************************/
763
764
static int
765
format_string_internal(PyObject *value, const InternalFormatSpec *format,
766
_PyUnicodeWriter *writer)
767
{
768
Py_ssize_t lpad;
769
Py_ssize_t rpad;
770
Py_ssize_t total;
771
Py_ssize_t len;
772
int result = -1;
773
Py_UCS4 maxchar;
774
775
assert(PyUnicode_IS_READY(value));
776
len = PyUnicode_GET_LENGTH(value);
777
778
/* sign is not allowed on strings */
779
if (format->sign != '\0') {
780
if (format->sign == ' ') {
781
PyErr_SetString(PyExc_ValueError,
782
"Space not allowed in string format specifier");
783
}
784
else {
785
PyErr_SetString(PyExc_ValueError,
786
"Sign not allowed in string format specifier");
787
}
788
goto done;
789
}
790
791
/* negative 0 coercion is not allowed on strings */
792
if (format->no_neg_0) {
793
PyErr_SetString(PyExc_ValueError,
794
"Negative zero coercion (z) not allowed in string format "
795
"specifier");
796
goto done;
797
}
798
799
/* alternate is not allowed on strings */
800
if (format->alternate) {
801
PyErr_SetString(PyExc_ValueError,
802
"Alternate form (#) not allowed in string format "
803
"specifier");
804
goto done;
805
}
806
807
/* '=' alignment not allowed on strings */
808
if (format->align == '=') {
809
PyErr_SetString(PyExc_ValueError,
810
"'=' alignment not allowed "
811
"in string format specifier");
812
goto done;
813
}
814
815
if ((format->width == -1 || format->width <= len)
816
&& (format->precision == -1 || format->precision >= len)) {
817
/* Fast path */
818
return _PyUnicodeWriter_WriteStr(writer, value);
819
}
820
821
/* if precision is specified, output no more that format.precision
822
characters */
823
if (format->precision >= 0 && len >= format->precision) {
824
len = format->precision;
825
}
826
827
calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
828
829
maxchar = writer->maxchar;
830
if (lpad != 0 || rpad != 0)
831
maxchar = Py_MAX(maxchar, format->fill_char);
832
if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
833
Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
834
maxchar = Py_MAX(maxchar, valmaxchar);
835
}
836
837
/* allocate the resulting string */
838
if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
839
goto done;
840
841
/* Write into that space. First the padding. */
842
result = fill_padding(writer, len, format->fill_char, lpad, rpad);
843
if (result == -1)
844
goto done;
845
846
/* Then the source string. */
847
if (len) {
848
_PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
849
value, 0, len);
850
}
851
writer->pos += (len + rpad);
852
result = 0;
853
854
done:
855
return result;
856
}
857
858
859
/************************************************************************/
860
/*********** long formatting ********************************************/
861
/************************************************************************/
862
863
static int
864
format_long_internal(PyObject *value, const InternalFormatSpec *format,
865
_PyUnicodeWriter *writer)
866
{
867
int result = -1;
868
Py_UCS4 maxchar = 127;
869
PyObject *tmp = NULL;
870
Py_ssize_t inumeric_chars;
871
Py_UCS4 sign_char = '\0';
872
Py_ssize_t n_digits; /* count of digits need from the computed
873
string */
874
Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
875
produces non-digits */
876
Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
877
Py_ssize_t n_total;
878
Py_ssize_t prefix = 0;
879
NumberFieldWidths spec;
880
long x;
881
882
/* Locale settings, either from the actual locale or
883
from a hard-code pseudo-locale */
884
LocaleInfo locale = LocaleInfo_STATIC_INIT;
885
886
/* no precision allowed on integers */
887
if (format->precision != -1) {
888
PyErr_SetString(PyExc_ValueError,
889
"Precision not allowed in integer format specifier");
890
goto done;
891
}
892
/* no negative zero coercion on integers */
893
if (format->no_neg_0) {
894
PyErr_SetString(PyExc_ValueError,
895
"Negative zero coercion (z) not allowed in integer"
896
" format specifier");
897
goto done;
898
}
899
900
/* special case for character formatting */
901
if (format->type == 'c') {
902
/* error to specify a sign */
903
if (format->sign != '\0') {
904
PyErr_SetString(PyExc_ValueError,
905
"Sign not allowed with integer"
906
" format specifier 'c'");
907
goto done;
908
}
909
/* error to request alternate format */
910
if (format->alternate) {
911
PyErr_SetString(PyExc_ValueError,
912
"Alternate form (#) not allowed with integer"
913
" format specifier 'c'");
914
goto done;
915
}
916
917
/* taken from unicodeobject.c formatchar() */
918
/* Integer input truncated to a character */
919
x = PyLong_AsLong(value);
920
if (x == -1 && PyErr_Occurred())
921
goto done;
922
if (x < 0 || x > 0x10ffff) {
923
PyErr_SetString(PyExc_OverflowError,
924
"%c arg not in range(0x110000)");
925
goto done;
926
}
927
tmp = PyUnicode_FromOrdinal(x);
928
inumeric_chars = 0;
929
n_digits = 1;
930
maxchar = Py_MAX(maxchar, (Py_UCS4)x);
931
932
/* As a sort-of hack, we tell calc_number_widths that we only
933
have "remainder" characters. calc_number_widths thinks
934
these are characters that don't get formatted, only copied
935
into the output string. We do this for 'c' formatting,
936
because the characters are likely to be non-digits. */
937
n_remainder = 1;
938
}
939
else {
940
int base;
941
int leading_chars_to_skip = 0; /* Number of characters added by
942
PyNumber_ToBase that we want to
943
skip over. */
944
945
/* Compute the base and how many characters will be added by
946
PyNumber_ToBase */
947
switch (format->type) {
948
case 'b':
949
base = 2;
950
leading_chars_to_skip = 2; /* 0b */
951
break;
952
case 'o':
953
base = 8;
954
leading_chars_to_skip = 2; /* 0o */
955
break;
956
case 'x':
957
case 'X':
958
base = 16;
959
leading_chars_to_skip = 2; /* 0x */
960
break;
961
default: /* shouldn't be needed, but stops a compiler warning */
962
case 'd':
963
case 'n':
964
base = 10;
965
break;
966
}
967
968
if (format->sign != '+' && format->sign != ' '
969
&& format->width == -1
970
&& format->type != 'X' && format->type != 'n'
971
&& !format->thousands_separators
972
&& PyLong_CheckExact(value))
973
{
974
/* Fast path */
975
return _PyLong_FormatWriter(writer, value, base, format->alternate);
976
}
977
978
/* The number of prefix chars is the same as the leading
979
chars to skip */
980
if (format->alternate)
981
n_prefix = leading_chars_to_skip;
982
983
/* Do the hard part, converting to a string in a given base */
984
tmp = _PyLong_Format(value, base);
985
if (tmp == NULL)
986
goto done;
987
988
inumeric_chars = 0;
989
n_digits = PyUnicode_GET_LENGTH(tmp);
990
991
prefix = inumeric_chars;
992
993
/* Is a sign character present in the output? If so, remember it
994
and skip it */
995
if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
996
sign_char = '-';
997
++prefix;
998
++leading_chars_to_skip;
999
}
1000
1001
/* Skip over the leading chars (0x, 0b, etc.) */
1002
n_digits -= leading_chars_to_skip;
1003
inumeric_chars += leading_chars_to_skip;
1004
}
1005
1006
/* Determine the grouping, separator, and decimal point, if any. */
1007
if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1008
format->thousands_separators,
1009
&locale) == -1)
1010
goto done;
1011
1012
/* Calculate how much memory we'll need. */
1013
n_total = calc_number_widths(&spec, n_prefix, sign_char, inumeric_chars,
1014
inumeric_chars + n_digits, n_remainder, 0,
1015
&locale, format, &maxchar);
1016
if (n_total == -1) {
1017
goto done;
1018
}
1019
1020
/* Allocate the memory. */
1021
if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1022
goto done;
1023
1024
/* Populate the memory. */
1025
result = fill_number(writer, &spec,
1026
tmp, inumeric_chars,
1027
tmp, prefix, format->fill_char,
1028
&locale, format->type == 'X');
1029
1030
done:
1031
Py_XDECREF(tmp);
1032
free_locale_info(&locale);
1033
return result;
1034
}
1035
1036
/************************************************************************/
1037
/*********** float formatting *******************************************/
1038
/************************************************************************/
1039
1040
/* much of this is taken from unicodeobject.c */
1041
static int
1042
format_float_internal(PyObject *value,
1043
const InternalFormatSpec *format,
1044
_PyUnicodeWriter *writer)
1045
{
1046
char *buf = NULL; /* buffer returned from PyOS_double_to_string */
1047
Py_ssize_t n_digits;
1048
Py_ssize_t n_remainder;
1049
Py_ssize_t n_total;
1050
int has_decimal;
1051
double val;
1052
int precision, default_precision = 6;
1053
Py_UCS4 type = format->type;
1054
int add_pct = 0;
1055
Py_ssize_t index;
1056
NumberFieldWidths spec;
1057
int flags = 0;
1058
int result = -1;
1059
Py_UCS4 maxchar = 127;
1060
Py_UCS4 sign_char = '\0';
1061
int float_type; /* Used to see if we have a nan, inf, or regular float. */
1062
PyObject *unicode_tmp = NULL;
1063
1064
/* Locale settings, either from the actual locale or
1065
from a hard-code pseudo-locale */
1066
LocaleInfo locale = LocaleInfo_STATIC_INIT;
1067
1068
if (format->precision > INT_MAX) {
1069
PyErr_SetString(PyExc_ValueError, "precision too big");
1070
goto done;
1071
}
1072
precision = (int)format->precision;
1073
1074
if (format->alternate)
1075
flags |= Py_DTSF_ALT;
1076
if (format->no_neg_0)
1077
flags |= Py_DTSF_NO_NEG_0;
1078
1079
if (type == '\0') {
1080
/* Omitted type specifier. Behaves in the same way as repr(x)
1081
and str(x) if no precision is given, else like 'g', but with
1082
at least one digit after the decimal point. */
1083
flags |= Py_DTSF_ADD_DOT_0;
1084
type = 'r';
1085
default_precision = 0;
1086
}
1087
1088
if (type == 'n')
1089
/* 'n' is the same as 'g', except for the locale used to
1090
format the result. We take care of that later. */
1091
type = 'g';
1092
1093
val = PyFloat_AsDouble(value);
1094
if (val == -1.0 && PyErr_Occurred())
1095
goto done;
1096
1097
if (type == '%') {
1098
type = 'f';
1099
val *= 100;
1100
add_pct = 1;
1101
}
1102
1103
if (precision < 0)
1104
precision = default_precision;
1105
else if (type == 'r')
1106
type = 'g';
1107
1108
/* Cast "type", because if we're in unicode we need to pass an
1109
8-bit char. This is safe, because we've restricted what "type"
1110
can be. */
1111
buf = PyOS_double_to_string(val, (char)type, precision, flags,
1112
&float_type);
1113
if (buf == NULL)
1114
goto done;
1115
n_digits = strlen(buf);
1116
1117
if (add_pct) {
1118
/* We know that buf has a trailing zero (since we just called
1119
strlen() on it), and we don't use that fact any more. So we
1120
can just write over the trailing zero. */
1121
buf[n_digits] = '%';
1122
n_digits += 1;
1123
}
1124
1125
if (format->sign != '+' && format->sign != ' '
1126
&& format->width == -1
1127
&& format->type != 'n'
1128
&& !format->thousands_separators)
1129
{
1130
/* Fast path */
1131
result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1132
PyMem_Free(buf);
1133
return result;
1134
}
1135
1136
/* Since there is no unicode version of PyOS_double_to_string,
1137
just use the 8 bit version and then convert to unicode. */
1138
unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1139
PyMem_Free(buf);
1140
if (unicode_tmp == NULL)
1141
goto done;
1142
1143
/* Is a sign character present in the output? If so, remember it
1144
and skip it */
1145
index = 0;
1146
if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1147
sign_char = '-';
1148
++index;
1149
--n_digits;
1150
}
1151
1152
/* Determine if we have any "remainder" (after the digits, might include
1153
decimal or exponent or both (or neither)) */
1154
parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1155
1156
/* Determine the grouping, separator, and decimal point, if any. */
1157
if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1158
format->thousands_separators,
1159
&locale) == -1)
1160
goto done;
1161
1162
/* Calculate how much memory we'll need. */
1163
n_total = calc_number_widths(&spec, 0, sign_char, index,
1164
index + n_digits, n_remainder, has_decimal,
1165
&locale, format, &maxchar);
1166
if (n_total == -1) {
1167
goto done;
1168
}
1169
1170
/* Allocate the memory. */
1171
if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1172
goto done;
1173
1174
/* Populate the memory. */
1175
result = fill_number(writer, &spec,
1176
unicode_tmp, index,
1177
NULL, 0, format->fill_char,
1178
&locale, 0);
1179
1180
done:
1181
Py_XDECREF(unicode_tmp);
1182
free_locale_info(&locale);
1183
return result;
1184
}
1185
1186
/************************************************************************/
/*********** complex formatting *****************************************/
/************************************************************************/

static int
1191
format_complex_internal(PyObject *value,
1192
const InternalFormatSpec *format,
1193
_PyUnicodeWriter *writer)
1194
{
1195
double re;
1196
double im;
1197
char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1198
char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1199
1200
InternalFormatSpec tmp_format = *format;
1201
Py_ssize_t n_re_digits;
1202
Py_ssize_t n_im_digits;
1203
Py_ssize_t n_re_remainder;
1204
Py_ssize_t n_im_remainder;
1205
Py_ssize_t n_re_total;
1206
Py_ssize_t n_im_total;
1207
int re_has_decimal;
1208
int im_has_decimal;
1209
int precision, default_precision = 6;
1210
Py_UCS4 type = format->type;
1211
Py_ssize_t i_re;
1212
Py_ssize_t i_im;
1213
NumberFieldWidths re_spec;
1214
NumberFieldWidths im_spec;
1215
int flags = 0;
1216
int result = -1;
1217
Py_UCS4 maxchar = 127;
1218
int rkind;
1219
void *rdata;
1220
Py_UCS4 re_sign_char = '\0';
1221
Py_UCS4 im_sign_char = '\0';
1222
int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1223
int im_float_type;
1224
int add_parens = 0;
1225
int skip_re = 0;
1226
Py_ssize_t lpad;
1227
Py_ssize_t rpad;
1228
Py_ssize_t total;
1229
PyObject *re_unicode_tmp = NULL;
1230
PyObject *im_unicode_tmp = NULL;
1231
1232
/* Locale settings, either from the actual locale or
1233
from a hard-code pseudo-locale */
1234
LocaleInfo locale = LocaleInfo_STATIC_INIT;
1235
1236
if (format->precision > INT_MAX) {
1237
PyErr_SetString(PyExc_ValueError, "precision too big");
1238
goto done;
1239
}
1240
precision = (int)format->precision;
1241
1242
/* Zero padding is not allowed. */
1243
if (format->fill_char == '0') {
1244
PyErr_SetString(PyExc_ValueError,
1245
"Zero padding is not allowed in complex format "
1246
"specifier");
1247
goto done;
1248
}
1249
1250
/* Neither is '=' alignment . */
1251
if (format->align == '=') {
1252
PyErr_SetString(PyExc_ValueError,
1253
"'=' alignment flag is not allowed in complex format "
1254
"specifier");
1255
goto done;
1256
}
1257
1258
re = PyComplex_RealAsDouble(value);
1259
if (re == -1.0 && PyErr_Occurred())
1260
goto done;
1261
im = PyComplex_ImagAsDouble(value);
1262
if (im == -1.0 && PyErr_Occurred())
1263
goto done;
1264
1265
if (format->alternate)
1266
flags |= Py_DTSF_ALT;
1267
if (format->no_neg_0)
1268
flags |= Py_DTSF_NO_NEG_0;
1269
1270
if (type == '\0') {
1271
/* Omitted type specifier. Should be like str(self). */
1272
type = 'r';
1273
default_precision = 0;
1274
if (re == 0.0 && copysign(1.0, re) == 1.0)
1275
skip_re = 1;
1276
else
1277
add_parens = 1;
1278
}
1279
1280
if (type == 'n')
1281
/* 'n' is the same as 'g', except for the locale used to
1282
format the result. We take care of that later. */
1283
type = 'g';
1284
1285
if (precision < 0)
1286
precision = default_precision;
1287
else if (type == 'r')
1288
type = 'g';
1289
1290
/* Cast "type", because if we're in unicode we need to pass an
1291
8-bit char. This is safe, because we've restricted what "type"
1292
can be. */
1293
re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1294
&re_float_type);
1295
if (re_buf == NULL)
1296
goto done;
1297
im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1298
&im_float_type);
1299
if (im_buf == NULL)
1300
goto done;
1301
1302
n_re_digits = strlen(re_buf);
1303
n_im_digits = strlen(im_buf);
1304
1305
/* Since there is no unicode version of PyOS_double_to_string,
1306
just use the 8 bit version and then convert to unicode. */
1307
re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
1308
if (re_unicode_tmp == NULL)
1309
goto done;
1310
i_re = 0;
1311
1312
im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
1313
if (im_unicode_tmp == NULL)
1314
goto done;
1315
i_im = 0;
1316
1317
/* Is a sign character present in the output? If so, remember it
1318
and skip it */
1319
if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1320
re_sign_char = '-';
1321
++i_re;
1322
--n_re_digits;
1323
}
1324
if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1325
im_sign_char = '-';
1326
++i_im;
1327
--n_im_digits;
1328
}
1329
1330
/* Determine if we have any "remainder" (after the digits, might include
1331
decimal or exponent or both (or neither)) */
1332
parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
1333
&n_re_remainder, &re_has_decimal);
1334
parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
1335
&n_im_remainder, &im_has_decimal);
1336
1337
/* Determine the grouping, separator, and decimal point, if any. */
1338
if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1339
format->thousands_separators,
1340
&locale) == -1)
1341
goto done;
1342
1343
/* Turn off any padding. We'll do it later after we've composed
1344
the numbers without padding. */
1345
tmp_format.fill_char = '\0';
1346
tmp_format.align = '<';
1347
tmp_format.width = -1;
1348
1349
/* Calculate how much memory we'll need. */
1350
n_re_total = calc_number_widths(&re_spec, 0, re_sign_char,
1351
i_re, i_re + n_re_digits, n_re_remainder,
1352
re_has_decimal, &locale, &tmp_format,
1353
&maxchar);
1354
if (n_re_total == -1) {
1355
goto done;
1356
}
1357
1358
/* Same formatting, but always include a sign, unless the real part is
1359
* going to be omitted, in which case we use whatever sign convention was
1360
* requested by the original format. */
1361
if (!skip_re)
1362
tmp_format.sign = '+';
1363
n_im_total = calc_number_widths(&im_spec, 0, im_sign_char,
1364
i_im, i_im + n_im_digits, n_im_remainder,
1365
im_has_decimal, &locale, &tmp_format,
1366
&maxchar);
1367
if (n_im_total == -1) {
1368
goto done;
1369
}
1370
1371
if (skip_re)
1372
n_re_total = 0;
1373
1374
/* Add 1 for the 'j', and optionally 2 for parens. */
1375
calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1376
format->width, format->align, &lpad, &rpad, &total);
1377
1378
if (lpad || rpad)
1379
maxchar = Py_MAX(maxchar, format->fill_char);
1380
1381
if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
1382
goto done;
1383
rkind = writer->kind;
1384
rdata = writer->data;
1385
1386
/* Populate the memory. First, the padding. */
1387
result = fill_padding(writer,
1388
n_re_total + n_im_total + 1 + add_parens * 2,
1389
format->fill_char, lpad, rpad);
1390
if (result == -1)
1391
goto done;
1392
1393
if (add_parens) {
1394
PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1395
writer->pos++;
1396
}
1397
1398
if (!skip_re) {
1399
result = fill_number(writer, &re_spec,
1400
re_unicode_tmp, i_re,
1401
NULL, 0,
1402
0,
1403
&locale, 0);
1404
if (result == -1)
1405
goto done;
1406
}
1407
result = fill_number(writer, &im_spec,
1408
im_unicode_tmp, i_im,
1409
NULL, 0,
1410
0,
1411
&locale, 0);
1412
if (result == -1)
1413
goto done;
1414
PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1415
writer->pos++;
1416
1417
if (add_parens) {
1418
PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1419
writer->pos++;
1420
}
1421
1422
writer->pos += rpad;
1423
1424
done:
1425
PyMem_Free(re_buf);
1426
PyMem_Free(im_buf);
1427
Py_XDECREF(re_unicode_tmp);
1428
Py_XDECREF(im_unicode_tmp);
1429
free_locale_info(&locale);
1430
return result;
1431
}
1432
1433
/************************************************************************/
/*********** built in formatters ****************************************/
/************************************************************************/
static int
1437
format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1438
{
1439
PyObject *str;
1440
int err;
1441
1442
str = PyObject_Str(obj);
1443
if (str == NULL)
1444
return -1;
1445
err = _PyUnicodeWriter_WriteStr(writer, str);
1446
Py_DECREF(str);
1447
return err;
1448
}
1449
1450
int
1451
_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1452
PyObject *obj,
1453
PyObject *format_spec,
1454
Py_ssize_t start, Py_ssize_t end)
1455
{
1456
InternalFormatSpec format;
1457
1458
assert(PyUnicode_Check(obj));
1459
1460
/* check for the special case of zero length format spec, make
1461
it equivalent to str(obj) */
1462
if (start == end) {
1463
if (PyUnicode_CheckExact(obj))
1464
return _PyUnicodeWriter_WriteStr(writer, obj);
1465
else
1466
return format_obj(obj, writer);
1467
}
1468
1469
/* parse the format_spec */
1470
if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1471
&format, 's', '<'))
1472
return -1;
1473
1474
/* type conversion? */
1475
switch (format.type) {
1476
case 's':
1477
/* no type conversion needed, already a string. do the formatting */
1478
return format_string_internal(obj, &format, writer);
1479
default:
1480
/* unknown */
1481
unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1482
return -1;
1483
}
1484
}
1485
1486
int
1487
_PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1488
PyObject *obj,
1489
PyObject *format_spec,
1490
Py_ssize_t start, Py_ssize_t end)
1491
{
1492
PyObject *tmp = NULL;
1493
InternalFormatSpec format;
1494
int result = -1;
1495
1496
/* check for the special case of zero length format spec, make
1497
it equivalent to str(obj) */
1498
if (start == end) {
1499
if (PyLong_CheckExact(obj))
1500
return _PyLong_FormatWriter(writer, obj, 10, 0);
1501
else
1502
return format_obj(obj, writer);
1503
}
1504
1505
/* parse the format_spec */
1506
if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1507
&format, 'd', '>'))
1508
goto done;
1509
1510
/* type conversion? */
1511
switch (format.type) {
1512
case 'b':
1513
case 'c':
1514
case 'd':
1515
case 'o':
1516
case 'x':
1517
case 'X':
1518
case 'n':
1519
/* no type conversion needed, already an int. do the formatting */
1520
result = format_long_internal(obj, &format, writer);
1521
break;
1522
1523
case 'e':
1524
case 'E':
1525
case 'f':
1526
case 'F':
1527
case 'g':
1528
case 'G':
1529
case '%':
1530
/* convert to float */
1531
tmp = PyNumber_Float(obj);
1532
if (tmp == NULL)
1533
goto done;
1534
result = format_float_internal(tmp, &format, writer);
1535
break;
1536
1537
default:
1538
/* unknown */
1539
unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1540
goto done;
1541
}
1542
1543
done:
1544
Py_XDECREF(tmp);
1545
return result;
1546
}
1547
1548
int
1549
_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1550
PyObject *obj,
1551
PyObject *format_spec,
1552
Py_ssize_t start, Py_ssize_t end)
1553
{
1554
InternalFormatSpec format;
1555
1556
/* check for the special case of zero length format spec, make
1557
it equivalent to str(obj) */
1558
if (start == end)
1559
return format_obj(obj, writer);
1560
1561
/* parse the format_spec */
1562
if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1563
&format, '\0', '>'))
1564
return -1;
1565
1566
/* type conversion? */
1567
switch (format.type) {
1568
case '\0': /* No format code: like 'g', but with at least one decimal. */
1569
case 'e':
1570
case 'E':
1571
case 'f':
1572
case 'F':
1573
case 'g':
1574
case 'G':
1575
case 'n':
1576
case '%':
1577
/* no conversion, already a float. do the formatting */
1578
return format_float_internal(obj, &format, writer);
1579
1580
default:
1581
/* unknown */
1582
unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1583
return -1;
1584
}
1585
}
1586
1587
int
1588
_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1589
PyObject *obj,
1590
PyObject *format_spec,
1591
Py_ssize_t start, Py_ssize_t end)
1592
{
1593
InternalFormatSpec format;
1594
1595
/* check for the special case of zero length format spec, make
1596
it equivalent to str(obj) */
1597
if (start == end)
1598
return format_obj(obj, writer);
1599
1600
/* parse the format_spec */
1601
if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1602
&format, '\0', '>'))
1603
return -1;
1604
1605
/* type conversion? */
1606
switch (format.type) {
1607
case '\0': /* No format code: like 'g', but with at least one decimal. */
1608
case 'e':
1609
case 'E':
1610
case 'f':
1611
case 'F':
1612
case 'g':
1613
case 'G':
1614
case 'n':
1615
/* no conversion, already a complex. do the formatting */
1616
return format_complex_internal(obj, &format, writer);
1617
1618
default:
1619
/* unknown */
1620
unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1621
return -1;
1622
}
1623
}
1624
1625