Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Objects/stringlib/transmogrify.h
12 views
1
#if STRINGLIB_IS_UNICODE
2
# error "transmogrify.h only compatible with byte-wise strings"
3
#endif
4
5
/* the more complicated methods. parts of these should be pulled out into the
6
shared code in bytes_methods.c to cut down on duplicate code bloat. */
7
8
/*[clinic input]
9
class B "PyObject *" "&PyType_Type"
10
[clinic start generated code]*/
11
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2935558188d97c76]*/
12
13
#include "clinic/transmogrify.h.h"
14
15
/* Produce the "unchanged" result for self.
   When STRINGLIB_MUTABLE is false and self passes STRINGLIB_CHECK_EXACT,
   the result is Py_NewRef(self); in every other case a new object is
   built with STRINGLIB_NEW from self's buffer and length. */
static inline PyObject *
return_self(PyObject *self)
{
#if STRINGLIB_MUTABLE
    return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
#else
    if (!STRINGLIB_CHECK_EXACT(self)) {
        return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
    }
    return Py_NewRef(self);
#endif
}
25
26
/*[clinic input]
27
B.expandtabs as stringlib_expandtabs
28
29
tabsize: int = 8
30
31
Return a copy where all tab characters are expanded using spaces.
32
33
If tabsize is not given, a tab size of 8 characters is assumed.
34
[clinic start generated code]*/
35
36
static PyObject *
stringlib_expandtabs_impl(PyObject *self, int tabsize)
/*[clinic end generated code: output=069cb7fae72e4c2b input=3c6d3b12aa3ccbea]*/
{
    /* Each tab is replaced by enough spaces to reach the next multiple of
       tabsize; the column counter restarts after '\n' or '\r'.  With
       tabsize <= 0 tabs are dropped.  Sets OverflowError and returns NULL
       when the output length would exceed PY_SSIZE_T_MAX; returns NULL
       when STRINGLIB_NEW fails. */
    const char *src, *src_end;
    char *dst;
    Py_ssize_t finished = 0;    /* output bytes in already completed lines */
    Py_ssize_t column = 0;      /* output bytes in the current line */
    Py_ssize_t gap;
    PyObject *expanded;

    src_end = STRINGLIB_STR(self) + STRINGLIB_LEN(self);

    /* Pass 1: measure the output, checking for overflow as we go. */
    for (src = STRINGLIB_STR(self); src < src_end; src++) {
        if (*src == '\t') {
            if (tabsize <= 0) {
                continue;
            }
            gap = tabsize - (column % tabsize);
            if (column > PY_SSIZE_T_MAX - gap) {
                goto overflow;
            }
            column += gap;
            continue;
        }

        if (column > PY_SSIZE_T_MAX - 1) {
            goto overflow;
        }
        column++;
        if (*src == '\n' || *src == '\r') {
            /* Line finished: bank its length and start a new column count. */
            if (finished > PY_SSIZE_T_MAX - column) {
                goto overflow;
            }
            finished += column;
            column = 0;
        }
    }

    if (finished > PY_SSIZE_T_MAX - column) {
        goto overflow;
    }

    /* Pass 2: allocate the output and fill it. */
    expanded = STRINGLIB_NEW(NULL, finished + column);
    if (expanded == NULL) {
        return NULL;
    }

    dst = STRINGLIB_STR(expanded);
    column = 0;

    for (src = STRINGLIB_STR(self); src < src_end; src++) {
        if (*src == '\t') {
            if (tabsize > 0) {
                gap = tabsize - (column % tabsize);
                column += gap;
                memset(dst, ' ', gap);
                dst += gap;
            }
        }
        else {
            *dst++ = *src;
            column = (*src == '\n' || *src == '\r') ? 0 : column + 1;
        }
    }

    return expanded;

overflow:
    PyErr_SetString(PyExc_OverflowError, "result too long");
    return NULL;
}
103
104
static inline PyObject *
105
pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
106
{
107
PyObject *u;
108
109
if (left < 0)
110
left = 0;
111
if (right < 0)
112
right = 0;
113
114
if (left == 0 && right == 0) {
115
return return_self(self);
116
}
117
118
u = STRINGLIB_NEW(NULL, left + STRINGLIB_LEN(self) + right);
119
if (u) {
120
if (left)
121
memset(STRINGLIB_STR(u), fill, left);
122
memcpy(STRINGLIB_STR(u) + left,
123
STRINGLIB_STR(self),
124
STRINGLIB_LEN(self));
125
if (right)
126
memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self),
127
fill, right);
128
}
129
130
return u;
131
}
132
133
/*[clinic input]
134
B.ljust as stringlib_ljust
135
136
width: Py_ssize_t
137
fillchar: char = b' '
138
/
139
140
Return a left-justified string of length width.
141
142
Padding is done using the specified fill character.
143
[clinic start generated code]*/
144
145
static PyObject *
146
stringlib_ljust_impl(PyObject *self, Py_ssize_t width, char fillchar)
147
/*[clinic end generated code: output=c79ca173c5ff8337 input=eff2d014bc7d80df]*/
148
{
149
if (STRINGLIB_LEN(self) >= width) {
150
return return_self(self);
151
}
152
153
return pad(self, 0, width - STRINGLIB_LEN(self), fillchar);
154
}
155
156
157
/*[clinic input]
158
B.rjust as stringlib_rjust
159
160
width: Py_ssize_t
161
fillchar: char = b' '
162
/
163
164
Return a right-justified string of length width.
165
166
Padding is done using the specified fill character.
167
[clinic start generated code]*/
168
169
static PyObject *
170
stringlib_rjust_impl(PyObject *self, Py_ssize_t width, char fillchar)
171
/*[clinic end generated code: output=7df5d728a5439570 input=218b0bd31308955d]*/
172
{
173
if (STRINGLIB_LEN(self) >= width) {
174
return return_self(self);
175
}
176
177
return pad(self, width - STRINGLIB_LEN(self), 0, fillchar);
178
}
179
180
181
/*[clinic input]
182
B.center as stringlib_center
183
184
width: Py_ssize_t
185
fillchar: char = b' '
186
/
187
188
Return a centered string of length width.
189
190
Padding is done using the specified fill character.
191
[clinic start generated code]*/
192
193
static PyObject *
194
stringlib_center_impl(PyObject *self, Py_ssize_t width, char fillchar)
195
/*[clinic end generated code: output=d8da2e055288b4c2 input=3776fd278765d89b]*/
196
{
197
Py_ssize_t marg, left;
198
199
if (STRINGLIB_LEN(self) >= width) {
200
return return_self(self);
201
}
202
203
marg = width - STRINGLIB_LEN(self);
204
left = marg / 2 + (marg & width & 1);
205
206
return pad(self, left, marg - left, fillchar);
207
}
208
209
/*[clinic input]
210
B.zfill as stringlib_zfill
211
212
width: Py_ssize_t
213
/
214
215
Pad a numeric string with zeros on the left, to fill a field of the given width.
216
217
The original string is never truncated.
218
[clinic start generated code]*/
219
220
static PyObject *
221
stringlib_zfill_impl(PyObject *self, Py_ssize_t width)
222
/*[clinic end generated code: output=0b3c684a7f1b2319 input=2da6d7b8e9bcb19a]*/
223
{
224
Py_ssize_t fill;
225
PyObject *s;
226
char *p;
227
228
if (STRINGLIB_LEN(self) >= width) {
229
return return_self(self);
230
}
231
232
fill = width - STRINGLIB_LEN(self);
233
234
s = pad(self, fill, 0, '0');
235
236
if (s == NULL)
237
return NULL;
238
239
p = STRINGLIB_STR(s);
240
if (p[fill] == '+' || p[fill] == '-') {
241
/* move sign to beginning of string */
242
p[0] = p[fill];
243
p[fill] = '0';
244
}
245
246
return s;
247
}
248
249
250
/* find and count characters and substrings */
251
252
#define findchar(target, target_len, c) \
253
((char *)memchr((const void *)(target), c, target_len))
254
255
256
static Py_ssize_t
257
countchar(const char *target, Py_ssize_t target_len, char c,
258
Py_ssize_t maxcount)
259
{
260
Py_ssize_t count = 0;
261
const char *start = target;
262
const char *end = target + target_len;
263
264
while ((start = findchar(start, end - start, c)) != NULL) {
265
count++;
266
if (count >= maxcount)
267
break;
268
start += 1;
269
}
270
return count;
271
}
272
273
274
/* Algorithms for different cases of string replacement */
275
276
/* len(self)>=0, from="", len(to)>=1, maxcount>=1 */
277
static PyObject *
278
stringlib_replace_interleave(PyObject *self,
279
const char *to_s, Py_ssize_t to_len,
280
Py_ssize_t maxcount)
281
{
282
const char *self_s;
283
char *result_s;
284
Py_ssize_t self_len, result_len;
285
Py_ssize_t count, i;
286
PyObject *result;
287
288
self_len = STRINGLIB_LEN(self);
289
290
/* 1 at the end plus 1 after every character;
291
count = min(maxcount, self_len + 1) */
292
if (maxcount <= self_len) {
293
count = maxcount;
294
}
295
else {
296
/* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
297
count = self_len + 1;
298
}
299
300
/* Check for overflow */
301
/* result_len = count * to_len + self_len; */
302
assert(count > 0);
303
if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
304
PyErr_SetString(PyExc_OverflowError,
305
"replace bytes is too long");
306
return NULL;
307
}
308
result_len = count * to_len + self_len;
309
result = STRINGLIB_NEW(NULL, result_len);
310
if (result == NULL) {
311
return NULL;
312
}
313
314
self_s = STRINGLIB_STR(self);
315
result_s = STRINGLIB_STR(result);
316
317
if (to_len > 1) {
318
/* Lay the first one down (guaranteed this will occur) */
319
memcpy(result_s, to_s, to_len);
320
result_s += to_len;
321
count -= 1;
322
323
for (i = 0; i < count; i++) {
324
*result_s++ = *self_s++;
325
memcpy(result_s, to_s, to_len);
326
result_s += to_len;
327
}
328
}
329
else {
330
result_s[0] = to_s[0];
331
result_s += to_len;
332
count -= 1;
333
for (i = 0; i < count; i++) {
334
*result_s++ = *self_s++;
335
result_s[0] = to_s[0];
336
result_s += to_len;
337
}
338
}
339
340
/* Copy the rest of the original string */
341
memcpy(result_s, self_s, self_len - i);
342
343
return result;
344
}
345
346
/* Special case for deleting a single character */
347
/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
348
static PyObject *
349
stringlib_replace_delete_single_character(PyObject *self,
350
char from_c, Py_ssize_t maxcount)
351
{
352
const char *self_s, *start, *next, *end;
353
char *result_s;
354
Py_ssize_t self_len, result_len;
355
Py_ssize_t count;
356
PyObject *result;
357
358
self_len = STRINGLIB_LEN(self);
359
self_s = STRINGLIB_STR(self);
360
361
count = countchar(self_s, self_len, from_c, maxcount);
362
if (count == 0) {
363
return return_self(self);
364
}
365
366
result_len = self_len - count; /* from_len == 1 */
367
assert(result_len>=0);
368
369
result = STRINGLIB_NEW(NULL, result_len);
370
if (result == NULL) {
371
return NULL;
372
}
373
result_s = STRINGLIB_STR(result);
374
375
start = self_s;
376
end = self_s + self_len;
377
while (count-- > 0) {
378
next = findchar(start, end - start, from_c);
379
if (next == NULL)
380
break;
381
memcpy(result_s, start, next - start);
382
result_s += (next - start);
383
start = next + 1;
384
}
385
memcpy(result_s, start, end - start);
386
387
return result;
388
}
389
390
/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
391
392
static PyObject *
393
stringlib_replace_delete_substring(PyObject *self,
394
const char *from_s, Py_ssize_t from_len,
395
Py_ssize_t maxcount)
396
{
397
const char *self_s, *start, *next, *end;
398
char *result_s;
399
Py_ssize_t self_len, result_len;
400
Py_ssize_t count, offset;
401
PyObject *result;
402
403
self_len = STRINGLIB_LEN(self);
404
self_s = STRINGLIB_STR(self);
405
406
count = stringlib_count(self_s, self_len,
407
from_s, from_len,
408
maxcount);
409
410
if (count == 0) {
411
/* no matches */
412
return return_self(self);
413
}
414
415
result_len = self_len - (count * from_len);
416
assert (result_len>=0);
417
418
result = STRINGLIB_NEW(NULL, result_len);
419
if (result == NULL) {
420
return NULL;
421
}
422
result_s = STRINGLIB_STR(result);
423
424
start = self_s;
425
end = self_s + self_len;
426
while (count-- > 0) {
427
offset = stringlib_find(start, end - start,
428
from_s, from_len,
429
0);
430
if (offset == -1)
431
break;
432
next = start + offset;
433
434
memcpy(result_s, start, next - start);
435
436
result_s += (next - start);
437
start = next + from_len;
438
}
439
memcpy(result_s, start, end - start);
440
return result;
441
}
442
443
/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
444
static PyObject *
445
stringlib_replace_single_character_in_place(PyObject *self,
446
char from_c, char to_c,
447
Py_ssize_t maxcount)
448
{
449
const char *self_s, *end;
450
char *result_s, *start, *next;
451
Py_ssize_t self_len;
452
PyObject *result;
453
454
/* The result string will be the same size */
455
self_s = STRINGLIB_STR(self);
456
self_len = STRINGLIB_LEN(self);
457
458
next = findchar(self_s, self_len, from_c);
459
460
if (next == NULL) {
461
/* No matches; return the original bytes */
462
return return_self(self);
463
}
464
465
/* Need to make a new bytes */
466
result = STRINGLIB_NEW(NULL, self_len);
467
if (result == NULL) {
468
return NULL;
469
}
470
result_s = STRINGLIB_STR(result);
471
memcpy(result_s, self_s, self_len);
472
473
/* change everything in-place, starting with this one */
474
start = result_s + (next - self_s);
475
*start = to_c;
476
start++;
477
end = result_s + self_len;
478
479
while (--maxcount > 0) {
480
next = findchar(start, end - start, from_c);
481
if (next == NULL)
482
break;
483
*next = to_c;
484
start = next + 1;
485
}
486
487
return result;
488
}
489
490
/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
491
static PyObject *
492
stringlib_replace_substring_in_place(PyObject *self,
493
const char *from_s, Py_ssize_t from_len,
494
const char *to_s, Py_ssize_t to_len,
495
Py_ssize_t maxcount)
496
{
497
const char *self_s, *end;
498
char *result_s, *start;
499
Py_ssize_t self_len, offset;
500
PyObject *result;
501
502
/* The result bytes will be the same size */
503
504
self_s = STRINGLIB_STR(self);
505
self_len = STRINGLIB_LEN(self);
506
507
offset = stringlib_find(self_s, self_len,
508
from_s, from_len,
509
0);
510
if (offset == -1) {
511
/* No matches; return the original bytes */
512
return return_self(self);
513
}
514
515
/* Need to make a new bytes */
516
result = STRINGLIB_NEW(NULL, self_len);
517
if (result == NULL) {
518
return NULL;
519
}
520
result_s = STRINGLIB_STR(result);
521
memcpy(result_s, self_s, self_len);
522
523
/* change everything in-place, starting with this one */
524
start = result_s + offset;
525
memcpy(start, to_s, from_len);
526
start += from_len;
527
end = result_s + self_len;
528
529
while ( --maxcount > 0) {
530
offset = stringlib_find(start, end - start,
531
from_s, from_len,
532
0);
533
if (offset == -1)
534
break;
535
memcpy(start + offset, to_s, from_len);
536
start += offset + from_len;
537
}
538
539
return result;
540
}
541
542
/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
543
static PyObject *
544
stringlib_replace_single_character(PyObject *self,
545
char from_c,
546
const char *to_s, Py_ssize_t to_len,
547
Py_ssize_t maxcount)
548
{
549
const char *self_s, *start, *next, *end;
550
char *result_s;
551
Py_ssize_t self_len, result_len;
552
Py_ssize_t count;
553
PyObject *result;
554
555
self_s = STRINGLIB_STR(self);
556
self_len = STRINGLIB_LEN(self);
557
558
count = countchar(self_s, self_len, from_c, maxcount);
559
if (count == 0) {
560
/* no matches, return unchanged */
561
return return_self(self);
562
}
563
564
/* use the difference between current and new, hence the "-1" */
565
/* result_len = self_len + count * (to_len-1) */
566
assert(count > 0);
567
if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
568
PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
569
return NULL;
570
}
571
result_len = self_len + count * (to_len - 1);
572
573
result = STRINGLIB_NEW(NULL, result_len);
574
if (result == NULL) {
575
return NULL;
576
}
577
result_s = STRINGLIB_STR(result);
578
579
start = self_s;
580
end = self_s + self_len;
581
while (count-- > 0) {
582
next = findchar(start, end - start, from_c);
583
if (next == NULL)
584
break;
585
586
if (next == start) {
587
/* replace with the 'to' */
588
memcpy(result_s, to_s, to_len);
589
result_s += to_len;
590
start += 1;
591
} else {
592
/* copy the unchanged old then the 'to' */
593
memcpy(result_s, start, next - start);
594
result_s += (next - start);
595
memcpy(result_s, to_s, to_len);
596
result_s += to_len;
597
start = next + 1;
598
}
599
}
600
/* Copy the remainder of the remaining bytes */
601
memcpy(result_s, start, end - start);
602
603
return result;
604
}
605
606
/* len(self)>=1, len(from)>=2, len(to)>=1, len(to)!=len(from), maxcount>=1 */
607
static PyObject *
608
stringlib_replace_substring(PyObject *self,
609
const char *from_s, Py_ssize_t from_len,
610
const char *to_s, Py_ssize_t to_len,
611
Py_ssize_t maxcount)
612
{
613
const char *self_s, *start, *next, *end;
614
char *result_s;
615
Py_ssize_t self_len, result_len;
616
Py_ssize_t count, offset;
617
PyObject *result;
618
619
self_s = STRINGLIB_STR(self);
620
self_len = STRINGLIB_LEN(self);
621
622
count = stringlib_count(self_s, self_len,
623
from_s, from_len,
624
maxcount);
625
626
if (count == 0) {
627
/* no matches, return unchanged */
628
return return_self(self);
629
}
630
631
/* Check for overflow */
632
/* result_len = self_len + count * (to_len-from_len) */
633
assert(count > 0);
634
if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
635
PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
636
return NULL;
637
}
638
result_len = self_len + count * (to_len - from_len);
639
640
result = STRINGLIB_NEW(NULL, result_len);
641
if (result == NULL) {
642
return NULL;
643
}
644
result_s = STRINGLIB_STR(result);
645
646
start = self_s;
647
end = self_s + self_len;
648
while (count-- > 0) {
649
offset = stringlib_find(start, end - start,
650
from_s, from_len,
651
0);
652
if (offset == -1)
653
break;
654
next = start + offset;
655
if (next == start) {
656
/* replace with the 'to' */
657
memcpy(result_s, to_s, to_len);
658
result_s += to_len;
659
start += from_len;
660
} else {
661
/* copy the unchanged old then the 'to' */
662
memcpy(result_s, start, next - start);
663
result_s += (next - start);
664
memcpy(result_s, to_s, to_len);
665
result_s += to_len;
666
start = next + from_len;
667
}
668
}
669
/* Copy the remainder of the remaining bytes */
670
memcpy(result_s, start, end - start);
671
672
return result;
673
}
674
675
676
static PyObject *
677
stringlib_replace(PyObject *self,
678
const char *from_s, Py_ssize_t from_len,
679
const char *to_s, Py_ssize_t to_len,
680
Py_ssize_t maxcount)
681
{
682
if (STRINGLIB_LEN(self) < from_len) {
683
/* nothing to do; return the original bytes */
684
return return_self(self);
685
}
686
if (maxcount < 0) {
687
maxcount = PY_SSIZE_T_MAX;
688
} else if (maxcount == 0) {
689
/* nothing to do; return the original bytes */
690
return return_self(self);
691
}
692
693
/* Handle zero-length special cases */
694
if (from_len == 0) {
695
if (to_len == 0) {
696
/* nothing to do; return the original bytes */
697
return return_self(self);
698
}
699
/* insert the 'to' bytes everywhere. */
700
/* >>> b"Python".replace(b"", b".") */
701
/* b'.P.y.t.h.o.n.' */
702
return stringlib_replace_interleave(self, to_s, to_len, maxcount);
703
}
704
705
if (to_len == 0) {
706
/* delete all occurrences of 'from' bytes */
707
if (from_len == 1) {
708
return stringlib_replace_delete_single_character(
709
self, from_s[0], maxcount);
710
} else {
711
return stringlib_replace_delete_substring(
712
self, from_s, from_len, maxcount);
713
}
714
}
715
716
/* Handle special case where both bytes have the same length */
717
718
if (from_len == to_len) {
719
if (from_len == 1) {
720
return stringlib_replace_single_character_in_place(
721
self, from_s[0], to_s[0], maxcount);
722
} else {
723
return stringlib_replace_substring_in_place(
724
self, from_s, from_len, to_s, to_len, maxcount);
725
}
726
}
727
728
/* Otherwise use the more generic algorithms */
729
if (from_len == 1) {
730
return stringlib_replace_single_character(
731
self, from_s[0], to_s, to_len, maxcount);
732
} else {
733
/* len('from')>=2, len('to')>=1 */
734
return stringlib_replace_substring(
735
self, from_s, from_len, to_s, to_len, maxcount);
736
}
737
}
738
739
#undef findchar
740
741