/* Source: GitHub repository allendowney/cpython,
   path: blob/main/Objects/bytesobject.c */
1
/* bytes object implementation */
2
3
#include "Python.h"
4
#include "pycore_abstract.h" // _PyIndex_Check()
5
#include "pycore_bytesobject.h" // _PyBytes_Find(), _PyBytes_Repeat()
6
#include "pycore_bytes_methods.h" // _Py_bytes_startswith()
7
#include "pycore_call.h" // _PyObject_CallNoArgs()
8
#include "pycore_format.h" // F_LJUST
9
#include "pycore_global_objects.h" // _Py_GET_GLOBAL_OBJECT()
10
#include "pycore_initconfig.h" // _PyStatus_OK()
11
#include "pycore_long.h" // _PyLong_DigitValue
12
#include "pycore_object.h" // _PyObject_GC_TRACK
13
#include "pycore_pymem.h" // PYMEM_CLEANBYTE
14
#include "pycore_strhex.h" // _Py_strhex_with_sep()
15
16
#include <stddef.h>
17
18
/*[clinic input]
19
class bytes "PyBytesObject *" "&PyBytes_Type"
20
[clinic start generated code]*/
21
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
22
23
#include "clinic/bytesobject.c.h"
24
25
/* PyBytesObject_SIZE gives the basic size of a bytes object; any memory
   allocation for a bytes object of length n should request
   PyBytesObject_SIZE + n bytes.

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 or 7 bytes per bytes object allocation on a typical system.
*/
31
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32
33
/* Forward declaration */
34
Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
35
char *str);
36
37
38
#define CHARACTERS _Py_SINGLETON(bytes_characters)
39
/* Pointer to the entry of the CHARACTERS singleton table for byte value
   `ch`, cast to PyBytesObject *.  No reference is taken.  The expansion is a
   plain parenthesised expression (no trailing semicolon), so the macro can
   be used wherever an expression is allowed; callers terminate their own
   statements. */
#define CHARACTER(ch) \
    ((PyBytesObject *)&(CHARACTERS[ch]))
41
#define EMPTY (&_Py_SINGLETON(bytes_empty))
42
43
44
// Return a borrowed reference to the empty bytes string singleton.
45
static inline PyObject* bytes_get_empty(void)
46
{
47
return &EMPTY->ob_base.ob_base;
48
}
49
50
51
// Return a strong reference to the empty bytes string singleton.
52
static inline PyObject* bytes_new_empty(void)
53
{
54
return Py_NewRef(EMPTY);
55
}
56
57
58
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string; the resulting object holds exactly strlen(str) bytes.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
80
static PyObject *
81
_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
82
{
83
PyBytesObject *op;
84
assert(size >= 0);
85
86
if (size == 0) {
87
return bytes_new_empty();
88
}
89
90
if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
91
PyErr_SetString(PyExc_OverflowError,
92
"byte string is too large");
93
return NULL;
94
}
95
96
/* Inline PyObject_NewVar */
97
if (use_calloc)
98
op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
99
else
100
op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
101
if (op == NULL) {
102
return PyErr_NoMemory();
103
}
104
_PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
105
_Py_COMP_DIAG_PUSH
106
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
107
op->ob_shash = -1;
108
_Py_COMP_DIAG_POP
109
if (!use_calloc) {
110
op->ob_sval[size] = '\0';
111
}
112
return (PyObject *) op;
113
}
114
115
PyObject *
116
PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
117
{
118
PyBytesObject *op;
119
if (size < 0) {
120
PyErr_SetString(PyExc_SystemError,
121
"Negative size passed to PyBytes_FromStringAndSize");
122
return NULL;
123
}
124
if (size == 1 && str != NULL) {
125
op = CHARACTER(*str & 255);
126
return Py_NewRef(op);
127
}
128
if (size == 0) {
129
return bytes_new_empty();
130
}
131
132
op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
133
if (op == NULL)
134
return NULL;
135
if (str == NULL)
136
return (PyObject *) op;
137
138
memcpy(op->ob_sval, str, size);
139
return (PyObject *) op;
140
}
141
142
PyObject *
143
PyBytes_FromString(const char *str)
144
{
145
size_t size;
146
PyBytesObject *op;
147
148
assert(str != NULL);
149
size = strlen(str);
150
if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
151
PyErr_SetString(PyExc_OverflowError,
152
"byte string is too long");
153
return NULL;
154
}
155
156
if (size == 0) {
157
return bytes_new_empty();
158
}
159
else if (size == 1) {
160
op = CHARACTER(*str & 255);
161
return Py_NewRef(op);
162
}
163
164
/* Inline PyObject_NewVar */
165
op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
166
if (op == NULL) {
167
return PyErr_NoMemory();
168
}
169
_PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
170
_Py_COMP_DIAG_PUSH
171
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
172
op->ob_shash = -1;
173
_Py_COMP_DIAG_POP
174
memcpy(op->ob_sval, str, size+1);
175
return (PyObject *) op;
176
}
177
178
PyObject *
179
PyBytes_FromFormatV(const char *format, va_list vargs)
180
{
181
char *s;
182
const char *f;
183
const char *p;
184
Py_ssize_t prec;
185
int longflag;
186
int size_tflag;
187
/* Longest 64-bit formatted numbers:
188
- "18446744073709551615\0" (21 bytes)
189
- "-9223372036854775808\0" (21 bytes)
190
Decimal takes the most space (it isn't enough for octal.)
191
192
Longest 64-bit pointer representation:
193
"0xffffffffffffffff\0" (19 bytes). */
194
char buffer[21];
195
_PyBytesWriter writer;
196
197
_PyBytesWriter_Init(&writer);
198
199
s = _PyBytesWriter_Alloc(&writer, strlen(format));
200
if (s == NULL)
201
return NULL;
202
writer.overallocate = 1;
203
204
#define WRITE_BYTES(str) \
205
do { \
206
s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
207
if (s == NULL) \
208
goto error; \
209
} while (0)
210
211
for (f = format; *f; f++) {
212
if (*f != '%') {
213
*s++ = *f;
214
continue;
215
}
216
217
p = f++;
218
219
/* ignore the width (ex: 10 in "%10s") */
220
while (Py_ISDIGIT(*f))
221
f++;
222
223
/* parse the precision (ex: 10 in "%.10s") */
224
prec = 0;
225
if (*f == '.') {
226
f++;
227
for (; Py_ISDIGIT(*f); f++) {
228
prec = (prec * 10) + (*f - '0');
229
}
230
}
231
232
while (*f && *f != '%' && !Py_ISALPHA(*f))
233
f++;
234
235
/* handle the long flag ('l'), but only for %ld and %lu.
236
others can be added when necessary. */
237
longflag = 0;
238
if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
239
longflag = 1;
240
++f;
241
}
242
243
/* handle the size_t flag ('z'). */
244
size_tflag = 0;
245
if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
246
size_tflag = 1;
247
++f;
248
}
249
250
/* subtract bytes preallocated for the format string
251
(ex: 2 for "%s") */
252
writer.min_size -= (f - p + 1);
253
254
switch (*f) {
255
case 'c':
256
{
257
int c = va_arg(vargs, int);
258
if (c < 0 || c > 255) {
259
PyErr_SetString(PyExc_OverflowError,
260
"PyBytes_FromFormatV(): %c format "
261
"expects an integer in range [0; 255]");
262
goto error;
263
}
264
writer.min_size++;
265
*s++ = (unsigned char)c;
266
break;
267
}
268
269
case 'd':
270
if (longflag) {
271
sprintf(buffer, "%ld", va_arg(vargs, long));
272
}
273
else if (size_tflag) {
274
sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
275
}
276
else {
277
sprintf(buffer, "%d", va_arg(vargs, int));
278
}
279
assert(strlen(buffer) < sizeof(buffer));
280
WRITE_BYTES(buffer);
281
break;
282
283
case 'u':
284
if (longflag) {
285
sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
286
}
287
else if (size_tflag) {
288
sprintf(buffer, "%zu", va_arg(vargs, size_t));
289
}
290
else {
291
sprintf(buffer, "%u", va_arg(vargs, unsigned int));
292
}
293
assert(strlen(buffer) < sizeof(buffer));
294
WRITE_BYTES(buffer);
295
break;
296
297
case 'i':
298
sprintf(buffer, "%i", va_arg(vargs, int));
299
assert(strlen(buffer) < sizeof(buffer));
300
WRITE_BYTES(buffer);
301
break;
302
303
case 'x':
304
sprintf(buffer, "%x", va_arg(vargs, int));
305
assert(strlen(buffer) < sizeof(buffer));
306
WRITE_BYTES(buffer);
307
break;
308
309
case 's':
310
{
311
Py_ssize_t i;
312
313
p = va_arg(vargs, const char*);
314
if (prec <= 0) {
315
i = strlen(p);
316
}
317
else {
318
i = 0;
319
while (i < prec && p[i]) {
320
i++;
321
}
322
}
323
s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
324
if (s == NULL)
325
goto error;
326
break;
327
}
328
329
case 'p':
330
sprintf(buffer, "%p", va_arg(vargs, void*));
331
assert(strlen(buffer) < sizeof(buffer));
332
/* %p is ill-defined: ensure leading 0x. */
333
if (buffer[1] == 'X')
334
buffer[1] = 'x';
335
else if (buffer[1] != 'x') {
336
memmove(buffer+2, buffer, strlen(buffer)+1);
337
buffer[0] = '0';
338
buffer[1] = 'x';
339
}
340
WRITE_BYTES(buffer);
341
break;
342
343
case '%':
344
writer.min_size++;
345
*s++ = '%';
346
break;
347
348
default:
349
if (*f == 0) {
350
/* fix min_size if we reached the end of the format string */
351
writer.min_size++;
352
}
353
354
/* invalid format string: copy unformatted string and exit */
355
WRITE_BYTES(p);
356
return _PyBytesWriter_Finish(&writer, s);
357
}
358
}
359
360
#undef WRITE_BYTES
361
362
return _PyBytesWriter_Finish(&writer, s);
363
364
error:
365
_PyBytesWriter_Dealloc(&writer);
366
return NULL;
367
}
368
369
/* Variadic front end for PyBytes_FromFormatV(): packs the trailing
   arguments into a va_list and forwards them.  Returns whatever
   PyBytes_FromFormatV() returns. */
PyObject *
PyBytes_FromFormat(const char *format, ...)
{
    va_list ap;

    va_start(ap, format);
    PyObject *result = PyBytes_FromFormatV(format, ap);
    va_end(ap);

    return result;
}
380
381
/* Helpers for formatstring */
382
383
/* Fetch the next formatting argument and advance *p_argidx.

   With arglen >= 0, `args` is a tuple and the item at the current index is
   returned via PyTuple_GetItem().  With arglen < 0, `args` itself is the
   single argument.  When the index has reached arglen, TypeError is set and
   NULL is returned. */
Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
{
    const Py_ssize_t idx = *p_argidx;

    if (idx >= arglen) {
        PyErr_SetString(PyExc_TypeError,
                        "not enough arguments for format string");
        return NULL;
    }

    *p_argidx = idx + 1;
    return (arglen < 0) ? args : PyTuple_GetItem(args, idx);
}
398
399
/* Returns a new reference to a PyBytes object, or NULL on failure. */
400
401
static char*
402
formatfloat(PyObject *v, int flags, int prec, int type,
403
PyObject **p_result, _PyBytesWriter *writer, char *str)
404
{
405
char *p;
406
PyObject *result;
407
double x;
408
size_t len;
409
int dtoa_flags = 0;
410
411
x = PyFloat_AsDouble(v);
412
if (x == -1.0 && PyErr_Occurred()) {
413
PyErr_Format(PyExc_TypeError, "float argument required, "
414
"not %.200s", Py_TYPE(v)->tp_name);
415
return NULL;
416
}
417
418
if (prec < 0)
419
prec = 6;
420
421
if (flags & F_ALT) {
422
dtoa_flags |= Py_DTSF_ALT;
423
}
424
p = PyOS_double_to_string(x, type, prec, dtoa_flags, NULL);
425
426
if (p == NULL)
427
return NULL;
428
429
len = strlen(p);
430
if (writer != NULL) {
431
str = _PyBytesWriter_Prepare(writer, str, len);
432
if (str == NULL) {
433
PyMem_Free(p);
434
return NULL;
435
}
436
memcpy(str, p, len);
437
PyMem_Free(p);
438
str += len;
439
return str;
440
}
441
442
result = PyBytes_FromStringAndSize(p, len);
443
PyMem_Free(p);
444
*p_result = result;
445
return result != NULL ? str : NULL;
446
}
447
448
/* Format `v` as an integer for conversion `type` ('i' is treated as 'd').

   Exact/sub-class ints are formatted directly.  Other numbers are first
   converted: through _PyNumber_Index() for 'o', 'x' and 'X', through
   PyNumber_Long() otherwise.  A TypeError from that conversion (or a
   non-number argument) is replaced by a "%<type> format: ... is required"
   TypeError; any other exception is passed through.

   Returns the result of _PyUnicode_FormatLong(), or NULL on error. */
static PyObject *
formatlong(PyObject *v, int flags, int prec, int type)
{
    if (type == 'i')
        type = 'd';

    if (PyLong_Check(v))
        return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);

    /* make sure number is a type of integer for o, x, and X */
    const int needs_index = (type == 'o' || type == 'x' || type == 'X');

    if (PyNumber_Check(v)) {
        PyObject *as_int = needs_index ? _PyNumber_Index(v)
                                       : PyNumber_Long(v);
        if (as_int != NULL) {
            assert(PyLong_Check(as_int));
            PyObject *formatted = _PyUnicode_FormatLong(as_int, flags & F_ALT,
                                                        prec, type);
            Py_DECREF(as_int);
            return formatted;
        }
        if (!PyErr_ExceptionMatches(PyExc_TypeError))
            return NULL;
    }

    PyErr_Format(PyExc_TypeError,
        "%%%c format: %s is required, not %.200s", type,
        needs_index ? "an integer" : "a real number",
        Py_TYPE(v)->tp_name);
    return NULL;
}
478
479
static int
480
byte_converter(PyObject *arg, char *p)
481
{
482
if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
483
*p = PyBytes_AS_STRING(arg)[0];
484
return 1;
485
}
486
else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
487
*p = PyByteArray_AS_STRING(arg)[0];
488
return 1;
489
}
490
else {
491
int overflow;
492
long ival = PyLong_AsLongAndOverflow(arg, &overflow);
493
if (ival == -1 && PyErr_Occurred()) {
494
if (PyErr_ExceptionMatches(PyExc_TypeError)) {
495
goto onError;
496
}
497
return 0;
498
}
499
if (!(0 <= ival && ival <= 255)) {
500
/* this includes an overflow in converting to C long */
501
PyErr_SetString(PyExc_OverflowError,
502
"%c arg not in range(256)");
503
return 0;
504
}
505
*p = (char)ival;
506
return 1;
507
}
508
onError:
509
PyErr_SetString(PyExc_TypeError,
510
"%c requires an integer in range(256) or a single byte");
511
return 0;
512
}
513
514
static PyObject *_PyBytes_FromBuffer(PyObject *x);
515
516
static PyObject *
517
format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
518
{
519
PyObject *func, *result;
520
/* is it a bytes object? */
521
if (PyBytes_Check(v)) {
522
*pbuf = PyBytes_AS_STRING(v);
523
*plen = PyBytes_GET_SIZE(v);
524
return Py_NewRef(v);
525
}
526
if (PyByteArray_Check(v)) {
527
*pbuf = PyByteArray_AS_STRING(v);
528
*plen = PyByteArray_GET_SIZE(v);
529
return Py_NewRef(v);
530
}
531
/* does it support __bytes__? */
532
func = _PyObject_LookupSpecial(v, &_Py_ID(__bytes__));
533
if (func != NULL) {
534
result = _PyObject_CallNoArgs(func);
535
Py_DECREF(func);
536
if (result == NULL)
537
return NULL;
538
if (!PyBytes_Check(result)) {
539
PyErr_Format(PyExc_TypeError,
540
"__bytes__ returned non-bytes (type %.200s)",
541
Py_TYPE(result)->tp_name);
542
Py_DECREF(result);
543
return NULL;
544
}
545
*pbuf = PyBytes_AS_STRING(result);
546
*plen = PyBytes_GET_SIZE(result);
547
return result;
548
}
549
/* does it support buffer protocol? */
550
if (PyObject_CheckBuffer(v)) {
551
/* maybe we can avoid making a copy of the buffer object here? */
552
result = _PyBytes_FromBuffer(v);
553
if (result == NULL)
554
return NULL;
555
*pbuf = PyBytes_AS_STRING(result);
556
*plen = PyBytes_GET_SIZE(result);
557
return result;
558
}
559
PyErr_Format(PyExc_TypeError,
560
"%%b requires a bytes-like object, "
561
"or an object that implements __bytes__, not '%.100s'",
562
Py_TYPE(v)->tp_name);
563
return NULL;
564
}
565
566
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   Interpolate `args` into the first `format_len` bytes of `format`.

   `args` is a tuple of arguments, a single non-tuple argument, or a mapping
   used with "%(key)..." specifiers.  When `use_bytearray` is non-zero the
   writer is put into bytearray mode (writer.use_bytearray).

   Returns the object produced by _PyBytesWriter_Finish(), or NULL with an
   exception set. */
PyObject *
_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
                  PyObject *args, int use_bytearray)
{
    const char *fmt;
    char *res;
    Py_ssize_t arglen, argidx;
    Py_ssize_t fmtcnt;
    int args_owned = 0;     /* set while `args` holds a reference we own */
    PyObject *dict = NULL;
    _PyBytesWriter writer;

    if (args == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }
    fmt = format;
    fmtcnt = format_len;

    _PyBytesWriter_Init(&writer);
    writer.use_bytearray = use_bytearray;

    /* Preallocate the size of the format string; each conversion below
       accounts for the bytes of its own specifier. */
    res = _PyBytesWriter_Alloc(&writer, fmtcnt);
    if (res == NULL)
        return NULL;
    if (!use_bytearray)
        writer.overallocate = 1;

    if (PyTuple_Check(args)) {
        arglen = PyTuple_GET_SIZE(args);
        argidx = 0;
    }
    else {
        /* Single non-tuple argument: with arglen == -1 and argidx == -2,
           getnextarg() hands out `args` itself exactly once. */
        arglen = -1;
        argidx = -2;
    }
    /* Treat args as a mapping only if it is subscriptable and is not one of
       the tuple / bytes / str / bytearray types. */
    if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
        !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
        !PyByteArray_Check(args)) {
            dict = args;
    }

    /* Invariant: after each `--fmtcnt`, fmtcnt is the number of format
       bytes remaining after the one `fmt` points at. */
    while (--fmtcnt >= 0) {
        if (*fmt != '%') {
            /* Literal run: copy everything up to the next '%' (or the end)
               in one go. */
            Py_ssize_t len;
            char *pos;

            pos = (char *)memchr(fmt + 1, '%', fmtcnt);
            if (pos != NULL)
                len = pos - fmt;
            else
                len = fmtcnt + 1;
            assert(len != 0);

            memcpy(res, fmt, len);
            res += len;
            fmt += len;
            fmtcnt -= (len - 1);
        }
        else {
            /* Got a format specifier */
            int flags = 0;
            Py_ssize_t width = -1;
            int prec = -1;
            int c = '\0';
            int fill;
            PyObject *v = NULL;
            PyObject *temp = NULL;      /* owns the data pbuf points into */
            const char *pbuf = NULL;
            int sign;
            Py_ssize_t len = 0;
            char onechar; /* For byte_converter() */
            Py_ssize_t alloc;

            fmt++;
            if (*fmt == '%') {
                /* "%%" writes a literal percent sign */
                *res++ = '%';
                fmt++;
                fmtcnt--;
                continue;
            }
            if (*fmt == '(') {
                /* "%(key)...": look the argument up in the mapping */
                const char *keystart;
                Py_ssize_t keylen;
                PyObject *key;
                int pcount = 1;

                if (dict == NULL) {
                    PyErr_SetString(PyExc_TypeError,
                               "format requires a mapping");
                    goto error;
                }
                ++fmt;
                --fmtcnt;
                keystart = fmt;
                /* Skip over balanced parentheses */
                while (pcount > 0 && --fmtcnt >= 0) {
                    if (*fmt == ')')
                        --pcount;
                    else if (*fmt == '(')
                        ++pcount;
                    fmt++;
                }
                /* -1: fmt has already stepped past the closing ')' */
                keylen = fmt - keystart - 1;
                if (fmtcnt < 0 || pcount > 0) {
                    PyErr_SetString(PyExc_ValueError,
                               "incomplete format key");
                    goto error;
                }
                key = PyBytes_FromStringAndSize(keystart,
                                                 keylen);
                if (key == NULL)
                    goto error;
                /* Drop the value fetched for a previous "%(key)" before
                   replacing it. */
                if (args_owned) {
                    Py_DECREF(args);
                    args_owned = 0;
                }
                args = PyObject_GetItem(dict, key);
                Py_DECREF(key);
                if (args == NULL) {
                    goto error;
                }
                args_owned = 1;
                /* the looked-up value is consumed as a single argument */
                arglen = -1;
                argidx = -2;
            }

            /* Parse flags. Example: "%+i" => flags=F_SIGN. */
            while (--fmtcnt >= 0) {
                switch (c = *fmt++) {
                case '-': flags |= F_LJUST; continue;
                case '+': flags |= F_SIGN; continue;
                case ' ': flags |= F_BLANK; continue;
                case '#': flags |= F_ALT; continue;
                case '0': flags |= F_ZERO; continue;
                }
                break;
            }

            /* Parse width. Example: "%10s" => width=10 */
            if (c == '*') {
                /* width taken from the next argument */
                v = getnextarg(args, arglen, &argidx);
                if (v == NULL)
                    goto error;
                if (!PyLong_Check(v)) {
                    PyErr_SetString(PyExc_TypeError,
                                    "* wants int");
                    goto error;
                }
                width = PyLong_AsSsize_t(v);
                if (width == -1 && PyErr_Occurred())
                    goto error;
                if (width < 0) {
                    /* a negative width means left-justify */
                    flags |= F_LJUST;
                    width = -width;
                }
                if (--fmtcnt >= 0)
                    c = *fmt++;
            }
            else if (c >= 0 && isdigit(c)) {
                width = c - '0';
                while (--fmtcnt >= 0) {
                    c = Py_CHARMASK(*fmt++);
                    if (!isdigit(c))
                        break;
                    /* reject before width*10 + digit could overflow */
                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
                        PyErr_SetString(
                            PyExc_ValueError,
                            "width too big");
                        goto error;
                    }
                    width = width*10 + (c - '0');
                }
            }

            /* Parse precision. Example: "%.3f" => prec=3 */
            if (c == '.') {
                prec = 0;
                if (--fmtcnt >= 0)
                    c = *fmt++;
                if (c == '*') {
                    /* precision taken from the next argument */
                    v = getnextarg(args, arglen, &argidx);
                    if (v == NULL)
                        goto error;
                    if (!PyLong_Check(v)) {
                        PyErr_SetString(
                            PyExc_TypeError,
                            "* wants int");
                        goto error;
                    }
                    prec = _PyLong_AsInt(v);
                    if (prec == -1 && PyErr_Occurred())
                        goto error;
                    if (prec < 0)
                        prec = 0;
                    if (--fmtcnt >= 0)
                        c = *fmt++;
                }
                else if (c >= 0 && isdigit(c)) {
                    prec = c - '0';
                    while (--fmtcnt >= 0) {
                        c = Py_CHARMASK(*fmt++);
                        if (!isdigit(c))
                            break;
                        /* reject before prec*10 + digit could overflow */
                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
                            PyErr_SetString(
                                PyExc_ValueError,
                                "prec too big");
                            goto error;
                        }
                        prec = prec*10 + (c - '0');
                    }
                }
            } /* prec */
            if (fmtcnt >= 0) {
                /* length modifiers are accepted and skipped */
                if (c == 'h' || c == 'l' || c == 'L') {
                    if (--fmtcnt >= 0)
                        c = *fmt++;
                }
            }
            if (fmtcnt < 0) {
                PyErr_SetString(PyExc_ValueError,
                                "incomplete format");
                goto error;
            }
            /* the value to convert */
            v = getnextarg(args, arglen, &argidx);
            if (v == NULL)
                goto error;

            if (fmtcnt == 0) {
                /* last write: disable writer overallocation */
                writer.overallocate = 0;
            }

            sign = 0;
            fill = ' ';
            switch (c) {
            case 'r':
                // %r is only for 2/3 code; 3 only code should use %a
            case 'a':
                temp = PyObject_ASCII(v);
                if (temp == NULL)
                    goto error;
                assert(PyUnicode_IS_ASCII(temp));
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
                len = PyUnicode_GET_LENGTH(temp);
                /* precision truncates the text */
                if (prec >= 0 && len > prec)
                    len = prec;
                break;

            case 's':
                // %s is only for 2/3 code; 3 only code should use %b
            case 'b':
                temp = format_obj(v, &pbuf, &len);
                if (temp == NULL)
                    goto error;
                /* precision truncates the data */
                if (prec >= 0 && len > prec)
                    len = prec;
                break;

            case 'i':
            case 'd':
            case 'u':
            case 'o':
            case 'x':
            case 'X':
                if (PyLong_CheckExact(v)
                    && width == -1 && prec == -1
                    && !(flags & (F_SIGN | F_BLANK))
                    && c != 'X')
                {
                    /* Fast path */
                    int alternate = flags & F_ALT;
                    int base;

                    switch(c)
                    {
                        default:
                            Py_UNREACHABLE();
                        case 'd':
                        case 'i':
                        case 'u':
                            base = 10;
                            break;
                        case 'o':
                            base = 8;
                            break;
                        case 'x':
                        case 'X':
                            base = 16;
                            break;
                    }

                    /* Fast path */
                    writer.min_size -= 2; /* size preallocated for "%d" */
                    res = _PyLong_FormatBytesWriter(&writer, res,
                                                    v, base, alternate);
                    if (res == NULL)
                        goto error;
                    continue;
                }

                /* General path: format into a temporary ASCII str object
                   and let the padding code below copy it. */
                temp = formatlong(v, flags, prec, c);
                if (!temp)
                    goto error;
                assert(PyUnicode_IS_ASCII(temp));
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
                len = PyUnicode_GET_LENGTH(temp);
                sign = 1;
                if (flags & F_ZERO)
                    fill = '0';
                break;

            case 'e':
            case 'E':
            case 'f':
            case 'F':
            case 'g':
            case 'G':
                if (width == -1 && prec == -1
                    && !(flags & (F_SIGN | F_BLANK)))
                {
                    /* Fast path */
                    writer.min_size -= 2; /* size preallocated for "%f" */
                    res = formatfloat(v, flags, prec, c, NULL, &writer, res);
                    if (res == NULL)
                        goto error;
                    continue;
                }

                /* General path: formatfloat() stores a bytes object in
                   temp (writer argument is NULL). */
                if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
                    goto error;
                pbuf = PyBytes_AS_STRING(temp);
                len = PyBytes_GET_SIZE(temp);
                sign = 1;
                if (flags & F_ZERO)
                    fill = '0';
                break;

            case 'c':
                pbuf = &onechar;
                /* byte_converter() returns 1 on success, so len becomes 1 */
                len = byte_converter(v, &onechar);
                if (!len)
                    goto error;
                if (width == -1) {
                    /* Fast path */
                    *res++ = onechar;
                    continue;
                }
                break;

            default:
                PyErr_Format(PyExc_ValueError,
                  "unsupported format character '%c' (0x%x) "
                  "at index %zd",
                  c, c,
                  (Py_ssize_t)(fmt - 1 - format));
                goto error;
            }

            /* For numeric conversions, turn the `sign` flag into the actual
               sign character to emit (0 for none), consuming a leading
               '-' / '+' from the formatted text. */
            if (sign) {
                if (*pbuf == '-' || *pbuf == '+') {
                    sign = *pbuf++;
                    len--;
                }
                else if (flags & F_SIGN)
                    sign = '+';
                else if (flags & F_BLANK)
                    sign = ' ';
                else
                    sign = 0;
            }
            if (width < len)
                width = len;

            /* Total bytes this conversion will write: the field width, plus
               one for the sign when the digits alone fill the field. */
            alloc = width;
            if (sign != 0 && len == width)
                alloc++;
            /* 2: size preallocated for %s */
            if (alloc > 2) {
                res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
                if (res == NULL)
                    goto error;
            }
#ifndef NDEBUG
            char *before = res;
#endif

            /* Write the sign if needed */
            if (sign) {
                /* with a non-space fill the sign goes before the padding */
                if (fill != ' ')
                    *res++ = sign;
                if (width > len)
                    width--;
            }

            /* Write the numeric prefix for "x", "X" and "o" formats
               if the alternate form is used.
               For example, write "0x" for the "%#x" format. */
            if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
                assert(pbuf[0] == '0');
                assert(pbuf[1] == c);
                if (fill != ' ') {
                    *res++ = *pbuf++;
                    *res++ = *pbuf++;
                }
                width -= 2;
                if (width < 0)
                    width = 0;
                len -= 2;
            }

            /* Pad left with the fill character if needed */
            if (width > len && !(flags & F_LJUST)) {
                memset(res, fill, width - len);
                res += (width - len);
                width = len;
            }

            /* If padding with spaces: write sign if needed and/or numeric
               prefix if the alternate form is used */
            if (fill == ' ') {
                if (sign)
                    *res++ = sign;
                if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
                    assert(pbuf[0] == '0');
                    assert(pbuf[1] == c);
                    *res++ = *pbuf++;
                    *res++ = *pbuf++;
                }
            }

            /* Copy bytes */
            memcpy(res, pbuf, len);
            res += len;

            /* Pad right with the fill character if needed */
            if (width > len) {
                memset(res, ' ', width - len);
                res += (width - len);
            }

            if (dict && (argidx < arglen)) {
                PyErr_SetString(PyExc_TypeError,
                           "not all arguments converted during bytes formatting");
                Py_XDECREF(temp);
                goto error;
            }
            Py_XDECREF(temp);

#ifndef NDEBUG
            /* check that we computed the exact size for this write */
            assert((res - before) == alloc);
#endif
        } /* '%' */

        /* If overallocation was disabled, ensure that it was the last
           write. Otherwise, we missed an optimization */
        assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
    } /* until end */

    /* Left-over positional arguments are an error (not checked for a
       mapping). */
    if (argidx < arglen && !dict) {
        PyErr_SetString(PyExc_TypeError,
                        "not all arguments converted during bytes formatting");
        goto error;
    }

    if (args_owned) {
        Py_DECREF(args);
    }
    return _PyBytesWriter_Finish(&writer, res);

 error:
    _PyBytesWriter_Dealloc(&writer);
    if (args_owned) {
        Py_DECREF(args);
    }
    return NULL;
}
1047
1048
/* Unescape a backslash-escaped string.

   Decodes the `len` bytes at `s`, handling the single-character escapes,
   up to three octal digits, and \xHH.

   `errors` selects what happens on a malformed \x escape: NULL or "strict"
   raises ValueError, "replace" writes '?', "ignore" writes nothing; any
   other value raises ValueError.

   *first_invalid_escape is set to NULL, or to a pointer into the input at
   the first unrecognised escape character / the first digit of the first
   octal escape whose value exceeds 0377.  Such escapes are still decoded;
   reporting them is left to the caller.

   Returns the object produced by _PyBytesWriter_Finish(), or NULL with an
   exception set. */
PyObject *_PyBytes_DecodeEscape(const char *s,
                                Py_ssize_t len,
                                const char *errors,
                                const char **first_invalid_escape)
{
    int c;
    char *p;
    const char *end;
    _PyBytesWriter writer;

    _PyBytesWriter_Init(&writer);

    p = _PyBytesWriter_Alloc(&writer, len);
    if (p == NULL)
        return NULL;
    writer.overallocate = 1;

    *first_invalid_escape = NULL;

    end = s + len;
    while (s < end) {
        if (*s != '\\') {
            /* ordinary byte: copy through */
            *p++ = *s++;
            continue;
        }

        s++;
        if (s == end) {
            PyErr_SetString(PyExc_ValueError,
                            "Trailing \\ in string");
            goto failed;
        }

        switch (*s++) {
        /* XXX This assumes ASCII! */
        case '\n': break;       /* backslash-newline: emit nothing */
        case '\\': *p++ = '\\'; break;
        case '\'': *p++ = '\''; break;
        case '\"': *p++ = '\"'; break;
        case 'b': *p++ = '\b'; break;
        case 'f': *p++ = '\014'; break; /* FF */
        case 't': *p++ = '\t'; break;
        case 'n': *p++ = '\n'; break;
        case 'r': *p++ = '\r'; break;
        case 'v': *p++ = '\013'; break; /* VT */
        case 'a': *p++ = '\007'; break; /* BEL, not classic C */
        case '0': case '1': case '2': case '3':
        case '4': case '5': case '6': case '7':
            /* one to three octal digits; the first was already consumed */
            c = s[-1] - '0';
            if (s < end && '0' <= *s && *s <= '7') {
                c = (c<<3) + *s++ - '0';
                if (s < end && '0' <= *s && *s <= '7')
                    c = (c<<3) + *s++ - '0';
            }
            if (c > 0377) {
                if (*first_invalid_escape == NULL) {
                    *first_invalid_escape = s-3; /* Back up 3 chars, since we've
                                                    already incremented s. */
                }
            }
            /* storing into a char keeps only the low byte of c */
            *p++ = c;
            break;
        case 'x':
            /* needs exactly two hex digits */
            if (s+1 < end) {
                int digit1, digit2;
                digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
                digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
                if (digit1 < 16 && digit2 < 16) {
                    *p++ = (unsigned char)((digit1 << 4) + digit2);
                    s += 2;
                    break;
                }
            }
            /* invalid hexadecimal digits */

            if (!errors || strcmp(errors, "strict") == 0) {
                /* (end - len) is the start of the input and s - 2 the
                   backslash, so this reports the escape's offset. */
                PyErr_Format(PyExc_ValueError,
                             "invalid \\x escape at position %zd",
                             s - 2 - (end - len));
                goto failed;
            }
            if (strcmp(errors, "replace") == 0) {
                *p++ = '?';
            } else if (strcmp(errors, "ignore") == 0)
                /* do nothing */;
            else {
                PyErr_Format(PyExc_ValueError,
                             "decoding error; unknown "
                             "error handling code: %.400s",
                             errors);
                goto failed;
            }
            /* skip \x */
            if (s < end && Py_ISXDIGIT(s[0]))
                s++; /* and a hexdigit */
            break;

        default:
            if (*first_invalid_escape == NULL) {
                *first_invalid_escape = s-1; /* Back up one char, since we've
                                                already incremented s. */
            }
            /* Unknown escape: keep the backslash and step back so the
               following character is copied as an ordinary byte. */
            *p++ = '\\';
            s--;
        }
    }

    return _PyBytesWriter_Finish(&writer, p);

  failed:
    _PyBytesWriter_Dealloc(&writer);
    return NULL;
}
1162
1163
PyObject *PyBytes_DecodeEscape(const char *s,
1164
Py_ssize_t len,
1165
const char *errors,
1166
Py_ssize_t Py_UNUSED(unicode),
1167
const char *Py_UNUSED(recode_encoding))
1168
{
1169
const char* first_invalid_escape;
1170
PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
1171
&first_invalid_escape);
1172
if (result == NULL)
1173
return NULL;
1174
if (first_invalid_escape != NULL) {
1175
unsigned char c = *first_invalid_escape;
1176
if ('4' <= c && c <= '7') {
1177
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1178
"invalid octal escape sequence '\\%.3s'",
1179
first_invalid_escape) < 0)
1180
{
1181
Py_DECREF(result);
1182
return NULL;
1183
}
1184
}
1185
else {
1186
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1187
"invalid escape sequence '\\%c'",
1188
c) < 0)
1189
{
1190
Py_DECREF(result);
1191
return NULL;
1192
}
1193
}
1194
}
1195
return result;
1196
1197
}
1198
/* -------------------------------------------------------------------- */
1199
/* object api */
1200
1201
Py_ssize_t
1202
PyBytes_Size(PyObject *op)
1203
{
1204
if (!PyBytes_Check(op)) {
1205
PyErr_Format(PyExc_TypeError,
1206
"expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1207
return -1;
1208
}
1209
return Py_SIZE(op);
1210
}
1211
1212
char *
1213
PyBytes_AsString(PyObject *op)
1214
{
1215
if (!PyBytes_Check(op)) {
1216
PyErr_Format(PyExc_TypeError,
1217
"expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1218
return NULL;
1219
}
1220
return ((PyBytesObject *)op)->ob_sval;
1221
}
1222
1223
int
1224
PyBytes_AsStringAndSize(PyObject *obj,
1225
char **s,
1226
Py_ssize_t *len)
1227
{
1228
if (s == NULL) {
1229
PyErr_BadInternalCall();
1230
return -1;
1231
}
1232
1233
if (!PyBytes_Check(obj)) {
1234
PyErr_Format(PyExc_TypeError,
1235
"expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1236
return -1;
1237
}
1238
1239
*s = PyBytes_AS_STRING(obj);
1240
if (len != NULL)
1241
*len = PyBytes_GET_SIZE(obj);
1242
else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1243
PyErr_SetString(PyExc_ValueError,
1244
"embedded null byte");
1245
return -1;
1246
}
1247
return 0;
1248
}
1249
1250
/* -------------------------------------------------------------------- */
1251
/* Methods */
1252
1253
#define STRINGLIB_GET_EMPTY() bytes_get_empty()
1254
1255
#include "stringlib/stringdefs.h"
1256
#define STRINGLIB_MUTABLE 0
1257
1258
#include "stringlib/fastsearch.h"
1259
#include "stringlib/count.h"
1260
#include "stringlib/find.h"
1261
#include "stringlib/join.h"
1262
#include "stringlib/partition.h"
1263
#include "stringlib/split.h"
1264
#include "stringlib/ctype.h"
1265
1266
#include "stringlib/transmogrify.h"
1267
1268
#undef STRINGLIB_GET_EMPTY
1269
1270
Py_ssize_t
1271
_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
1272
const char *needle, Py_ssize_t len_needle,
1273
Py_ssize_t offset)
1274
{
1275
return stringlib_find(haystack, len_haystack,
1276
needle, len_needle, offset);
1277
}
1278
1279
Py_ssize_t
1280
_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
1281
const char *needle, Py_ssize_t len_needle,
1282
Py_ssize_t offset)
1283
{
1284
return stringlib_rfind(haystack, len_haystack,
1285
needle, len_needle, offset);
1286
}
1287
1288
/* Build the repr of the bytes object `obj` as an ASCII str: b'...' with
   backslash, tab, newline, carriage return and the active quote character
   backslash-escaped, and bytes outside the printable ASCII range written
   as \xHH.

   With `smartquotes` non-zero, double quotes are used as the delimiter when
   the data contains single quotes but no double quotes.

   Returns a new reference, or NULL with an exception set (OverflowError if
   the repr would not fit in Py_ssize_t). */
PyObject *
PyBytes_Repr(PyObject *obj, int smartquotes)
{
    PyBytesObject *op = (PyBytesObject *)obj;
    const Py_ssize_t length = Py_SIZE(op);
    const unsigned char *data = (const unsigned char *)op->ob_sval;
    Py_ssize_t n_single = 0;
    Py_ssize_t n_double = 0;
    Py_ssize_t newsize = 3;     /* b'' */
    Py_ssize_t i;
    unsigned char quote;
    PyObject *v;
    Py_UCS1 *out;

    /* Pass 1: compute size of output string */
    for (i = 0; i < length; i++) {
        const unsigned char c = data[i];
        Py_ssize_t incr = 1;

        if (c == '\'') {
            n_single++;
        }
        else if (c == '"') {
            n_double++;
        }
        else if (c == '\\' || c == '\t' || c == '\n' || c == '\r') {
            incr = 2;           /* \C */
        }
        else if (c < ' ' || c >= 0x7f) {
            incr = 4;           /* \xHH */
        }

        if (newsize > PY_SSIZE_T_MAX - incr)
            goto overflow;
        newsize += incr;
    }

    quote = '\'';
    if (smartquotes && n_single && !n_double)
        quote = '"';
    /* Single quotes inside single-quoted output each need a backslash. */
    if (n_single && quote == '\'') {
        if (newsize > PY_SSIZE_T_MAX - n_single)
            goto overflow;
        newsize += n_single;
    }

    v = PyUnicode_New(newsize, 127);
    if (v == NULL) {
        return NULL;
    }
    out = PyUnicode_1BYTE_DATA(v);

    /* Pass 2: emit the text */
    *out++ = 'b';
    *out++ = quote;
    for (i = 0; i < length; i++) {
        const unsigned char c = op->ob_sval[i];

        switch (c) {
        case '\t':
            *out++ = '\\';
            *out++ = 't';
            break;
        case '\n':
            *out++ = '\\';
            *out++ = 'n';
            break;
        case '\r':
            *out++ = '\\';
            *out++ = 'r';
            break;
        default:
            if (c == quote || c == '\\') {
                *out++ = '\\';
                *out++ = c;
            }
            else if (c < ' ' || c >= 0x7f) {
                *out++ = '\\';
                *out++ = 'x';
                *out++ = Py_hexdigits[(c & 0xf0) >> 4];
                *out++ = Py_hexdigits[c & 0xf];
            }
            else {
                *out++ = c;
            }
            break;
        }
    }
    *out++ = quote;
    assert(_PyUnicode_CheckConsistency(v, 1));
    return v;

  overflow:
    PyErr_SetString(PyExc_OverflowError,
                    "bytes object is too large to make repr");
    return NULL;
}
1362
1363
/* repr() slot: PyBytes_Repr() with smart quote selection enabled. */
static PyObject *
bytes_repr(PyObject *op)
{
    const int smartquotes = 1;
    return PyBytes_Repr(op, smartquotes);
}
1368
1369
/* str() slot: same text as repr().  If the bytes_warning config flag is
 * set, a BytesWarning is issued first; if issuing the warning reports an
 * error, NULL is returned. */
static PyObject *
bytes_str(PyObject *op)
{
    const PyConfig *config = _Py_GetConfig();

    if (config->bytes_warning
        && PyErr_WarnEx(PyExc_BytesWarning,
                        "str() on a bytes instance", 1))
    {
        return NULL;
    }
    return bytes_repr(op);
}
1380
1381
static Py_ssize_t
1382
bytes_length(PyBytesObject *a)
1383
{
1384
return Py_SIZE(a);
1385
}
1386
1387
/* This is also used by PyBytes_Concat() */
1388
/* Concatenate two buffer-exporting objects into a new bytes object.
 *
 * Both operands are read through the buffer protocol.  If either buffer
 * cannot be obtained, TypeError is set.  When one side is empty and the
 * other is an exact bytes object, that object is returned with a new
 * reference instead of copying.
 *
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject *
bytes_concat(PyObject *a, PyObject *b)
{
    Py_buffer left, right;
    PyObject *joined = NULL;

    /* len == -1 marks "buffer not acquired" for the cleanup code below. */
    left.len = -1;
    right.len = -1;

    if (PyObject_GetBuffer(a, &left, PyBUF_SIMPLE) != 0 ||
        PyObject_GetBuffer(b, &right, PyBUF_SIMPLE) != 0)
    {
        PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
                     Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
    }
    else if (left.len == 0 && PyBytes_CheckExact(b)) {
        /* empty + bytes: reuse the right operand */
        joined = Py_NewRef(b);
    }
    else if (right.len == 0 && PyBytes_CheckExact(a)) {
        /* bytes + empty: reuse the left operand */
        joined = Py_NewRef(a);
    }
    else if (left.len > PY_SSIZE_T_MAX - right.len) {
        /* combined length would overflow Py_ssize_t */
        PyErr_NoMemory();
    }
    else {
        joined = PyBytes_FromStringAndSize(NULL, left.len + right.len);
        if (joined != NULL) {
            char *dest = PyBytes_AS_STRING(joined);
            memcpy(dest, left.buf, left.len);
            memcpy(dest + left.len, right.buf, right.len);
        }
    }

    if (left.len != -1) {
        PyBuffer_Release(&left);
    }
    if (right.len != -1) {
        PyBuffer_Release(&right);
    }
    return joined;
}
1431
1432
static PyObject *
1433
bytes_repeat(PyBytesObject *a, Py_ssize_t n)
1434
{
1435
Py_ssize_t size;
1436
PyBytesObject *op;
1437
size_t nbytes;
1438
if (n < 0)
1439
n = 0;
1440
/* watch out for overflows: the size can overflow int,
1441
* and the # of bytes needed can overflow size_t
1442
*/
1443
if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1444
PyErr_SetString(PyExc_OverflowError,
1445
"repeated bytes are too long");
1446
return NULL;
1447
}
1448
size = Py_SIZE(a) * n;
1449
if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1450
return Py_NewRef(a);
1451
}
1452
nbytes = (size_t)size;
1453
if (nbytes + PyBytesObject_SIZE <= nbytes) {
1454
PyErr_SetString(PyExc_OverflowError,
1455
"repeated bytes are too long");
1456
return NULL;
1457
}
1458
op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + nbytes);
1459
if (op == NULL) {
1460
return PyErr_NoMemory();
1461
}
1462
_PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
1463
_Py_COMP_DIAG_PUSH
1464
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
1465
op->ob_shash = -1;
1466
_Py_COMP_DIAG_POP
1467
op->ob_sval[size] = '\0';
1468
1469
_PyBytes_Repeat(op->ob_sval, size, a->ob_sval, Py_SIZE(a));
1470
1471
return (PyObject *) op;
1472
}
1473
1474
static int
1475
bytes_contains(PyObject *self, PyObject *arg)
1476
{
1477
return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1478
}
1479
1480
static PyObject *
1481
bytes_item(PyBytesObject *a, Py_ssize_t i)
1482
{
1483
if (i < 0 || i >= Py_SIZE(a)) {
1484
PyErr_SetString(PyExc_IndexError, "index out of range");
1485
return NULL;
1486
}
1487
return _PyLong_FromUnsignedChar((unsigned char)a->ob_sval[i]);
1488
}
1489
1490
static int
1491
bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1492
{
1493
int cmp;
1494
Py_ssize_t len;
1495
1496
len = Py_SIZE(a);
1497
if (Py_SIZE(b) != len)
1498
return 0;
1499
1500
if (a->ob_sval[0] != b->ob_sval[0])
1501
return 0;
1502
1503
cmp = memcmp(a->ob_sval, b->ob_sval, len);
1504
return (cmp == 0);
1505
}
1506
1507
static PyObject*
1508
bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
1509
{
1510
int c;
1511
Py_ssize_t len_a, len_b;
1512
Py_ssize_t min_len;
1513
1514
/* Make sure both arguments are strings. */
1515
if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1516
if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
1517
if (PyUnicode_Check(a) || PyUnicode_Check(b)) {
1518
if (PyErr_WarnEx(PyExc_BytesWarning,
1519
"Comparison between bytes and string", 1))
1520
return NULL;
1521
}
1522
if (PyLong_Check(a) || PyLong_Check(b)) {
1523
if (PyErr_WarnEx(PyExc_BytesWarning,
1524
"Comparison between bytes and int", 1))
1525
return NULL;
1526
}
1527
}
1528
Py_RETURN_NOTIMPLEMENTED;
1529
}
1530
else if (a == b) {
1531
switch (op) {
1532
case Py_EQ:
1533
case Py_LE:
1534
case Py_GE:
1535
/* a byte string is equal to itself */
1536
Py_RETURN_TRUE;
1537
case Py_NE:
1538
case Py_LT:
1539
case Py_GT:
1540
Py_RETURN_FALSE;
1541
default:
1542
PyErr_BadArgument();
1543
return NULL;
1544
}
1545
}
1546
else if (op == Py_EQ || op == Py_NE) {
1547
int eq = bytes_compare_eq(a, b);
1548
eq ^= (op == Py_NE);
1549
return PyBool_FromLong(eq);
1550
}
1551
else {
1552
len_a = Py_SIZE(a);
1553
len_b = Py_SIZE(b);
1554
min_len = Py_MIN(len_a, len_b);
1555
if (min_len > 0) {
1556
c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1557
if (c == 0)
1558
c = memcmp(a->ob_sval, b->ob_sval, min_len);
1559
}
1560
else
1561
c = 0;
1562
if (c != 0)
1563
Py_RETURN_RICHCOMPARE(c, 0, op);
1564
Py_RETURN_RICHCOMPARE(len_a, len_b, op);
1565
}
1566
}
1567
1568
static Py_hash_t
1569
bytes_hash(PyBytesObject *a)
1570
{
1571
_Py_COMP_DIAG_PUSH
1572
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
1573
if (a->ob_shash == -1) {
1574
/* Can't fail */
1575
a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
1576
}
1577
return a->ob_shash;
1578
_Py_COMP_DIAG_POP
1579
}
1580
1581
static PyObject*
1582
bytes_subscript(PyBytesObject* self, PyObject* item)
1583
{
1584
if (_PyIndex_Check(item)) {
1585
Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1586
if (i == -1 && PyErr_Occurred())
1587
return NULL;
1588
if (i < 0)
1589
i += PyBytes_GET_SIZE(self);
1590
if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1591
PyErr_SetString(PyExc_IndexError,
1592
"index out of range");
1593
return NULL;
1594
}
1595
return _PyLong_FromUnsignedChar((unsigned char)self->ob_sval[i]);
1596
}
1597
else if (PySlice_Check(item)) {
1598
Py_ssize_t start, stop, step, slicelength, i;
1599
size_t cur;
1600
const char* source_buf;
1601
char* result_buf;
1602
PyObject* result;
1603
1604
if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1605
return NULL;
1606
}
1607
slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1608
&stop, step);
1609
1610
if (slicelength <= 0) {
1611
return PyBytes_FromStringAndSize("", 0);
1612
}
1613
else if (start == 0 && step == 1 &&
1614
slicelength == PyBytes_GET_SIZE(self) &&
1615
PyBytes_CheckExact(self)) {
1616
return Py_NewRef(self);
1617
}
1618
else if (step == 1) {
1619
return PyBytes_FromStringAndSize(
1620
PyBytes_AS_STRING(self) + start,
1621
slicelength);
1622
}
1623
else {
1624
source_buf = PyBytes_AS_STRING(self);
1625
result = PyBytes_FromStringAndSize(NULL, slicelength);
1626
if (result == NULL)
1627
return NULL;
1628
1629
result_buf = PyBytes_AS_STRING(result);
1630
for (cur = start, i = 0; i < slicelength;
1631
cur += step, i++) {
1632
result_buf[i] = source_buf[cur];
1633
}
1634
1635
return result;
1636
}
1637
}
1638
else {
1639
PyErr_Format(PyExc_TypeError,
1640
"byte indices must be integers or slices, not %.200s",
1641
Py_TYPE(item)->tp_name);
1642
return NULL;
1643
}
1644
}
1645
1646
static int
1647
bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
1648
{
1649
return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1650
1, flags);
1651
}
1652
1653
static PySequenceMethods bytes_as_sequence = {
1654
(lenfunc)bytes_length, /*sq_length*/
1655
(binaryfunc)bytes_concat, /*sq_concat*/
1656
(ssizeargfunc)bytes_repeat, /*sq_repeat*/
1657
(ssizeargfunc)bytes_item, /*sq_item*/
1658
0, /*sq_slice*/
1659
0, /*sq_ass_item*/
1660
0, /*sq_ass_slice*/
1661
(objobjproc)bytes_contains /*sq_contains*/
1662
};
1663
1664
static PyMappingMethods bytes_as_mapping = {
1665
(lenfunc)bytes_length,
1666
(binaryfunc)bytes_subscript,
1667
0,
1668
};
1669
1670
static PyBufferProcs bytes_as_buffer = {
1671
(getbufferproc)bytes_buffer_getbuffer,
1672
NULL,
1673
};
1674
1675
1676
/*[clinic input]
1677
bytes.__bytes__
1678
Convert this value to exact type bytes.
1679
[clinic start generated code]*/
1680
1681
static PyObject *
1682
bytes___bytes___impl(PyBytesObject *self)
1683
/*[clinic end generated code: output=63a306a9bc0caac5 input=34ec5ddba98bd6bb]*/
1684
{
1685
if (PyBytes_CheckExact(self)) {
1686
return Py_NewRef(self);
1687
}
1688
else {
1689
return PyBytes_FromStringAndSize(self->ob_sval, Py_SIZE(self));
1690
}
1691
}
1692
1693
1694
#define LEFTSTRIP 0
1695
#define RIGHTSTRIP 1
1696
#define BOTHSTRIP 2
1697
1698
/*[clinic input]
1699
bytes.split
1700
1701
sep: object = None
1702
The delimiter according which to split the bytes.
1703
None (the default value) means split on ASCII whitespace characters
1704
(space, tab, return, newline, formfeed, vertical tab).
1705
maxsplit: Py_ssize_t = -1
1706
Maximum number of splits to do.
1707
-1 (the default value) means no limit.
1708
1709
Return a list of the sections in the bytes, using sep as the delimiter.
1710
[clinic start generated code]*/
1711
1712
static PyObject *
1713
bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1714
/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1715
{
1716
Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1717
const char *s = PyBytes_AS_STRING(self), *sub;
1718
Py_buffer vsub;
1719
PyObject *list;
1720
1721
if (maxsplit < 0)
1722
maxsplit = PY_SSIZE_T_MAX;
1723
if (sep == Py_None)
1724
return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1725
if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1726
return NULL;
1727
sub = vsub.buf;
1728
n = vsub.len;
1729
1730
list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1731
PyBuffer_Release(&vsub);
1732
return list;
1733
}
1734
1735
/*[clinic input]
1736
bytes.partition
1737
1738
sep: Py_buffer
1739
/
1740
1741
Partition the bytes into three parts using the given separator.
1742
1743
This will search for the separator sep in the bytes. If the separator is found,
1744
returns a 3-tuple containing the part before the separator, the separator
1745
itself, and the part after it.
1746
1747
If the separator is not found, returns a 3-tuple containing the original bytes
1748
object and two empty bytes objects.
1749
[clinic start generated code]*/
1750
1751
static PyObject *
1752
bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1753
/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
1754
{
1755
return stringlib_partition(
1756
(PyObject*) self,
1757
PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1758
sep->obj, (const char *)sep->buf, sep->len
1759
);
1760
}
1761
1762
/*[clinic input]
1763
bytes.rpartition
1764
1765
sep: Py_buffer
1766
/
1767
1768
Partition the bytes into three parts using the given separator.
1769
1770
This will search for the separator sep in the bytes, starting at the end. If
1771
the separator is found, returns a 3-tuple containing the part before the
1772
separator, the separator itself, and the part after it.
1773
1774
If the separator is not found, returns a 3-tuple containing two empty bytes
1775
objects and the original bytes object.
1776
[clinic start generated code]*/
1777
1778
static PyObject *
1779
bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1780
/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
1781
{
1782
return stringlib_rpartition(
1783
(PyObject*) self,
1784
PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1785
sep->obj, (const char *)sep->buf, sep->len
1786
);
1787
}
1788
1789
/*[clinic input]
1790
bytes.rsplit = bytes.split
1791
1792
Return a list of the sections in the bytes, using sep as the delimiter.
1793
1794
Splitting is done starting at the end of the bytes and working to the front.
1795
[clinic start generated code]*/
1796
1797
static PyObject *
1798
bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1799
/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
1800
{
1801
Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1802
const char *s = PyBytes_AS_STRING(self), *sub;
1803
Py_buffer vsub;
1804
PyObject *list;
1805
1806
if (maxsplit < 0)
1807
maxsplit = PY_SSIZE_T_MAX;
1808
if (sep == Py_None)
1809
return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1810
if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1811
return NULL;
1812
sub = vsub.buf;
1813
n = vsub.len;
1814
1815
list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1816
PyBuffer_Release(&vsub);
1817
return list;
1818
}
1819
1820
1821
/*[clinic input]
1822
bytes.join
1823
1824
iterable_of_bytes: object
1825
/
1826
1827
Concatenate any number of bytes objects.
1828
1829
The bytes whose method is called is inserted in between each pair.
1830
1831
The result is returned as a new bytes object.
1832
1833
Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1834
[clinic start generated code]*/
1835
1836
static PyObject *
1837
bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1838
/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
1839
{
1840
return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1841
}
1842
1843
/* Exported entry point for bytes.join(): asserts that sep is a non-NULL
 * bytes object and that x is non-NULL, then calls bytes_join(). */
PyObject *
_PyBytes_Join(PyObject *sep, PyObject *x)
{
    assert(sep != NULL && PyBytes_Check(sep));
    assert(x != NULL);

    PyBytesObject *separator = (PyBytesObject*)sep;
    return bytes_join(separator, x);
}
1850
1851
static PyObject *
1852
bytes_find(PyBytesObject *self, PyObject *args)
1853
{
1854
return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1855
}
1856
1857
static PyObject *
1858
bytes_index(PyBytesObject *self, PyObject *args)
1859
{
1860
return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1861
}
1862
1863
1864
static PyObject *
1865
bytes_rfind(PyBytesObject *self, PyObject *args)
1866
{
1867
return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1868
}
1869
1870
1871
static PyObject *
1872
bytes_rindex(PyBytesObject *self, PyObject *args)
1873
{
1874
return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1875
}
1876
1877
1878
/* Strip bytes that occur in sepobj from one or both ends of self.
 *
 * striptype selects the side: LEFTSTRIP, RIGHTSTRIP, or BOTHSTRIP.
 * sepobj is read through the buffer protocol; each byte in it is a
 * candidate for removal.  If nothing is stripped and self is an exact
 * bytes object, self is returned with a new reference.
 * Returns NULL if the buffer of sepobj cannot be obtained.
 */
Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
{
    const char *data = PyBytes_AS_STRING(self);
    const Py_ssize_t size = PyBytes_GET_SIZE(self);
    Py_buffer chars_view;

    if (PyObject_GetBuffer(sepobj, &chars_view, PyBUF_SIMPLE) != 0) {
        return NULL;
    }
    char *chars = chars_view.buf;
    Py_ssize_t nchars = chars_view.len;

    /* first index that survives stripping from the left */
    Py_ssize_t lo = 0;
    if (striptype != RIGHTSTRIP) {
        while (lo < size && memchr(chars, Py_CHARMASK(data[lo]), nchars)) {
            lo++;
        }
    }

    /* one past the last index that survives stripping from the right;
       never moves below lo */
    Py_ssize_t hi = size;
    if (striptype != LEFTSTRIP) {
        while (hi > lo && memchr(chars, Py_CHARMASK(data[hi - 1]), nchars)) {
            hi--;
        }
    }

    PyBuffer_Release(&chars_view);

    if (lo == 0 && hi == size && PyBytes_CheckExact(self)) {
        return Py_NewRef(self);
    }
    return PyBytes_FromStringAndSize(data + lo, hi - lo);
}
1916
1917
1918
/* Strip bytes matching Py_ISSPACE() from one or both ends of self.
 *
 * striptype selects the side: LEFTSTRIP, RIGHTSTRIP, or BOTHSTRIP.
 * If nothing is stripped and self is an exact bytes object, self is
 * returned with a new reference.
 */
Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject *self, int striptype)
{
    const char *data = PyBytes_AS_STRING(self);
    const Py_ssize_t size = PyBytes_GET_SIZE(self);

    /* first index that survives stripping from the left */
    Py_ssize_t lo = 0;
    if (striptype != RIGHTSTRIP) {
        while (lo < size && Py_ISSPACE(data[lo])) {
            lo++;
        }
    }

    /* one past the last index that survives stripping from the right;
       never moves below lo */
    Py_ssize_t hi = size;
    if (striptype != LEFTSTRIP) {
        while (hi > lo && Py_ISSPACE(data[hi - 1])) {
            hi--;
        }
    }

    if (lo == 0 && hi == size && PyBytes_CheckExact(self)) {
        return Py_NewRef(self);
    }
    return PyBytes_FromStringAndSize(data + lo, hi - lo);
}
1945
1946
1947
/* Dispatch helper for strip/lstrip/rstrip: with bytes == None use
 * do_strip(), otherwise strip the bytes given in the argument with
 * do_xstrip(). */
Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
{
    if (bytes == Py_None) {
        return do_strip(self, striptype);
    }
    return do_xstrip(self, striptype, bytes);
}
1955
1956
/*[clinic input]
1957
bytes.strip
1958
1959
bytes: object = None
1960
/
1961
1962
Strip leading and trailing bytes contained in the argument.
1963
1964
If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1965
[clinic start generated code]*/
1966
1967
static PyObject *
1968
bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
1969
/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
1970
{
1971
return do_argstrip(self, BOTHSTRIP, bytes);
1972
}
1973
1974
/*[clinic input]
1975
bytes.lstrip
1976
1977
bytes: object = None
1978
/
1979
1980
Strip leading bytes contained in the argument.
1981
1982
If the argument is omitted or None, strip leading ASCII whitespace.
1983
[clinic start generated code]*/
1984
1985
static PyObject *
1986
bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
1987
/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
1988
{
1989
return do_argstrip(self, LEFTSTRIP, bytes);
1990
}
1991
1992
/*[clinic input]
1993
bytes.rstrip
1994
1995
bytes: object = None
1996
/
1997
1998
Strip trailing bytes contained in the argument.
1999
2000
If the argument is omitted or None, strip trailing ASCII whitespace.
2001
[clinic start generated code]*/
2002
2003
static PyObject *
2004
bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2005
/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2006
{
2007
return do_argstrip(self, RIGHTSTRIP, bytes);
2008
}
2009
2010
2011
static PyObject *
2012
bytes_count(PyBytesObject *self, PyObject *args)
2013
{
2014
return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2015
}
2016
2017
2018
/*[clinic input]
2019
bytes.translate
2020
2021
table: object
2022
Translation table, which must be a bytes object of length 256.
2023
/
2024
delete as deletechars: object(c_default="NULL") = b''
2025
2026
Return a copy with each character mapped by the given translation table.
2027
2028
All characters occurring in the optional argument delete are removed.
2029
The remaining characters are mapped through the given translation table.
2030
[clinic start generated code]*/
2031
2032
static PyObject *
2033
bytes_translate_impl(PyBytesObject *self, PyObject *table,
2034
PyObject *deletechars)
2035
/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
2036
{
2037
const char *input;
2038
char *output;
2039
Py_buffer table_view = {NULL, NULL};
2040
Py_buffer del_table_view = {NULL, NULL};
2041
const char *table_chars;
2042
Py_ssize_t i, c, changed = 0;
2043
PyObject *input_obj = (PyObject*)self;
2044
const char *output_start, *del_table_chars=NULL;
2045
Py_ssize_t inlen, tablen, dellen = 0;
2046
PyObject *result;
2047
int trans_table[256];
2048
2049
if (PyBytes_Check(table)) {
2050
table_chars = PyBytes_AS_STRING(table);
2051
tablen = PyBytes_GET_SIZE(table);
2052
}
2053
else if (table == Py_None) {
2054
table_chars = NULL;
2055
tablen = 256;
2056
}
2057
else {
2058
if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2059
return NULL;
2060
table_chars = table_view.buf;
2061
tablen = table_view.len;
2062
}
2063
2064
if (tablen != 256) {
2065
PyErr_SetString(PyExc_ValueError,
2066
"translation table must be 256 characters long");
2067
PyBuffer_Release(&table_view);
2068
return NULL;
2069
}
2070
2071
if (deletechars != NULL) {
2072
if (PyBytes_Check(deletechars)) {
2073
del_table_chars = PyBytes_AS_STRING(deletechars);
2074
dellen = PyBytes_GET_SIZE(deletechars);
2075
}
2076
else {
2077
if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2078
PyBuffer_Release(&table_view);
2079
return NULL;
2080
}
2081
del_table_chars = del_table_view.buf;
2082
dellen = del_table_view.len;
2083
}
2084
}
2085
else {
2086
del_table_chars = NULL;
2087
dellen = 0;
2088
}
2089
2090
inlen = PyBytes_GET_SIZE(input_obj);
2091
result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2092
if (result == NULL) {
2093
PyBuffer_Release(&del_table_view);
2094
PyBuffer_Release(&table_view);
2095
return NULL;
2096
}
2097
output_start = output = PyBytes_AS_STRING(result);
2098
input = PyBytes_AS_STRING(input_obj);
2099
2100
if (dellen == 0 && table_chars != NULL) {
2101
/* If no deletions are required, use faster code */
2102
for (i = inlen; --i >= 0; ) {
2103
c = Py_CHARMASK(*input++);
2104
if (Py_CHARMASK((*output++ = table_chars[c])) != c)
2105
changed = 1;
2106
}
2107
if (!changed && PyBytes_CheckExact(input_obj)) {
2108
Py_SETREF(result, Py_NewRef(input_obj));
2109
}
2110
PyBuffer_Release(&del_table_view);
2111
PyBuffer_Release(&table_view);
2112
return result;
2113
}
2114
2115
if (table_chars == NULL) {
2116
for (i = 0; i < 256; i++)
2117
trans_table[i] = Py_CHARMASK(i);
2118
} else {
2119
for (i = 0; i < 256; i++)
2120
trans_table[i] = Py_CHARMASK(table_chars[i]);
2121
}
2122
PyBuffer_Release(&table_view);
2123
2124
for (i = 0; i < dellen; i++)
2125
trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2126
PyBuffer_Release(&del_table_view);
2127
2128
for (i = inlen; --i >= 0; ) {
2129
c = Py_CHARMASK(*input++);
2130
if (trans_table[c] != -1)
2131
if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2132
continue;
2133
changed = 1;
2134
}
2135
if (!changed && PyBytes_CheckExact(input_obj)) {
2136
Py_DECREF(result);
2137
return Py_NewRef(input_obj);
2138
}
2139
/* Fix the size of the resulting byte string */
2140
if (inlen > 0)
2141
_PyBytes_Resize(&result, output - output_start);
2142
return result;
2143
}
2144
2145
2146
/*[clinic input]
2147
2148
@staticmethod
2149
bytes.maketrans
2150
2151
frm: Py_buffer
2152
to: Py_buffer
2153
/
2154
2155
Return a translation table usable for the bytes or bytearray translate method.
2156
2157
The returned table will be one where each byte in frm is mapped to the byte at
2158
the same position in to.
2159
2160
The bytes objects frm and to must be of the same length.
2161
[clinic start generated code]*/
2162
2163
static PyObject *
2164
bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2165
/*[clinic end generated code: output=a36f6399d4b77f6f input=a3bd00d430a0979f]*/
2166
{
2167
return _Py_bytes_maketrans(frm, to);
2168
}
2169
2170
2171
/*[clinic input]
2172
bytes.replace
2173
2174
old: Py_buffer
2175
new: Py_buffer
2176
count: Py_ssize_t = -1
2177
Maximum number of occurrences to replace.
2178
-1 (the default value) means replace all occurrences.
2179
/
2180
2181
Return a copy with all occurrences of substring old replaced by new.
2182
2183
If the optional argument count is given, only the first count occurrences are
2184
replaced.
2185
[clinic start generated code]*/
2186
2187
static PyObject *
2188
bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2189
Py_ssize_t count)
2190
/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
2191
{
2192
return stringlib_replace((PyObject *)self,
2193
(const char *)old->buf, old->len,
2194
(const char *)new->buf, new->len, count);
2195
}
2196
2197
/** End DALKE **/
2198
2199
/*[clinic input]
2200
bytes.removeprefix as bytes_removeprefix
2201
2202
prefix: Py_buffer
2203
/
2204
2205
Return a bytes object with the given prefix string removed if present.
2206
2207
If the bytes starts with the prefix string, return bytes[len(prefix):].
2208
Otherwise, return a copy of the original bytes.
2209
[clinic start generated code]*/
2210
2211
static PyObject *
2212
bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2213
/*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2214
{
2215
const char *self_start = PyBytes_AS_STRING(self);
2216
Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2217
const char *prefix_start = prefix->buf;
2218
Py_ssize_t prefix_len = prefix->len;
2219
2220
if (self_len >= prefix_len
2221
&& prefix_len > 0
2222
&& memcmp(self_start, prefix_start, prefix_len) == 0)
2223
{
2224
return PyBytes_FromStringAndSize(self_start + prefix_len,
2225
self_len - prefix_len);
2226
}
2227
2228
if (PyBytes_CheckExact(self)) {
2229
return Py_NewRef(self);
2230
}
2231
2232
return PyBytes_FromStringAndSize(self_start, self_len);
2233
}
2234
2235
/*[clinic input]
2236
bytes.removesuffix as bytes_removesuffix
2237
2238
suffix: Py_buffer
2239
/
2240
2241
Return a bytes object with the given suffix string removed if present.
2242
2243
If the bytes ends with the suffix string and that suffix is not empty,
2244
return bytes[:-len(prefix)]. Otherwise, return a copy of the original
2245
bytes.
2246
[clinic start generated code]*/
2247
2248
static PyObject *
2249
bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2250
/*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2251
{
2252
const char *self_start = PyBytes_AS_STRING(self);
2253
Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2254
const char *suffix_start = suffix->buf;
2255
Py_ssize_t suffix_len = suffix->len;
2256
2257
if (self_len >= suffix_len
2258
&& suffix_len > 0
2259
&& memcmp(self_start + self_len - suffix_len,
2260
suffix_start, suffix_len) == 0)
2261
{
2262
return PyBytes_FromStringAndSize(self_start,
2263
self_len - suffix_len);
2264
}
2265
2266
if (PyBytes_CheckExact(self)) {
2267
return Py_NewRef(self);
2268
}
2269
2270
return PyBytes_FromStringAndSize(self_start, self_len);
2271
}
2272
2273
static PyObject *
2274
bytes_startswith(PyBytesObject *self, PyObject *args)
2275
{
2276
return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2277
}
2278
2279
static PyObject *
2280
bytes_endswith(PyBytesObject *self, PyObject *args)
2281
{
2282
return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2283
}
2284
2285
2286
/*[clinic input]
2287
bytes.decode
2288
2289
encoding: str(c_default="NULL") = 'utf-8'
2290
The encoding with which to decode the bytes.
2291
errors: str(c_default="NULL") = 'strict'
2292
The error handling scheme to use for the handling of decoding errors.
2293
The default is 'strict' meaning that decoding errors raise a
2294
UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2295
as well as any other name registered with codecs.register_error that
2296
can handle UnicodeDecodeErrors.
2297
2298
Decode the bytes using the codec registered for encoding.
2299
[clinic start generated code]*/
2300
2301
static PyObject *
2302
bytes_decode_impl(PyBytesObject *self, const char *encoding,
2303
const char *errors)
2304
/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2305
{
2306
return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2307
}
2308
2309
2310
/*[clinic input]
2311
bytes.splitlines
2312
2313
keepends: bool = False
2314
2315
Return a list of the lines in the bytes, breaking at line boundaries.
2316
2317
Line breaks are not included in the resulting list unless keepends is given and
2318
true.
2319
[clinic start generated code]*/
2320
2321
static PyObject *
2322
bytes_splitlines_impl(PyBytesObject *self, int keepends)
2323
/*[clinic end generated code: output=3484149a5d880ffb input=5d7b898af2fe55c0]*/
2324
{
2325
return stringlib_splitlines(
2326
(PyObject*) self, PyBytes_AS_STRING(self),
2327
PyBytes_GET_SIZE(self), keepends
2328
);
2329
}
2330
2331
/*[clinic input]
2332
@classmethod
2333
bytes.fromhex
2334
2335
string: unicode
2336
/
2337
2338
Create a bytes object from a string of hexadecimal numbers.
2339
2340
Spaces between two numbers are accepted.
2341
Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2342
[clinic start generated code]*/
2343
2344
static PyObject *
2345
bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2346
/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
2347
{
2348
PyObject *result = _PyBytes_FromHex(string, 0);
2349
if (type != &PyBytes_Type && result != NULL) {
2350
Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
2351
}
2352
return result;
2353
}
2354
2355
/* Parse a str of hexadecimal digit pairs into a bytes-like object.
 *
 * string must be a str (asserted).  Runs of characters matching
 * Py_ISSPACE() are skipped where a digit pair may start.  The result is
 * produced through a _PyBytesWriter whose use_bytearray field is set from
 * the use_bytearray argument.
 *
 * On a non-ASCII character or a non-hex digit, ValueError is set with the
 * position of the offending character and NULL is returned.
 */
PyObject*
_PyBytes_FromHex(PyObject *string, int use_bytearray)
{
    _PyBytesWriter writer;
    Py_ssize_t nchars, bad_pos;
    const Py_UCS1 *first, *cur, *stop;
    unsigned int hi, lo;
    char *out;

    _PyBytesWriter_Init(&writer);
    writer.use_bytearray = use_bytearray;

    assert(PyUnicode_Check(string));
    nchars = PyUnicode_GET_LENGTH(string);

    if (!PyUnicode_IS_ASCII(string)) {
        const void *data = PyUnicode_DATA(string);
        int kind = PyUnicode_KIND(string);

        /* report the first character outside the ASCII range */
        bad_pos = 0;
        while (bad_pos < nchars
               && PyUnicode_READ(kind, data, bad_pos) < 128) {
            bad_pos++;
        }
        goto error;
    }

    assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
    first = PyUnicode_1BYTE_DATA(string);

    /* This overestimates if there are spaces */
    out = _PyBytesWriter_Alloc(&writer, nchars / 2);
    if (out == NULL) {
        return NULL;
    }

    cur = first;
    stop = first + nchars;
    while (cur < stop) {
        /* skip over spaces in the input */
        while (Py_ISSPACE(*cur)) {
            cur++;
        }
        if (cur >= stop) {
            break;
        }

        /* high nibble */
        hi = _PyLong_DigitValue[*cur];
        if (hi >= 16) {
            bad_pos = cur - first;
            goto error;
        }
        cur++;

        /* low nibble */
        lo = _PyLong_DigitValue[*cur];
        if (lo >= 16) {
            bad_pos = cur - first;
            goto error;
        }
        cur++;

        *out++ = (unsigned char)((hi << 4) + lo);
    }

    return _PyBytesWriter_Finish(&writer, out);

  error:
    PyErr_Format(PyExc_ValueError,
                 "non-hexadecimal number found in "
                 "fromhex() arg at position %zd", bad_pos);
    _PyBytesWriter_Dealloc(&writer);
    return NULL;
}
2429
2430
/*[clinic input]
2431
bytes.hex
2432
2433
sep: object = NULL
2434
An optional single character or byte to separate hex bytes.
2435
bytes_per_sep: int = 1
2436
How many bytes between separators. Positive values count from the
2437
right, negative values count from the left.
2438
2439
Create a string of hexadecimal numbers from a bytes object.
2440
2441
Example:
2442
>>> value = b'\xb9\x01\xef'
2443
>>> value.hex()
2444
'b901ef'
2445
>>> value.hex(':')
2446
'b9:01:ef'
2447
>>> value.hex(':', 2)
2448
'b9:01ef'
2449
>>> value.hex(':', -2)
2450
'b901:ef'
2451
[clinic start generated code]*/
2452
2453
static PyObject *
2454
bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2455
/*[clinic end generated code: output=1f134da504064139 input=1a21282b1f1ae595]*/
2456
{
2457
const char *argbuf = PyBytes_AS_STRING(self);
2458
Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2459
return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2460
}
2461
2462
static PyObject *
2463
bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
2464
{
2465
return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
2466
}
2467
2468
2469
static PyMethodDef
2470
bytes_methods[] = {
2471
{"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2472
BYTES___BYTES___METHODDEF
2473
{"capitalize", stringlib_capitalize, METH_NOARGS,
2474
_Py_capitalize__doc__},
2475
STRINGLIB_CENTER_METHODDEF
2476
{"count", (PyCFunction)bytes_count, METH_VARARGS,
2477
_Py_count__doc__},
2478
BYTES_DECODE_METHODDEF
2479
{"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2480
_Py_endswith__doc__},
2481
STRINGLIB_EXPANDTABS_METHODDEF
2482
{"find", (PyCFunction)bytes_find, METH_VARARGS,
2483
_Py_find__doc__},
2484
BYTES_FROMHEX_METHODDEF
2485
BYTES_HEX_METHODDEF
2486
{"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
2487
{"isalnum", stringlib_isalnum, METH_NOARGS,
2488
_Py_isalnum__doc__},
2489
{"isalpha", stringlib_isalpha, METH_NOARGS,
2490
_Py_isalpha__doc__},
2491
{"isascii", stringlib_isascii, METH_NOARGS,
2492
_Py_isascii__doc__},
2493
{"isdigit", stringlib_isdigit, METH_NOARGS,
2494
_Py_isdigit__doc__},
2495
{"islower", stringlib_islower, METH_NOARGS,
2496
_Py_islower__doc__},
2497
{"isspace", stringlib_isspace, METH_NOARGS,
2498
_Py_isspace__doc__},
2499
{"istitle", stringlib_istitle, METH_NOARGS,
2500
_Py_istitle__doc__},
2501
{"isupper", stringlib_isupper, METH_NOARGS,
2502
_Py_isupper__doc__},
2503
BYTES_JOIN_METHODDEF
2504
STRINGLIB_LJUST_METHODDEF
2505
{"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2506
BYTES_LSTRIP_METHODDEF
2507
BYTES_MAKETRANS_METHODDEF
2508
BYTES_PARTITION_METHODDEF
2509
BYTES_REPLACE_METHODDEF
2510
BYTES_REMOVEPREFIX_METHODDEF
2511
BYTES_REMOVESUFFIX_METHODDEF
2512
{"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2513
{"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
2514
STRINGLIB_RJUST_METHODDEF
2515
BYTES_RPARTITION_METHODDEF
2516
BYTES_RSPLIT_METHODDEF
2517
BYTES_RSTRIP_METHODDEF
2518
BYTES_SPLIT_METHODDEF
2519
BYTES_SPLITLINES_METHODDEF
2520
{"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2521
_Py_startswith__doc__},
2522
BYTES_STRIP_METHODDEF
2523
{"swapcase", stringlib_swapcase, METH_NOARGS,
2524
_Py_swapcase__doc__},
2525
{"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
2526
BYTES_TRANSLATE_METHODDEF
2527
{"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2528
STRINGLIB_ZFILL_METHODDEF
2529
{NULL, NULL} /* sentinel */
2530
};
2531
2532
/* nb_remainder slot: format `self` with `arg` via _PyBytes_FormatEx().
   Returns NotImplemented when the left operand is not a bytes object. */
static PyObject *
bytes_mod(PyObject *self, PyObject *arg)
{
    if (PyBytes_Check(self)) {
        const char *format = PyBytes_AS_STRING(self);
        Py_ssize_t format_len = PyBytes_GET_SIZE(self);
        return _PyBytes_FormatEx(format, format_len, arg, 0);
    }
    Py_RETURN_NOTIMPLEMENTED;
}
2541
2542
static PyNumberMethods bytes_as_number = {
2543
0, /*nb_add*/
2544
0, /*nb_subtract*/
2545
0, /*nb_multiply*/
2546
bytes_mod, /*nb_remainder*/
2547
};
2548
2549
static PyObject *
2550
bytes_subtype_new(PyTypeObject *, PyObject *);
2551
2552
/*[clinic input]
2553
@classmethod
2554
bytes.__new__ as bytes_new
2555
2556
source as x: object = NULL
2557
encoding: str = NULL
2558
errors: str = NULL
2559
2560
[clinic start generated code]*/
2561
2562
static PyObject *
2563
bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
2564
const char *errors)
2565
/*[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]*/
2566
{
2567
PyObject *bytes;
2568
PyObject *func;
2569
Py_ssize_t size;
2570
2571
if (x == NULL) {
2572
if (encoding != NULL || errors != NULL) {
2573
PyErr_SetString(PyExc_TypeError,
2574
encoding != NULL ?
2575
"encoding without a string argument" :
2576
"errors without a string argument");
2577
return NULL;
2578
}
2579
bytes = PyBytes_FromStringAndSize(NULL, 0);
2580
}
2581
else if (encoding != NULL) {
2582
/* Encode via the codec registry */
2583
if (!PyUnicode_Check(x)) {
2584
PyErr_SetString(PyExc_TypeError,
2585
"encoding without a string argument");
2586
return NULL;
2587
}
2588
bytes = PyUnicode_AsEncodedString(x, encoding, errors);
2589
}
2590
else if (errors != NULL) {
2591
PyErr_SetString(PyExc_TypeError,
2592
PyUnicode_Check(x) ?
2593
"string argument without an encoding" :
2594
"errors without a string argument");
2595
return NULL;
2596
}
2597
/* We'd like to call PyObject_Bytes here, but we need to check for an
2598
integer argument before deferring to PyBytes_FromObject, something
2599
PyObject_Bytes doesn't do. */
2600
else if ((func = _PyObject_LookupSpecial(x, &_Py_ID(__bytes__))) != NULL) {
2601
bytes = _PyObject_CallNoArgs(func);
2602
Py_DECREF(func);
2603
if (bytes == NULL)
2604
return NULL;
2605
if (!PyBytes_Check(bytes)) {
2606
PyErr_Format(PyExc_TypeError,
2607
"__bytes__ returned non-bytes (type %.200s)",
2608
Py_TYPE(bytes)->tp_name);
2609
Py_DECREF(bytes);
2610
return NULL;
2611
}
2612
}
2613
else if (PyErr_Occurred())
2614
return NULL;
2615
else if (PyUnicode_Check(x)) {
2616
PyErr_SetString(PyExc_TypeError,
2617
"string argument without an encoding");
2618
return NULL;
2619
}
2620
/* Is it an integer? */
2621
else if (_PyIndex_Check(x)) {
2622
size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2623
if (size == -1 && PyErr_Occurred()) {
2624
if (!PyErr_ExceptionMatches(PyExc_TypeError))
2625
return NULL;
2626
PyErr_Clear(); /* fall through */
2627
bytes = PyBytes_FromObject(x);
2628
}
2629
else {
2630
if (size < 0) {
2631
PyErr_SetString(PyExc_ValueError, "negative count");
2632
return NULL;
2633
}
2634
bytes = _PyBytes_FromSize(size, 1);
2635
}
2636
}
2637
else {
2638
bytes = PyBytes_FromObject(x);
2639
}
2640
2641
if (bytes != NULL && type != &PyBytes_Type) {
2642
Py_SETREF(bytes, bytes_subtype_new(type, bytes));
2643
}
2644
2645
return bytes;
2646
}
2647
2648
/* Create a bytes object holding a C-contiguous copy of the data exported by
   `x` through the buffer protocol.  Returns NULL with an exception set on
   failure. */
static PyObject*
_PyBytes_FromBuffer(PyObject *x)
{
    Py_buffer view;

    if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0) {
        return NULL;
    }

    PyObject *copy = PyBytes_FromStringAndSize(NULL, view.len);
    if (copy != NULL
        && PyBuffer_ToContiguous(((PyBytesObject *)copy)->ob_sval,
                                 &view, view.len, 'C') < 0)
    {
        /* Copy failed: drop the partially filled object. */
        Py_CLEAR(copy);
    }

    /* The view is released on both the success and the failure path. */
    PyBuffer_Release(&view);
    return copy;
}
2671
2672
/* Build a bytes object from a list whose items are converted to integers
   with PyNumber_AsSsize_t() and must lie in range(0, 256).
   Returns NULL with an exception set on failure. */
static PyObject*
_PyBytes_FromList(PyObject *x)
{
    _PyBytesWriter writer;
    Py_ssize_t capacity = PyList_GET_SIZE(x);
    Py_ssize_t pos = 0;
    char *out;

    _PyBytesWriter_Init(&writer);
    out = _PyBytesWriter_Alloc(&writer, capacity);
    if (out == NULL) {
        return NULL;
    }
    writer.overallocate = 1;
    capacity = writer.allocated;

    /* The list size is re-read on every iteration rather than cached. */
    while (pos < PyList_GET_SIZE(x)) {
        PyObject *elem = PyList_GET_ITEM(x, pos);
        Py_ssize_t byte;

        /* Hold a reference to the item across the conversion call. */
        Py_INCREF(elem);
        byte = PyNumber_AsSsize_t(elem, NULL);
        Py_DECREF(elem);
        if (byte == -1 && PyErr_Occurred()) {
            goto error;
        }

        if (byte < 0 || byte >= 256) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }

        if (pos >= capacity) {
            out = _PyBytesWriter_Resize(&writer, out, capacity + 1);
            if (out == NULL) {
                /* _PyBytesWriter_Resize() deallocates the writer on error. */
                return NULL;
            }
            capacity = writer.allocated;
        }
        *out++ = (char) byte;
        pos++;
    }
    return _PyBytesWriter_Finish(&writer, out);

error:
    _PyBytesWriter_Dealloc(&writer);
    return NULL;
}
2716
2717
/* Build a bytes object from a tuple whose items are converted to integers
   with PyNumber_AsSsize_t() and must lie in range(0, 256).
   Returns NULL with an exception set on failure. */
static PyObject*
_PyBytes_FromTuple(PyObject *x)
{
    const Py_ssize_t count = PyTuple_GET_SIZE(x);
    PyObject *result = PyBytes_FromStringAndSize(NULL, count);

    if (result == NULL) {
        return NULL;
    }

    char *out = ((PyBytesObject *)result)->ob_sval;
    for (Py_ssize_t idx = 0; idx < count; idx++) {
        Py_ssize_t byte = PyNumber_AsSsize_t(PyTuple_GET_ITEM(x, idx), NULL);

        if (byte == -1 && PyErr_Occurred()) {
            Py_DECREF(result);
            return NULL;
        }
        if (byte < 0 || byte >= 256) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            Py_DECREF(result);
            return NULL;
        }
        out[idx] = (char) byte;
    }
    return result;
}
2750
2751
/* Build a bytes object by exhausting iterator `it`.  `x` is only used to
   obtain a length hint (default 64) for the initial allocation.
   Each item is converted with PyNumber_AsSsize_t() and must lie in
   range(0, 256).  Returns NULL with an exception set on failure. */
static PyObject *
_PyBytes_FromIterator(PyObject *it, PyObject *x)
{
    _PyBytesWriter writer;
    Py_ssize_t capacity;
    Py_ssize_t written = 0;
    char *out;

    /* Start from the length hint; the buffer is resized as needed. */
    capacity = PyObject_LengthHint(x, 64);
    if (capacity == -1 && PyErr_Occurred()) {
        return NULL;
    }

    _PyBytesWriter_Init(&writer);
    out = _PyBytesWriter_Alloc(&writer, capacity);
    if (out == NULL) {
        return NULL;
    }
    writer.overallocate = 1;
    capacity = writer.allocated;

    /* Run the iterator to exhaustion */
    for (;;) {
        PyObject *elem = PyIter_Next(it);
        Py_ssize_t byte;

        if (elem == NULL) {
            /* NULL means either an error or normal exhaustion. */
            if (PyErr_Occurred()) {
                goto error;
            }
            break;
        }

        /* Interpret it as an int (__index__) */
        byte = PyNumber_AsSsize_t(elem, NULL);
        Py_DECREF(elem);
        if (byte == -1 && PyErr_Occurred()) {
            goto error;
        }

        /* Range check */
        if (byte < 0 || byte >= 256) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }

        /* Append the byte, growing the buffer first if it is full. */
        if (written >= capacity) {
            out = _PyBytesWriter_Resize(&writer, out, capacity + 1);
            if (out == NULL) {
                /* _PyBytesWriter_Resize() deallocates the writer on error. */
                return NULL;
            }
            capacity = writer.allocated;
        }
        *out++ = (char) byte;
        written++;
    }

    return _PyBytesWriter_Finish(&writer, out);

error:
    _PyBytesWriter_Dealloc(&writer);
    return NULL;
}
2812
2813
/* Convert `x` to a bytes object.
   Tried in order: exact bytes (new reference to the same object), buffer
   exporters, exact lists, exact tuples, then any non-str iterable.
   Returns NULL with an exception set on failure. */
PyObject *
PyBytes_FromObject(PyObject *x)
{
    if (x == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    if (PyBytes_CheckExact(x)) {
        return Py_NewRef(x);
    }

    /* Use the modern buffer interface */
    if (PyObject_CheckBuffer(x)) {
        return _PyBytes_FromBuffer(x);
    }

    if (PyList_CheckExact(x)) {
        return _PyBytes_FromList(x);
    }

    if (PyTuple_CheckExact(x)) {
        return _PyBytes_FromTuple(x);
    }

    if (!PyUnicode_Check(x)) {
        PyObject *iter = PyObject_GetIter(x);

        if (iter != NULL) {
            PyObject *converted = _PyBytes_FromIterator(iter, x);
            Py_DECREF(iter);
            return converted;
        }
        /* Only a TypeError from PyObject_GetIter() is replaced by the
           generic message below; any other error propagates. */
        if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
            return NULL;
        }
    }

    PyErr_Format(PyExc_TypeError,
                 "cannot convert '%.200s' object to bytes",
                 Py_TYPE(x)->tp_name);
    return NULL;
}
2854
2855
/* This allocator is needed for subclasses don't want to use __new__.
2856
* See https://github.com/python/cpython/issues/91020#issuecomment-1096793239
2857
*
2858
* This allocator will be removed when ob_shash is removed.
2859
*/
2860
static PyObject *
2861
bytes_alloc(PyTypeObject *self, Py_ssize_t nitems)
2862
{
2863
PyBytesObject *obj = (PyBytesObject*)PyType_GenericAlloc(self, nitems);
2864
if (obj == NULL) {
2865
return NULL;
2866
}
2867
_Py_COMP_DIAG_PUSH
2868
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
2869
obj->ob_shash = -1;
2870
_Py_COMP_DIAG_POP
2871
return (PyObject*)obj;
2872
}
2873
2874
static PyObject *
2875
bytes_subtype_new(PyTypeObject *type, PyObject *tmp)
2876
{
2877
PyObject *pnew;
2878
Py_ssize_t n;
2879
2880
assert(PyType_IsSubtype(type, &PyBytes_Type));
2881
assert(PyBytes_Check(tmp));
2882
n = PyBytes_GET_SIZE(tmp);
2883
pnew = type->tp_alloc(type, n);
2884
if (pnew != NULL) {
2885
memcpy(PyBytes_AS_STRING(pnew),
2886
PyBytes_AS_STRING(tmp), n+1);
2887
_Py_COMP_DIAG_PUSH
2888
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
2889
((PyBytesObject *)pnew)->ob_shash =
2890
((PyBytesObject *)tmp)->ob_shash;
2891
_Py_COMP_DIAG_POP
2892
}
2893
return pnew;
2894
}
2895
2896
/* Docstring text installed in the tp_doc slot of PyBytes_Type. */
PyDoc_STRVAR(bytes_doc,
2897
"bytes(iterable_of_ints) -> bytes\n\
2898
bytes(string, encoding[, errors]) -> bytes\n\
2899
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2900
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2901
bytes() -> empty bytes object\n\
2902
\n\
2903
Construct an immutable array of bytes from:\n\
2904
- an iterable yielding integers in range(256)\n\
2905
- a text string encoded using the specified encoding\n\
2906
- any object implementing the buffer API.\n\
2907
- an integer");
2908
2909
/* Forward declaration so PyBytes_Type can name bytes_iter in its tp_iter
   slot before the function is defined. */
static PyObject *bytes_iter(PyObject *seq);
2910
2911
PyTypeObject PyBytes_Type = {
2912
PyVarObject_HEAD_INIT(&PyType_Type, 0)
2913
"bytes",
2914
PyBytesObject_SIZE,
2915
sizeof(char),
2916
0, /* tp_dealloc */
2917
0, /* tp_vectorcall_offset */
2918
0, /* tp_getattr */
2919
0, /* tp_setattr */
2920
0, /* tp_as_async */
2921
(reprfunc)bytes_repr, /* tp_repr */
2922
&bytes_as_number, /* tp_as_number */
2923
&bytes_as_sequence, /* tp_as_sequence */
2924
&bytes_as_mapping, /* tp_as_mapping */
2925
(hashfunc)bytes_hash, /* tp_hash */
2926
0, /* tp_call */
2927
bytes_str, /* tp_str */
2928
PyObject_GenericGetAttr, /* tp_getattro */
2929
0, /* tp_setattro */
2930
&bytes_as_buffer, /* tp_as_buffer */
2931
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2932
Py_TPFLAGS_BYTES_SUBCLASS |
2933
_Py_TPFLAGS_MATCH_SELF, /* tp_flags */
2934
bytes_doc, /* tp_doc */
2935
0, /* tp_traverse */
2936
0, /* tp_clear */
2937
(richcmpfunc)bytes_richcompare, /* tp_richcompare */
2938
0, /* tp_weaklistoffset */
2939
bytes_iter, /* tp_iter */
2940
0, /* tp_iternext */
2941
bytes_methods, /* tp_methods */
2942
0, /* tp_members */
2943
0, /* tp_getset */
2944
0, /* tp_base */
2945
0, /* tp_dict */
2946
0, /* tp_descr_get */
2947
0, /* tp_descr_set */
2948
0, /* tp_dictoffset */
2949
0, /* tp_init */
2950
bytes_alloc, /* tp_alloc */
2951
bytes_new, /* tp_new */
2952
PyObject_Del, /* tp_free */
2953
};
2954
2955
void
2956
PyBytes_Concat(PyObject **pv, PyObject *w)
2957
{
2958
assert(pv != NULL);
2959
if (*pv == NULL)
2960
return;
2961
if (w == NULL) {
2962
Py_CLEAR(*pv);
2963
return;
2964
}
2965
2966
if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2967
/* Only one reference, so we can resize in place */
2968
Py_ssize_t oldsize;
2969
Py_buffer wb;
2970
2971
if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
2972
PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2973
Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2974
Py_CLEAR(*pv);
2975
return;
2976
}
2977
2978
oldsize = PyBytes_GET_SIZE(*pv);
2979
if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2980
PyErr_NoMemory();
2981
goto error;
2982
}
2983
if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2984
goto error;
2985
2986
memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2987
PyBuffer_Release(&wb);
2988
return;
2989
2990
error:
2991
PyBuffer_Release(&wb);
2992
Py_CLEAR(*pv);
2993
return;
2994
}
2995
2996
else {
2997
/* Multiple references, need to create new object */
2998
PyObject *v;
2999
v = bytes_concat(*pv, w);
3000
Py_SETREF(*pv, v);
3001
}
3002
}
3003
3004
void
3005
PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
3006
{
3007
PyBytes_Concat(pv, w);
3008
Py_XDECREF(w);
3009
}
3010
3011
3012
/* The following function breaks the notion that bytes are immutable:
3013
it changes the size of a bytes object. We get away with this only if there
3014
is only one module referencing the object. You can also think of it
3015
as creating a new bytes object and destroying the old one, only
3016
more efficiently. In any case, don't use this if the bytes object may
3017
already be known to some other part of the code...
3018
Note that if there's not enough memory to resize the bytes object, the
3019
original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
3020
memory" exception is set, and -1 is returned. Else (on success) 0 is
3021
returned, and the value in *pv may or may not be the same as on input.
3022
As always, an extra byte is allocated for a trailing \0 byte (newsize
3023
does *not* include that), and a trailing \0 byte is stored.
3024
*/
3025
3026
int
3027
_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3028
{
3029
PyObject *v;
3030
PyBytesObject *sv;
3031
v = *pv;
3032
if (!PyBytes_Check(v) || newsize < 0) {
3033
goto error;
3034
}
3035
if (Py_SIZE(v) == newsize) {
3036
/* return early if newsize equals to v->ob_size */
3037
return 0;
3038
}
3039
if (Py_SIZE(v) == 0) {
3040
if (newsize == 0) {
3041
return 0;
3042
}
3043
*pv = _PyBytes_FromSize(newsize, 0);
3044
Py_DECREF(v);
3045
return (*pv == NULL) ? -1 : 0;
3046
}
3047
if (Py_REFCNT(v) != 1) {
3048
goto error;
3049
}
3050
if (newsize == 0) {
3051
*pv = bytes_new_empty();
3052
Py_DECREF(v);
3053
return 0;
3054
}
3055
#ifdef Py_TRACE_REFS
3056
_Py_ForgetReference(v);
3057
#endif
3058
*pv = (PyObject *)
3059
PyObject_Realloc(v, PyBytesObject_SIZE + newsize);
3060
if (*pv == NULL) {
3061
#ifdef Py_REF_DEBUG
3062
_Py_DecRefTotal(_PyInterpreterState_GET());
3063
#endif
3064
PyObject_Free(v);
3065
PyErr_NoMemory();
3066
return -1;
3067
}
3068
_Py_NewReferenceNoTotal(*pv);
3069
sv = (PyBytesObject *) *pv;
3070
Py_SET_SIZE(sv, newsize);
3071
sv->ob_sval[newsize] = '\0';
3072
_Py_COMP_DIAG_PUSH
3073
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
3074
sv->ob_shash = -1; /* invalidate cached hash value */
3075
_Py_COMP_DIAG_POP
3076
return 0;
3077
error:
3078
*pv = 0;
3079
Py_DECREF(v);
3080
PyErr_BadInternalCall();
3081
return -1;
3082
}
3083
3084
3085
/*********************** Bytes Iterator ****************************/
3086
3087
typedef struct {
3088
PyObject_HEAD
3089
Py_ssize_t it_index;
3090
PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3091
} striterobject;
3092
3093
static void
3094
striter_dealloc(striterobject *it)
3095
{
3096
_PyObject_GC_UNTRACK(it);
3097
Py_XDECREF(it->it_seq);
3098
PyObject_GC_Del(it);
3099
}
3100
3101
static int
3102
striter_traverse(striterobject *it, visitproc visit, void *arg)
3103
{
3104
Py_VISIT(it->it_seq);
3105
return 0;
3106
}
3107
3108
static PyObject *
3109
striter_next(striterobject *it)
3110
{
3111
PyBytesObject *seq;
3112
3113
assert(it != NULL);
3114
seq = it->it_seq;
3115
if (seq == NULL)
3116
return NULL;
3117
assert(PyBytes_Check(seq));
3118
3119
if (it->it_index < PyBytes_GET_SIZE(seq)) {
3120
return _PyLong_FromUnsignedChar(
3121
(unsigned char)seq->ob_sval[it->it_index++]);
3122
}
3123
3124
it->it_seq = NULL;
3125
Py_DECREF(seq);
3126
return NULL;
3127
}
3128
3129
static PyObject *
3130
striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
3131
{
3132
Py_ssize_t len = 0;
3133
if (it->it_seq)
3134
len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3135
return PyLong_FromSsize_t(len);
3136
}
3137
3138
PyDoc_STRVAR(length_hint_doc,
3139
"Private method returning an estimate of len(list(it)).");
3140
3141
static PyObject *
3142
striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
3143
{
3144
PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));
3145
3146
/* _PyEval_GetBuiltin can invoke arbitrary code,
3147
* call must be before access of iterator pointers.
3148
* see issue #101765 */
3149
3150
if (it->it_seq != NULL) {
3151
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
3152
} else {
3153
return Py_BuildValue("N(())", iter);
3154
}
3155
}
3156
3157
PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3158
3159
static PyObject *
3160
striter_setstate(striterobject *it, PyObject *state)
3161
{
3162
Py_ssize_t index = PyLong_AsSsize_t(state);
3163
if (index == -1 && PyErr_Occurred())
3164
return NULL;
3165
if (it->it_seq != NULL) {
3166
if (index < 0)
3167
index = 0;
3168
else if (index > PyBytes_GET_SIZE(it->it_seq))
3169
index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3170
it->it_index = index;
3171
}
3172
Py_RETURN_NONE;
3173
}
3174
3175
PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3176
3177
static PyMethodDef striter_methods[] = {
3178
{"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3179
length_hint_doc},
3180
{"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3181
reduce_doc},
3182
{"__setstate__", (PyCFunction)striter_setstate, METH_O,
3183
setstate_doc},
3184
{NULL, NULL} /* sentinel */
3185
};
3186
3187
PyTypeObject PyBytesIter_Type = {
3188
PyVarObject_HEAD_INIT(&PyType_Type, 0)
3189
"bytes_iterator", /* tp_name */
3190
sizeof(striterobject), /* tp_basicsize */
3191
0, /* tp_itemsize */
3192
/* methods */
3193
(destructor)striter_dealloc, /* tp_dealloc */
3194
0, /* tp_vectorcall_offset */
3195
0, /* tp_getattr */
3196
0, /* tp_setattr */
3197
0, /* tp_as_async */
3198
0, /* tp_repr */
3199
0, /* tp_as_number */
3200
0, /* tp_as_sequence */
3201
0, /* tp_as_mapping */
3202
0, /* tp_hash */
3203
0, /* tp_call */
3204
0, /* tp_str */
3205
PyObject_GenericGetAttr, /* tp_getattro */
3206
0, /* tp_setattro */
3207
0, /* tp_as_buffer */
3208
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3209
0, /* tp_doc */
3210
(traverseproc)striter_traverse, /* tp_traverse */
3211
0, /* tp_clear */
3212
0, /* tp_richcompare */
3213
0, /* tp_weaklistoffset */
3214
PyObject_SelfIter, /* tp_iter */
3215
(iternextfunc)striter_next, /* tp_iternext */
3216
striter_methods, /* tp_methods */
3217
0,
3218
};
3219
3220
/* tp_iter for bytes: create a GC-tracked iterator positioned at index 0
   that holds a strong reference to `seq`. */
static PyObject *
bytes_iter(PyObject *seq)
{
    if (!PyBytes_Check(seq)) {
        PyErr_BadInternalCall();
        return NULL;
    }

    striterobject *iterator = PyObject_GC_New(striterobject, &PyBytesIter_Type);
    if (iterator == NULL) {
        return NULL;
    }
    iterator->it_index = 0;
    iterator->it_seq = (PyBytesObject *)Py_NewRef(seq);
    /* Start GC tracking only after all fields are initialized. */
    _PyObject_GC_TRACK(iterator);
    return (PyObject *)iterator;
}
3237
3238
3239
/* _PyBytesWriter API */
3240
3241
/* Divisor used by _PyBytesWriter_Resize(): the buffer grows by
   size / OVERALLOCATE_FACTOR extra bytes when overallocation is enabled. */
#ifdef MS_WINDOWS
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3248
3249
void
3250
_PyBytesWriter_Init(_PyBytesWriter *writer)
3251
{
3252
/* Set all attributes before small_buffer to 0 */
3253
memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
3254
#ifndef NDEBUG
3255
memset(writer->small_buffer, PYMEM_CLEANBYTE,
3256
sizeof(writer->small_buffer));
3257
#endif
3258
}
3259
3260
void
3261
_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3262
{
3263
Py_CLEAR(writer->buffer);
3264
}
3265
3266
Py_LOCAL_INLINE(char*)
3267
_PyBytesWriter_AsString(_PyBytesWriter *writer)
3268
{
3269
if (writer->use_small_buffer) {
3270
assert(writer->buffer == NULL);
3271
return writer->small_buffer;
3272
}
3273
else if (writer->use_bytearray) {
3274
assert(writer->buffer != NULL);
3275
return PyByteArray_AS_STRING(writer->buffer);
3276
}
3277
else {
3278
assert(writer->buffer != NULL);
3279
return PyBytes_AS_STRING(writer->buffer);
3280
}
3281
}
3282
3283
Py_LOCAL_INLINE(Py_ssize_t)
3284
_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
3285
{
3286
const char *start = _PyBytesWriter_AsString(writer);
3287
assert(str != NULL);
3288
assert(str >= start);
3289
assert(str - start <= writer->allocated);
3290
return str - start;
3291
}
3292
3293
#ifndef NDEBUG
3294
Py_LOCAL_INLINE(int)
3295
_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3296
{
3297
const char *start, *end;
3298
3299
if (writer->use_small_buffer) {
3300
assert(writer->buffer == NULL);
3301
}
3302
else {
3303
assert(writer->buffer != NULL);
3304
if (writer->use_bytearray)
3305
assert(PyByteArray_CheckExact(writer->buffer));
3306
else
3307
assert(PyBytes_CheckExact(writer->buffer));
3308
assert(Py_REFCNT(writer->buffer) == 1);
3309
}
3310
3311
if (writer->use_bytearray) {
3312
/* bytearray has its own overallocation algorithm,
3313
writer overallocation must be disabled */
3314
assert(!writer->overallocate);
3315
}
3316
3317
assert(0 <= writer->allocated);
3318
assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
3319
/* the last byte must always be null */
3320
start = _PyBytesWriter_AsString(writer);
3321
assert(start[writer->allocated] == 0);
3322
3323
end = start + writer->allocated;
3324
assert(str != NULL);
3325
assert(start <= str && str <= end);
3326
return 1;
3327
}
3328
#endif
3329
3330
void*
3331
_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3332
{
3333
Py_ssize_t allocated, pos;
3334
3335
assert(_PyBytesWriter_CheckConsistency(writer, str));
3336
assert(writer->allocated < size);
3337
3338
allocated = size;
3339
if (writer->overallocate
3340
&& allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3341
/* overallocate to limit the number of realloc() */
3342
allocated += allocated / OVERALLOCATE_FACTOR;
3343
}
3344
3345
pos = _PyBytesWriter_GetSize(writer, str);
3346
if (!writer->use_small_buffer) {
3347
if (writer->use_bytearray) {
3348
if (PyByteArray_Resize(writer->buffer, allocated))
3349
goto error;
3350
/* writer->allocated can be smaller than writer->buffer->ob_alloc,
3351
but we cannot use ob_alloc because bytes may need to be moved
3352
to use the whole buffer. bytearray uses an internal optimization
3353
to avoid moving or copying bytes when bytes are removed at the
3354
beginning (ex: del bytearray[:1]). */
3355
}
3356
else {
3357
if (_PyBytes_Resize(&writer->buffer, allocated))
3358
goto error;
3359
}
3360
}
3361
else {
3362
/* convert from stack buffer to bytes object buffer */
3363
assert(writer->buffer == NULL);
3364
3365
if (writer->use_bytearray)
3366
writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3367
else
3368
writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3369
if (writer->buffer == NULL)
3370
goto error;
3371
3372
if (pos != 0) {
3373
char *dest;
3374
if (writer->use_bytearray)
3375
dest = PyByteArray_AS_STRING(writer->buffer);
3376
else
3377
dest = PyBytes_AS_STRING(writer->buffer);
3378
memcpy(dest,
3379
writer->small_buffer,
3380
pos);
3381
}
3382
3383
writer->use_small_buffer = 0;
3384
#ifndef NDEBUG
3385
memset(writer->small_buffer, PYMEM_CLEANBYTE,
3386
sizeof(writer->small_buffer));
3387
#endif
3388
}
3389
writer->allocated = allocated;
3390
3391
str = _PyBytesWriter_AsString(writer) + pos;
3392
assert(_PyBytesWriter_CheckConsistency(writer, str));
3393
return str;
3394
3395
error:
3396
_PyBytesWriter_Dealloc(writer);
3397
return NULL;
3398
}
3399
3400
void*
3401
_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3402
{
3403
Py_ssize_t new_min_size;
3404
3405
assert(_PyBytesWriter_CheckConsistency(writer, str));
3406
assert(size >= 0);
3407
3408
if (size == 0) {
3409
/* nothing to do */
3410
return str;
3411
}
3412
3413
if (writer->min_size > PY_SSIZE_T_MAX - size) {
3414
PyErr_NoMemory();
3415
_PyBytesWriter_Dealloc(writer);
3416
return NULL;
3417
}
3418
new_min_size = writer->min_size + size;
3419
3420
if (new_min_size > writer->allocated)
3421
str = _PyBytesWriter_Resize(writer, str, new_min_size);
3422
3423
writer->min_size = new_min_size;
3424
return str;
3425
}
3426
3427
/* Allocate the buffer to write size bytes.
3428
Return the pointer to the beginning of buffer data.
3429
Raise an exception and return NULL on error. */
3430
void*
3431
_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3432
{
3433
/* ensure that _PyBytesWriter_Alloc() is only called once */
3434
assert(writer->min_size == 0 && writer->buffer == NULL);
3435
assert(size >= 0);
3436
3437
writer->use_small_buffer = 1;
3438
#ifndef NDEBUG
3439
writer->allocated = sizeof(writer->small_buffer) - 1;
3440
/* In debug mode, don't use the full small buffer because it is less
3441
efficient than bytes and bytearray objects to detect buffer underflow
3442
and buffer overflow. Use 10 bytes of the small buffer to test also
3443
code using the smaller buffer in debug mode.
3444
3445
Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3446
in debug mode to also be able to detect stack overflow when running
3447
tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3448
if _Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3449
stack overflow. */
3450
writer->allocated = Py_MIN(writer->allocated, 10);
3451
/* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3452
to detect buffer overflow */
3453
writer->small_buffer[writer->allocated] = 0;
3454
#else
3455
writer->allocated = sizeof(writer->small_buffer);
3456
#endif
3457
return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
3458
}
3459
3460
PyObject *
3461
_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
3462
{
3463
Py_ssize_t size;
3464
PyObject *result;
3465
3466
assert(_PyBytesWriter_CheckConsistency(writer, str));
3467
3468
size = _PyBytesWriter_GetSize(writer, str);
3469
if (size == 0 && !writer->use_bytearray) {
3470
Py_CLEAR(writer->buffer);
3471
/* Get the empty byte string singleton */
3472
result = PyBytes_FromStringAndSize(NULL, 0);
3473
}
3474
else if (writer->use_small_buffer) {
3475
if (writer->use_bytearray) {
3476
result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3477
}
3478
else {
3479
result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3480
}
3481
}
3482
else {
3483
result = writer->buffer;
3484
writer->buffer = NULL;
3485
3486
if (size != writer->allocated) {
3487
if (writer->use_bytearray) {
3488
if (PyByteArray_Resize(result, size)) {
3489
Py_DECREF(result);
3490
return NULL;
3491
}
3492
}
3493
else {
3494
if (_PyBytes_Resize(&result, size)) {
3495
assert(result == NULL);
3496
return NULL;
3497
}
3498
}
3499
}
3500
}
3501
return result;
3502
}
3503
3504
void*
3505
_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
3506
const void *bytes, Py_ssize_t size)
3507
{
3508
char *str = (char *)ptr;
3509
3510
str = _PyBytesWriter_Prepare(writer, str, size);
3511
if (str == NULL)
3512
return NULL;
3513
3514
memcpy(str, bytes, size);
3515
str += size;
3516
3517
return str;
3518
}
3519
3520
3521
void
3522
_PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
3523
const char* src, Py_ssize_t len_src)
3524
{
3525
if (len_dest == 0) {
3526
return;
3527
}
3528
if (len_src == 1) {
3529
memset(dest, src[0], len_dest);
3530
}
3531
else {
3532
if (src != dest) {
3533
memcpy(dest, src, len_src);
3534
}
3535
Py_ssize_t copied = len_src;
3536
while (copied < len_dest) {
3537
Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied);
3538
memcpy(dest + copied, dest, bytes_to_copy);
3539
copied += bytes_to_copy;
3540
}
3541
}
3542
}
3543
3544
3545