Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Objects/stringlib/join.h
12 views
1
/* stringlib: bytes joining implementation */
2
3
/* Build-time guard: stop compilation when the including file has set
 * STRINGLIB_IS_UNICODE to a non-zero value, because the join code in this
 * header is written for byte-wise strings only. */
#if STRINGLIB_IS_UNICODE
4
#error join.h only compatible with byte-wise strings
5
#endif
6
7
/* Join the items of `iterable`, placing the contents of `sep` between each
 * pair of consecutive items.
 *
 * sep      - separator object; its data pointer and length are read once,
 *            up front, through STRINGLIB_STR / STRINGLIB_LEN.
 * iterable - object handed to PySequence_Fast(); every item must either be
 *            an exact bytes object or support PyObject_GetBuffer() with
 *            PyBUF_SIMPLE.
 *
 * Returns the object produced by STRINGLIB_NEW holding the joined data, or
 * NULL on failure.  Failures raised directly here are:
 *   - TypeError     an item does not provide a buffer,
 *   - OverflowError the total length would exceed PY_SSIZE_T_MAX,
 *   - RuntimeError  the sequence size changed during the sizing pass,
 *   - the error set by PyErr_NoMemory() when the buffer table cannot be
 *     allocated.
 * A NULL from PySequence_Fast() or STRINGLIB_NEW is passed through as is.
 */
Py_LOCAL_INLINE(PyObject *)
STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
{
/* Number of Py_buffer slots kept on the stack before falling back to
 * a heap-allocated table. */
#define NB_STATIC_BUFFERS 10
/* Results shorter than this many bytes are copied with the GIL held. */
#define GIL_THRESHOLD 1048576
    const char *sep_data = STRINGLIB_STR(sep);
    const Py_ssize_t sep_size = STRINGLIB_LEN(sep);
    Py_buffer stack_views[NB_STATIC_BUFFERS];
    Py_buffer *views = NULL;
    PyObject *result = NULL;
    PyObject *fast, *elem;
    Py_ssize_t count, idx;
    Py_ssize_t acquired = 0;    /* views[0 .. acquired-1] must be released */
    Py_ssize_t total = 0;       /* size in bytes of the joined result */
    int release_gil = 1;
    PyThreadState *tstate = NULL;
    char *dst;

    fast = PySequence_Fast(iterable, "can only join an iterable");
    if (fast == NULL) {
        return NULL;
    }

    count = PySequence_Fast_GET_SIZE(fast);
    if (count == 0) {
        /* Nothing to join: hand back an empty object. */
        Py_DECREF(fast);
        return STRINGLIB_NEW(NULL, 0);
    }
#if !STRINGLIB_MUTABLE
    if (count == 1) {
        /* A lone item of the exact target type is returned as is. */
        elem = PySequence_Fast_GET_ITEM(fast, 0);
        if (STRINGLIB_CHECK_EXACT(elem)) {
            /* Take our reference before letting go of the container. */
            Py_INCREF(elem);
            Py_DECREF(fast);
            return elem;
        }
    }
#endif

    /* Choose where the per-item buffer table lives. */
    if (count <= NB_STATIC_BUFFERS) {
        views = stack_views;
    }
    else {
        views = PyMem_NEW(Py_buffer, count);
        if (views == NULL) {
            Py_DECREF(fast);
            PyErr_NoMemory();
            return NULL;
        }
    }

    /* Sizing pass: obtain a buffer for every item, verify that each item is
     * bytes-like, and accumulate the total output length in `total`.
     */
    for (idx = 0; idx < count; idx++) {
        Py_buffer *view = &views[idx];

        elem = PySequence_Fast_GET_ITEM(fast, idx);
        if (!PyBytes_CheckExact(elem)) {
            if (PyObject_GetBuffer(elem, view, PyBUF_SIMPLE) != 0) {
                PyErr_Format(PyExc_TypeError,
                             "sequence item %zd: expected a bytes-like object, "
                             "%.80s found",
                             idx, Py_TYPE(elem)->tp_name);
                goto fail;
            }
            /* The object behind this buffer may be mutable.  Releasing the
             * GIL would let another thread modify it while we hold the
             * buffer.  Code doing that is racy already, but staying
             * conservative here avoids changing how such a race behaves.
             */
            release_gil = 0;
        }
        else {
            /* Exact bytes: fill in the fields we use by hand.  The entry is
             * still released through PyBuffer_Release() like the others.
             */
            view->obj = Py_NewRef(elem);
            view->buf = PyBytes_AS_STRING(elem);
            view->len = PyBytes_GET_SIZE(elem);
        }
        acquired = idx + 1;     /* this slot now needs releasing */

        if (view->len > PY_SSIZE_T_MAX - total) {
            PyErr_SetString(PyExc_OverflowError,
                            "join() result is too long");
            goto fail;
        }
        total += view->len;

        if (idx != 0) {
            /* Every item except the first is preceded by a separator. */
            if (sep_size > PY_SSIZE_T_MAX - total) {
                PyErr_SetString(PyExc_OverflowError,
                                "join() result is too long");
                goto fail;
            }
            total += sep_size;
        }

        if (PySequence_Fast_GET_SIZE(fast) != count) {
            PyErr_SetString(PyExc_RuntimeError,
                            "sequence changed size during iteration");
            goto fail;
        }
    }

    /* Allocate the destination object. */
    result = STRINGLIB_NEW(NULL, total);
    if (result == NULL) {
        goto fail;
    }

    /* Copy pass. */
    dst = STRINGLIB_STR(result);
    if (total < GIL_THRESHOLD) {
        /* The overhead likely outweighs the benefit for small results. */
        release_gil = 0;
    }
    if (release_gil) {
        tstate = PyEval_SaveThread();
    }

    if (sep_size == 0) {
        /* Empty separator: plain back-to-back concatenation. */
        for (idx = 0; idx < acquired; idx++) {
            const Py_ssize_t chunk = views[idx].len;

            memcpy(dst, views[idx].buf, chunk);
            dst += chunk;
        }
    }
    else {
        for (idx = 0; idx < acquired; idx++) {
            Py_ssize_t chunk;

            if (idx > 0) {
                memcpy(dst, sep_data, sep_size);
                dst += sep_size;
            }
            chunk = views[idx].len;
            memcpy(dst, views[idx].buf, chunk);
            dst += chunk;
        }
    }

    if (release_gil) {
        PyEval_RestoreThread(tstate);
    }
    goto cleanup;

fail:
    result = NULL;
cleanup:
    Py_DECREF(fast);
    for (idx = 0; idx < acquired; idx++) {
        PyBuffer_Release(&views[idx]);
    }
    if (views != stack_views) {
        PyMem_Free(views);
    }
    return result;
}
160
161
/* NB_STATIC_BUFFERS and GIL_THRESHOLD are #defined inside the bytes_join
 * body above; undefine them here so they do not stay defined past the end
 * of this header. */
#undef NB_STATIC_BUFFERS
162
#undef GIL_THRESHOLD
163
164