Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Python/ceval_macros.h
12 views
1
// Macros and other things needed by ceval.c and bytecodes.c


/* Computed GOTOs, or
       the-optimization-commonly-but-improperly-known-as-"threaded code"
   using gcc's labels-as-values extension
   (http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html).

   The traditional bytecode evaluation loop uses a "switch" statement, which
   decent compilers will optimize as a single indirect branch instruction
   combined with a lookup table of jump addresses. However, since the
   indirect jump instruction is shared by all opcodes, the CPU will have a
   hard time making the right prediction for where to jump next (actually,
   it will be always wrong except in the uncommon case of a sequence of
   several identical opcodes).

   "Threaded code" in contrast, uses an explicit jump table and an explicit
   indirect jump instruction at the end of each opcode. Since the jump
   instruction is at a different address for each opcode, the CPU will make a
   separate prediction for each of these instructions, which is equivalent to
   predicting the second opcode of each opcode pair. These predictions have
   a much better chance to turn out valid, especially in small bytecode loops.

   A mispredicted branch on a modern CPU flushes the whole pipeline and
   can cost several CPU cycles (depending on the pipeline depth),
   and potentially many more instructions (depending on the pipeline width).
   A correctly predicted branch, however, is nearly free.

   At the time of this writing, the "threaded code" version is up to 15-20%
   faster than the normal "switch" version, depending on the compiler and the
   CPU architecture.

   NOTE: care must be taken that the compiler doesn't try to "optimize" the
   indirect jumps by sharing them between all opcodes. Such optimizations
   can be disabled on gcc by using the -fno-gcse flag (or possibly
   -fno-crossjumping).
*/
37
38
/* Use macros rather than inline functions, to make it as clear as possible
 * to the C compiler that the tracing check is a simple test then branch.
 * We want to be sure that the compiler knows this before it generates
 * the CFG.
 */
43
44
/* When built with DTrace support, OR in a nonzero value whenever the
 * DTrace line probe is enabled; otherwise the macro expands to nothing,
 * so the check disappears entirely from the build. */
#ifdef WITH_DTRACE
#define OR_DTRACE_LINE | (PyDTrace_LINE_ENABLED() ? 255 : 0)
#else
#define OR_DTRACE_LINE
#endif
49
50
/* Decide whether the computed-goto dispatch is used.
 * - Compiler supports it (HAVE_COMPUTED_GOTOS): default on, but the user
 *   may pre-define USE_COMPUTED_GOTOS to override.
 * - Compiler lacks support: requesting it is a hard error; force it to 0. */
#ifdef HAVE_COMPUTED_GOTOS
#ifndef USE_COMPUTED_GOTOS
#define USE_COMPUTED_GOTOS 1
#endif
#else
#if defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
#error "Computed gotos are not supported on this compiler."
#endif
#undef USE_COMPUTED_GOTOS
#define USE_COMPUTED_GOTOS 0
#endif
61
62
/* Per-instruction bookkeeping executed at the top of every opcode body:
 * records the current instruction in frame->prev_instr and advances
 * next_instr.  Under Py_STATS it additionally counts opcode executions
 * and (lastopcode, op) pair frequencies for specialization statistics. */
#ifdef Py_STATS
#define INSTRUCTION_START(op) \
    do { \
        frame->prev_instr = next_instr++; \
        OPCODE_EXE_INC(op); \
        if (_py_stats) _py_stats->opcode_stats[lastopcode].pair_count[op]++; \
        lastopcode = op; \
    } while (0)
#else
#define INSTRUCTION_START(op) (frame->prev_instr = next_instr++)
#endif
73
74
/* TARGET(op) marks the start of each opcode implementation.  With computed
 * gotos only a label is needed; the switch-based build also emits a "case"
 * so the central dispatch switch can reach it.  DISPATCH_GOTO() performs
 * the actual transfer to the next opcode's code. */
#if USE_COMPUTED_GOTOS
#  define TARGET(op) TARGET_##op: INSTRUCTION_START(op);
#  define DISPATCH_GOTO() goto *opcode_targets[opcode]
#else
#  define TARGET(op) case op: TARGET_##op: INSTRUCTION_START(op);
#  define DISPATCH_GOTO() goto dispatch_opcode
#endif
81
82
/* PRE_DISPATCH_GOTO() does lltrace if enabled. Normally a no-op */
#ifdef LLTRACE
#define PRE_DISPATCH_GOTO() if (lltrace) { \
    lltrace_instruction(frame, stack_pointer, next_instr); }
#else
#define PRE_DISPATCH_GOTO() ((void)0)
#endif
89
90
91
/* Do interpreter dispatch accounting for tracing and instrumentation */
#define DISPATCH() \
    { \
        NEXTOPARG(); \
        PRE_DISPATCH_GOTO(); \
        DISPATCH_GOTO(); \
    }

/* Re-dispatch to the opcode at next_instr (e.g. after a specialization
 * miss) while keeping the current oparg unchanged. */
#define DISPATCH_SAME_OPARG() \
    { \
        opcode = next_instr->op.code; \
        PRE_DISPATCH_GOTO(); \
        DISPATCH_GOTO(); \
    }
105
106
/* Switch execution to NEW_FRAME (an inlined Python-to-Python call) and
 * resume the eval loop at its first instruction, avoiding a recursive
 * C-level call into the interpreter. */
#define DISPATCH_INLINED(NEW_FRAME) \
    do { \
        assert(tstate->interp->eval_frame == NULL); \
        _PyFrame_SetStackPointer(frame, stack_pointer); \
        frame->prev_instr = next_instr - 1; \
        (NEW_FRAME)->previous = frame; \
        frame = cframe.current_frame = (NEW_FRAME); \
        CALL_STAT_INC(inlined_py_calls); \
        goto start_frame; \
    } while (0)

/* Jump to the shared handler when the eval breaker flag has been set
 * (NOTE(review): presumably by signals/pending calls — confirm in ceval.c). */
#define CHECK_EVAL_BREAKER() \
    _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); \
    if (_Py_atomic_load_relaxed_int32(&tstate->interp->ceval.eval_breaker)) { \
        goto handle_eval_breaker; \
    }
122
123
124
/* Tuple access macros */

#ifndef Py_DEBUG
#define GETITEM(v, i) PyTuple_GET_ITEM((v), (i))
#else
/* Debug build: type- and bounds-checked tuple item access. */
static inline PyObject *
GETITEM(PyObject *v, Py_ssize_t i) {
    assert(PyTuple_Check(v));
    assert(i >= 0);
    assert(i < PyTuple_GET_SIZE(v));
    return PyTuple_GET_ITEM(v, i);
}
#endif
137
138
/* Code access macros */

/* The integer overflow is checked by an assertion below. */
#define INSTR_OFFSET() ((int)(next_instr - _PyCode_CODE(_PyFrame_GetCode(frame))))
/* Fetch the current code unit and split it into opcode and oparg. */
#define NEXTOPARG() do { \
        _Py_CODEUNIT word = *next_instr; \
        opcode = word.op.code; \
        oparg = word.op.arg; \
    } while (0)
#define JUMPTO(x) (next_instr = _PyCode_CODE(_PyFrame_GetCode(frame)) + (x))

/* JUMPBY makes the generator identify the instruction as a jump. SKIP_OVER is
 * for advancing to the next instruction, taking into account cache entries
 * and skipped instructions.
 */
#define JUMPBY(x) (next_instr += (x))
#define SKIP_OVER(x) (next_instr += (x))
155
156
/* OpCode prediction macros
    Some opcodes tend to come in pairs thus making it possible to
    predict the second code when the first is run.  For example,
    COMPARE_OP is often followed by POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE.

    Verifying the prediction costs a single high-speed test of a register
    variable against a constant.  If the pairing was good, then the
    processor's own internal branch prediction has a high likelihood of
    success, resulting in a nearly zero-overhead transition to the
    next opcode.  A successful prediction saves a trip through the eval-loop
    including its unpredictable switch-case branch.  Combined with the
    processor's internal branch prediction, a successful PREDICT has the
    effect of making the two opcodes run as if they were a single new opcode
    with the bodies combined.

    If collecting opcode statistics, your choices are to either keep the
    predictions turned-on and interpret the results as if some opcodes
    had been combined or turn-off predictions so that the opcode frequency
    counter updates for both opcodes.

    Opcode prediction is disabled with threaded code, since the latter allows
    the CPU to record separate branch prediction information for each
    opcode.

*/

/* PREDICTED(op) emits the label that GO_TO_INSTRUCTION(op) jumps to. */
#define PREDICT_ID(op) PRED_##op
#define PREDICTED(op) PREDICT_ID(op):
184
185
186
/* Stack manipulation macros */

/* The stack can grow at most MAXINT deep, as co_nlocals and
   co_stacksize are ints. */
#define STACK_LEVEL() ((int)(stack_pointer - _PyFrame_Stackbase(frame)))
#define STACK_SIZE() (_PyFrame_GetCode(frame)->co_stacksize)
#define EMPTY() (STACK_LEVEL() == 0)
#define TOP() (stack_pointer[-1])
#define SECOND() (stack_pointer[-2])
#define THIRD() (stack_pointer[-3])
#define FOURTH() (stack_pointer[-4])
#define PEEK(n) (stack_pointer[-(n)])
#define POKE(n, v) (stack_pointer[-(n)] = (v))
#define SET_TOP(v) (stack_pointer[-1] = (v))
#define SET_SECOND(v) (stack_pointer[-2] = (v))
/* Parenthesize the argument for macro hygiene (e.g. BASIC_STACKADJ(a - b)). */
#define BASIC_STACKADJ(n) (stack_pointer += (n))
#define BASIC_PUSH(v) (*stack_pointer++ = (v))
#define BASIC_POP() (*--stack_pointer)

/* Debug builds wrap the basic operations with stack under/overflow asserts. */
#ifdef Py_DEBUG
#define PUSH(v) do { \
        BASIC_PUSH(v); \
        assert(STACK_LEVEL() <= STACK_SIZE()); \
    } while (0)
#define POP() (assert(STACK_LEVEL() > 0), BASIC_POP())
#define STACK_GROW(n) do { \
        assert(n >= 0); \
        BASIC_STACKADJ(n); \
        assert(STACK_LEVEL() <= STACK_SIZE()); \
    } while (0)
#define STACK_SHRINK(n) do { \
        assert(n >= 0); \
        assert(STACK_LEVEL() >= n); \
        BASIC_STACKADJ(-(n)); \
    } while (0)
#else
#define PUSH(v) BASIC_PUSH(v)
#define POP() BASIC_POP()
#define STACK_GROW(n) BASIC_STACKADJ(n)
#define STACK_SHRINK(n) BASIC_STACKADJ(-(n))
#endif
227
228
229
/* Data access macros */
#define FRAME_CO_CONSTS (_PyFrame_GetCode(frame)->co_consts)
#define FRAME_CO_NAMES (_PyFrame_GetCode(frame)->co_names)

/* Local variable macros */

#define GETLOCAL(i) (frame->localsplus[i])

/* The SETLOCAL() macro must not DECREF the local variable in-place and
   then store the new value; it must copy the old value to a temporary
   value, then store the new value, and then DECREF the temporary value.
   This is because it is possible that during the DECREF the frame is
   accessed by other code (e.g. a __del__ method or gc.collect()) and the
   variable would be pointing to already-freed memory. */
#define SETLOCAL(i, value) do { PyObject *tmp = GETLOCAL(i); \
                                GETLOCAL(i) = value; \
                                Py_XDECREF(tmp); } while (0)

/* Jump to the PREDICTED(op) label for the given opcode. */
#define GO_TO_INSTRUCTION(op) goto PREDICT_ID(op)
248
249
#ifdef Py_STATS
/* Bookkeeping for a failed specialization guard: count the miss and
 * classify it via the adaptive counter in the first cache entry. */
#define UPDATE_MISS_STATS(INSTNAME) \
    do { \
        STAT_INC(opcode, miss); \
        STAT_INC((INSTNAME), miss); \
        /* The counter is always the first cache entry: */ \
        if (ADAPTIVE_COUNTER_IS_ZERO(next_instr->cache)) { \
            STAT_INC((INSTNAME), deopt); \
        } \
        else { \
            /* This is about to be (incorrectly) incremented: */ \
            STAT_DEC((INSTNAME), deferred); \
        } \
    } while (0)
#else
#define UPDATE_MISS_STATS(INSTNAME) ((void)0)
#endif

/* When guard COND fails, fall back from a specialized instruction to its
 * generic (deopt) form INSTNAME. */
#define DEOPT_IF(COND, INSTNAME) \
    if ((COND)) { \
        /* This is only a single jump on release builds! */ \
        UPDATE_MISS_STATS((INSTNAME)); \
        assert(_PyOpcode_Deopt[opcode] == (INSTNAME)); \
        GO_TO_INSTRUCTION(INSTNAME); \
    }
274
275
276
/* Shorthands for frequently used fields of the current frame and its code
 * object. */
#define GLOBALS() frame->f_globals
#define BUILTINS() frame->f_builtins
#define LOCALS() frame->f_locals
#define CONSTS() _PyFrame_GetCode(frame)->co_consts
#define NAMES() _PyFrame_GetCode(frame)->co_names

/* Fire the DTrace function-entry probe if it is enabled at runtime. */
#define DTRACE_FUNCTION_ENTRY() \
    if (PyDTrace_FUNCTION_ENTRY_ENABLED()) { \
        dtrace_function_entry(frame); \
    }
286
287
/* The adaptive counter's value lives in the bits above the low
 * ADAPTIVE_BACKOFF_BITS bits, so (de|in)crements step by
 * (1 << ADAPTIVE_BACKOFF_BITS). */
#define ADAPTIVE_COUNTER_IS_ZERO(COUNTER) \
    (((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == 0)

#define ADAPTIVE_COUNTER_IS_MAX(COUNTER) \
    (((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == ((1 << MAX_BACKOFF_VALUE) - 1))

/* NOTE: the trailing semicolon after "while (0)" was removed.  With it,
 * `DECREMENT_ADAPTIVE_COUNTER(c);` expanded to two statements, breaking
 * use in an unbraced if/else — the whole point of the do-while(0) idiom. */
#define DECREMENT_ADAPTIVE_COUNTER(COUNTER) \
    do { \
        assert(!ADAPTIVE_COUNTER_IS_ZERO((COUNTER))); \
        (COUNTER) -= (1 << ADAPTIVE_BACKOFF_BITS); \
    } while (0)

#define INCREMENT_ADAPTIVE_COUNTER(COUNTER) \
    do { \
        (COUNTER) += (1 << ADAPTIVE_BACKOFF_BITS); \
    } while (0)
303
304
/* Format string for "name is not defined" errors; callers supply the name
 * (truncated to 200 chars by the %.200s precision). */
#define NAME_ERROR_MSG "name '%.200s' is not defined"

/* Number of keyword names in the current call; 0 when kwnames is NULL. */
#define KWNAMES_LEN() \
    (kwnames == NULL ? 0 : ((int)PyTuple_GET_SIZE(kwnames)))
308
309
/* Float binary-op fast path: if either operand holds the only reference,
 * store dval directly into that object and reuse it as the result;
 * otherwise allocate a fresh float.  Consumes one reference to both left
 * and right, and jumps to `error` if the allocation fails. */
#define DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dval, result) \
    do { \
        if (Py_REFCNT(left) == 1) { \
            ((PyFloatObject *)left)->ob_fval = (dval); \
            _Py_DECREF_SPECIALIZED(right, _PyFloat_ExactDealloc); \
            result = (left); \
        } \
        else if (Py_REFCNT(right) == 1) { \
            ((PyFloatObject *)right)->ob_fval = (dval); \
            _Py_DECREF_NO_DEALLOC(left); \
            result = (right); \
        } \
        else { \
            result = PyFloat_FromDouble(dval); \
            if ((result) == NULL) goto error; \
            _Py_DECREF_NO_DEALLOC(left); \
            _Py_DECREF_NO_DEALLOC(right); \
        } \
    } while (0)
328
329
// If a trace function sets a new f_lineno and
// *then* raises, we use the destination when searching
// for an exception handler, displaying the traceback, and so on
/* NOTE: dropped the stray semicolon after "while (0)" so the macro expands
 * to a single statement and is safe in unbraced if/else bodies. */
#define INSTRUMENTED_JUMP(src, dest, event) \
    do { \
        _PyFrame_SetStackPointer(frame, stack_pointer); \
        next_instr = _Py_call_instrumentation_jump(tstate, event, frame, src, dest); \
        stack_pointer = _PyFrame_GetStackPointer(frame); \
        if (next_instr == NULL) { \
            next_instr = (dest)+1; \
            goto error; \
        } \
    } while (0)
342
343
typedef PyObject *(*convertion_func_ptr)(PyObject *);
344
345
static const convertion_func_ptr CONVERSION_FUNCTIONS[4] = {
346
[FVC_STR] = PyObject_Str,
347
[FVC_REPR] = PyObject_Repr,
348
[FVC_ASCII] = PyObject_ASCII
349
};
350
351