Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Tools/gdb/libpython.py
12 views
1
#!/usr/bin/python
2
'''
3
From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
4
to be extended with Python code e.g. for library-specific data visualizations,
5
such as for the C++ STL types. Documentation on this API can be seen at:
6
http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html
7
8
9
This python module deals with the case when the process being debugged (the
10
"inferior process" in gdb parlance) is itself python, or more specifically,
11
linked against libpython. In this situation, almost every item of data is a
12
(PyObject*), and having the debugger merely print their addresses is not very
13
enlightening.
14
15
This module embeds knowledge about the implementation details of libpython so
16
that we can emit useful visualizations e.g. a string, a list, a dict, a frame
17
giving file/line information and the state of local variables
18
19
In particular, given a gdb.Value corresponding to a PyObject* in the inferior
20
process, we can generate a "proxy value" within the gdb process. For example,
21
given a PyObject* in the inferior process that is in fact a PyListObject*
22
holding three PyObject* that turn out to be PyBytesObject* instances, we can
23
generate a proxy value within the gdb process that is a list of bytes
24
instances:
25
[b"foo", b"bar", b"baz"]
26
27
Doing so can be expensive for complicated graphs of objects, and could take
28
some time, so we also have a "write_repr" method that writes a representation
29
of the data to a file-like object. This allows us to stop the traversal by
30
having the file-like object raise an exception if it gets too much data.
31
32
With both "proxyval" and "write_repr" we keep track of the set of all addresses
33
visited so far in the traversal, to avoid infinite recursion due to cycles in
34
the graph of object references.
35
36
We try to defer gdb.lookup_type() invocations for python types until as late as
37
possible: for a dynamically linked python binary, when the process starts in
38
the debugger, the libpython.so hasn't been dynamically loaded yet, so none of
39
the type names are known to the debugger
40
41
The module also extends gdb with some python-specific commands.
42
'''
43
44
import gdb
45
import os
46
import locale
47
import sys
48
49
50
# Look up the gdb.Type for some standard types.
# These lookups are redone on every call rather than cached, because the
# types (e.g. pointer sizes) may change when gdb loads a different executable.
53
54
def _type_char_ptr():
    """Return the gdb.Type for ``char *``, looked up afresh on each call."""
    char_type = gdb.lookup_type('char')
    return char_type.pointer()
56
57
58
def _type_unsigned_char_ptr():
    """Return the gdb.Type for ``unsigned char *``, looked up on each call."""
    uchar_type = gdb.lookup_type('unsigned char')
    return uchar_type.pointer()
60
61
62
def _type_unsigned_short_ptr():
    """Return the gdb.Type for ``unsigned short *``, looked up on each call."""
    ushort_type = gdb.lookup_type('unsigned short')
    return ushort_type.pointer()
64
65
66
def _type_unsigned_int_ptr():
    """Return the gdb.Type for ``unsigned int *``, looked up on each call."""
    uint_type = gdb.lookup_type('unsigned int')
    return uint_type.pointer()
68
69
70
def _sizeof_void_p():
    """Return the size in bytes of ``void *`` as currently known to gdb."""
    void_ptr_type = gdb.lookup_type('void').pointer()
    return void_ptr_type.sizeof
72
73
74
# Bits of PyTypeObject.tp_flags that this module tests.
Py_TPFLAGS_MANAGED_DICT = 1 << 4
Py_TPFLAGS_HEAPTYPE = 1 << 9
Py_TPFLAGS_LONG_SUBCLASS = 1 << 24
Py_TPFLAGS_LIST_SUBCLASS = 1 << 25
Py_TPFLAGS_TUPLE_SUBCLASS = 1 << 26
Py_TPFLAGS_BYTES_SUBCLASS = 1 << 27
Py_TPFLAGS_UNICODE_SUBCLASS = 1 << 28
Py_TPFLAGS_DICT_SUBCLASS = 1 << 29
Py_TPFLAGS_BASE_EXC_SUBCLASS = 1 << 30
Py_TPFLAGS_TYPE_SUBCLASS = 1 << 31

# From pycore_frame.h
FRAME_OWNED_BY_CSTACK = 3

MAX_OUTPUT_LEN = 1024

hexdigits = "0123456789abcdef"

ENCODING = locale.getpreferredencoding()

# Messages and symbol names used when reporting on Python frames.
FRAME_INFO_OPTIMIZED_OUT = '(frame information optimized out)'
UNABLE_READ_INFO_PYTHON_FRAME = 'Unable to read information on python frame'
EVALFRAME = '_PyEval_EvalFrameDefault'
97
98
class NullPyObjectPtr(RuntimeError):
    """Raised by PyObjectPtr.field() when the wrapped pointer is NULL."""
100
101
102
def safety_limit(val):
    """Clamp a value read from the debugged process to at most 1000.

    Values scraped from the inferior (sizes of lists, iteration counts, ...)
    may be garbage; capping them keeps a broken inferior from making the gdb
    process do unbounded work.
    """
    return 1000 if 1000 < val else val
107
108
109
def safe_range(val):
    """Like range(val), but with the upper bound capped by safety_limit().

    The value comes from the debugged process and may be corrupt, so it is
    converted to int and clamped before being used as a loop bound.
    """
    bound = safety_limit(int(val))
    return range(bound)
113
114
class StringTruncated(RuntimeError):
    """Raised by TruncatedStringIO.write() once its length limit is exceeded."""
116
117
class TruncatedStringIO:
    '''Minimal write-only text buffer, similar to io.StringIO, that can cap
    its length: a write that would exceed "maxlen" stores only the part that
    fits and then raises StringTruncated.'''

    def __init__(self, maxlen=None):
        # A falsy maxlen (None or 0) means "no limit".
        self.maxlen = maxlen
        self._val = ''

    def write(self, data):
        limit = self.maxlen
        if limit and len(data) + len(self._val) > limit:
            # Keep whatever still fits, then signal truncation to the caller.
            room = limit - len(self._val)
            self._val += data[0:room]
            raise StringTruncated()

        self._val += data

    def getvalue(self):
        '''Return everything written so far.'''
        return self._val
135
136
class PyObjectPtr(object):
    """
    Class wrapping a gdb.Value that's either a (PyObject*) within the
    inferior process, or some subclass pointer e.g. (PyBytesObject*)

    There will be a subclass for every refined PyObject type that we care
    about.

    Note that at every stage the underlying pointer could be NULL, point
    to corrupt data, etc; this is the debugger, after all.
    """
    # Name of the C struct; get_gdb_type() turns it into a pointer type.
    _typename = 'PyObject'

    def __init__(self, gdbval, cast_to=None):
        # gdbval: the gdb.Value pointer to wrap.
        # cast_to: optional gdb.Type; when truthy the pointer is cast to it.
        if cast_to:
            self._gdbval = gdbval.cast(cast_to)
        else:
            self._gdbval = gdbval

    def field(self, name):
        '''
        Get the gdb.Value for the given field within the PyObject.

        Various libpython types are defined using the "PyObject_HEAD" and
        "PyObject_VAR_HEAD" macros.

        In Python, this is defined as an embedded PyVarObject type thus:
           PyVarObject ob_base;
        so that the "ob_size" field is located inside the "ob_base" field, and
        the "ob_type" is most easily accessed by casting back to a (PyObject*).

        Raises NullPyObjectPtr if the wrapped pointer is NULL.
        '''
        if self.is_null():
            raise NullPyObjectPtr(self)

        # ob_type is read through a (PyObject*) view of the pointer.
        if name == 'ob_type':
            pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type())
            return pyo_ptr.dereference()[name]

        # ob_size is read through a (PyVarObject*) view of the pointer.
        if name == 'ob_size':
            pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type())
            return pyo_ptr.dereference()[name]

        # General case: look it up inside the object:
        return self._gdbval.dereference()[name]

    def pyop_field(self, name):
        '''
        Get a PyObjectPtr for the given PyObject* field within this PyObject.

        The result is built by from_pyobject_ptr(), so it is an instance of
        whichever wrapper class matches the field's runtime type.
        '''
        return PyObjectPtr.from_pyobject_ptr(self.field(name))

    def write_field_repr(self, name, out, visited):
        '''
        Extract the PyObject* field named "name", and write its representation
        to file-like object "out"
        '''
        field_obj = self.pyop_field(name)
        field_obj.write_repr(out, visited)

    def get_truncated_repr(self, maxlen):
        '''
        Get a repr-like string for the data, but truncate it at "maxlen"
        characters (ending the object graph traversal as soon as you do)

        When truncation happens, '...(truncated)' is appended to the result.
        '''
        out = TruncatedStringIO(maxlen)
        try:
            self.write_repr(out, set())
        except StringTruncated:
            # Truncation occurred:
            return out.getvalue() + '...(truncated)'

        # No truncation occurred:
        return out.getvalue()

    def type(self):
        '''Return a PyTypeObjectPtr wrapping this object's ob_type field.'''
        return PyTypeObjectPtr(self.field('ob_type'))

    def is_null(self):
        '''Return True if the wrapped pointer is NULL (address 0).'''
        return 0 == int(self._gdbval)

    def is_optimized_out(self):
        '''
        Is the value of the underlying PyObject* visible to the debugger?

        This can vary with the precise version of the compiler used to build
        Python, and the precise version of gdb.

        See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with
        PyEval_EvalFrameEx's "f"
        '''
        return self._gdbval.is_optimized_out

    def safe_tp_name(self):
        '''
        Return the object's type name (tp_name) as a string, or 'unknown'
        if it cannot be read or decoded.
        '''
        try:
            ob_type = self.type()
            tp_name = ob_type.field('tp_name')
            return tp_name.string()
        # NullPyObjectPtr: NULL tp_name?
        # RuntimeError: Can't even read the object at all?
        # UnicodeDecodeError: Failed to decode tp_name bytestring
        except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError):
            return 'unknown'

    def proxyval(self, visited):
        '''
        Scrape a value from the inferior process, and try to represent it
        within the gdb process, whilst (hopefully) avoiding crashes when
        the remote data is corrupt.

        Derived classes will override this.

        For example, a PyIntObject* with ob_ival 42 in the inferior process
        should result in an int(42) in this process.

        visited: a set of all gdb.Value pyobject pointers already visited
        whilst generating this value (to guard against infinite recursion when
        visiting object graphs with loops).  Analogous to Py_ReprEnter and
        Py_ReprLeave

        This base implementation returns a placeholder object whose repr() is
        '<TYPENAME at remote 0xADDRESS>' (or '0x0' for a NULL pointer).
        '''

        class FakeRepr(object):
            """
            Class representing a non-descript PyObject* value in the inferior
            process for when we don't have a custom scraper, intended to have
            a sane repr().
            """

            def __init__(self, tp_name, address):
                self.tp_name = tp_name
                self.address = address

            def __repr__(self):
                # For the NULL pointer, we have no way of knowing a type, so
                # special-case it as per
                # http://bugs.python.org/issue8032#msg100882
                if self.address == 0:
                    return '0x0'
                return '<%s at remote 0x%x>' % (self.tp_name, self.address)

        return FakeRepr(self.safe_tp_name(),
                        int(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write a string representation of the value scraped from the inferior
        process to "out", a file-like object.
        '''
        # Default implementation: generate a proxy value and write its repr
        # However, this could involve a lot of work for complicated objects,
        # so for derived classes we specialize this
        return out.write(repr(self.proxyval(visited)))

    @classmethod
    def subclass_from_type(cls, t):
        '''
        Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a
        (PyTypeObject*), determine the corresponding subclass of PyObjectPtr
        to use

        Ideally, we would look up the symbols for the global types, but that
        isn't working yet:
          (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value
          Traceback (most recent call last):
            File "<string>", line 1, in <module>
          NotImplementedError: Symbol type not yet supported in Python scripts.
          Error while executing Python code.

        For now, we use tp_flags, after doing some string comparisons on the
        tp_name for some special-cases that don't seem to be visible through
        flags

        Returns "cls" itself when the type cannot be read or when nothing
        more specific matches.
        '''
        try:
            tp_name = t.field('tp_name').string()
            tp_flags = int(t.field('tp_flags'))
        # RuntimeError: NULL pointers
        # UnicodeDecodeError: string() fails to decode the bytestring
        except (RuntimeError, UnicodeDecodeError):
            # Handle any kind of error e.g. NULL ptrs by simply using the base
            # class
            return cls

        #print('tp_flags = 0x%08x' % tp_flags)
        #print('tp_name = %r' % tp_name)

        # Exact type names take priority over the tp_flags tests below.
        name_map = {'bool': PyBoolObjectPtr,
                    'classobj': PyClassObjectPtr,
                    'NoneType': PyNoneStructPtr,
                    'frame': PyFrameObjectPtr,
                    'set' : PySetObjectPtr,
                    'frozenset' : PySetObjectPtr,
                    'builtin_function_or_method' : PyCFunctionObjectPtr,
                    'method-wrapper': wrapperobject,
                    }
        if tp_name in name_map:
            return name_map[tp_name]

        # The heap-type test comes before the builtin-subclass flag tests, so
        # a heap type that also carries e.g. the list-subclass flag is mapped
        # to HeapTypeObjectPtr.
        if tp_flags & Py_TPFLAGS_HEAPTYPE:
            return HeapTypeObjectPtr

        if tp_flags & Py_TPFLAGS_LONG_SUBCLASS:
            return PyLongObjectPtr
        if tp_flags & Py_TPFLAGS_LIST_SUBCLASS:
            return PyListObjectPtr
        if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS:
            return PyTupleObjectPtr
        if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS:
            return PyBytesObjectPtr
        if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS:
            return PyUnicodeObjectPtr
        if tp_flags & Py_TPFLAGS_DICT_SUBCLASS:
            return PyDictObjectPtr
        if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS:
            return PyBaseExceptionObjectPtr
        #if tp_flags & Py_TPFLAGS_TYPE_SUBCLASS:
        #    return PyTypeObjectPtr

        # Use the base class:
        return cls

    @classmethod
    def from_pyobject_ptr(cls, gdbval):
        '''
        Try to locate the appropriate derived class dynamically, and cast
        the pointer accordingly.

        On RuntimeError (which includes NullPyObjectPtr) the pointer is
        wrapped without a cast.
        '''
        try:
            p = PyObjectPtr(gdbval)
            # NOTE: "cls" is rebound here, so the fallback below uses the
            # refined class if the failure happens after this line.
            cls = cls.subclass_from_type(p.type())
            return cls(gdbval, cast_to=cls.get_gdb_type())
        except RuntimeError:
            # Handle any kind of error e.g. NULL ptrs by simply using the base
            # class
            pass
        return cls(gdbval)

    @classmethod
    def get_gdb_type(cls):
        '''Return the gdb.Type "pointer to <cls._typename>".'''
        return gdb.lookup_type(cls._typename).pointer()

    def as_address(self):
        '''Return the wrapped pointer as a Python int.'''
        return int(self._gdbval)
377
378
class PyVarObjectPtr(PyObjectPtr):
    # Wrapper whose gdb pointer type is (PyVarObject*); PyObjectPtr.field()
    # casts to this type in order to read "ob_size".
    _typename = 'PyVarObject'
380
381
class ProxyAlreadyVisited:
    '''
    Stand-in proxy value returned when an object is reached a second time
    during a traversal, so that loops in the object graph do not cause
    infinite recursion.  Its repr() is the fixed text given at construction.

    Analogous to the values emitted by the users of Py_ReprEnter and Py_ReprLeave
    '''

    def __init__(self, rep):
        # Text to show in place of the already-visited object, e.g. '[...]'
        self._rep = rep

    def __repr__(self):
        return self._rep
393
394
395
def _write_instance_repr(out, visited, name, pyop_attrdict, address):
    '''Shared code for use by all classes:
    write "<NAME(attr=value, ...) at remote 0xADDRESS>" to file-like
    object "out".

    The parenthesised attribute list is written only when "pyop_attrdict"
    is a PyKeysValuesPair or a PyDictObjectPtr.'''
    out.write('<')
    out.write(name)

    # Write dictionary of instance attributes:
    if isinstance(pyop_attrdict, (PyKeysValuesPair, PyDictObjectPtr)):
        out.write('(')
        pairs = pyop_attrdict.iteritems()
        for position, (pyop_arg, pyop_val) in enumerate(pairs):
            if position:
                out.write(', ')
            # The proxy value of the attribute name is written as-is.
            out.write(pyop_arg.proxyval(visited))
            out.write('=')
            pyop_val.write_repr(out, visited)
        out.write(')')
    out.write(' at remote 0x%x>' % address)
415
416
417
class InstanceProxy:
    '''Proxy for an instance in the inferior: class name, attribute dict
    (if one could be scraped) and remote address.'''

    def __init__(self, cl_name, attrdict, address):
        self.cl_name = cl_name
        self.attrdict = attrdict
        self.address = address

    def __repr__(self):
        # Without a real dict of attributes, only name and address are shown.
        if not isinstance(self.attrdict, dict):
            return '<%s at remote 0x%x>' % (self.cl_name, self.address)

        rendered = []
        for arg, val in self.attrdict.items():
            rendered.append("%s=%r" % (arg, val))
        kwargs = ', '.join(rendered)
        return '<%s(%s) at remote 0x%x>' % (self.cl_name, kwargs, self.address)
433
434
def _PyObject_VAR_SIZE(typeobj, nitems):
    '''
    Compute tp_basicsize + nitems * tp_itemsize for "typeobj", rounded up to
    a multiple of the pointer size, and return it cast to gdb's size_t type.

    The size_t gdb.Type is looked up on first use and then kept on the
    function attribute "_type_size_t".
    '''
    size_t = _PyObject_VAR_SIZE._type_size_t
    if size_t is None:
        size_t = gdb.lookup_type('size_t')
        _PyObject_VAR_SIZE._type_size_t = size_t

    basic_size = typeobj.field('tp_basicsize')
    item_size = typeobj.field('tp_itemsize')
    # Add (pointer size - 1), then clear the low bits to round up.
    padded = basic_size + nitems * item_size + (_sizeof_void_p() - 1)
    align_mask = ~(_sizeof_void_p() - 1)
    return (padded & align_mask).cast(size_t)
_PyObject_VAR_SIZE._type_size_t = None
444
445
class HeapTypeObjectPtr(PyObjectPtr):
    # Wrapper used for instances whose type has Py_TPFLAGS_HEAPTYPE set
    # (see PyObjectPtr.subclass_from_type).
    _typename = 'PyObject'

    def get_attr_dict(self):
        '''
        Get the PyDictObject ptr representing the attribute dictionary
        (or None if there's a problem)
        '''
        try:
            typeobj = self.type()
            dictoffset = int_from_int(typeobj.field('tp_dictoffset'))
            if dictoffset != 0:
                if dictoffset < 0:
                    if int_from_int(typeobj.field('tp_flags')) & Py_TPFLAGS_MANAGED_DICT:
                        # Managed dict: the slot is read three pointers
                        # before the start of the object.
                        assert dictoffset == -1
                        dictoffset = -3 * _sizeof_void_p()
                    else:
                        # Negative offset: relative to the end of the
                        # variable-sized object, so add the object's size.
                        type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer()
                        tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size'])
                        if tsize < 0:
                            tsize = -tsize
                        size = _PyObject_VAR_SIZE(typeobj, tsize)
                        dictoffset += size
                        assert dictoffset % _sizeof_void_p() == 0

                # Byte-offset from the object's address, then reinterpret the
                # slot as a (PyObject**).
                dictptr = self._gdbval.cast(_type_char_ptr()) + dictoffset
                PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer()
                dictptr = dictptr.cast(PyObjectPtrPtr)
                # A slot value with its low bit set is not treated as a dict
                # pointer here; get_keys_values() handles that case.
                if int(dictptr.dereference()) & 1:
                    return None
                return PyObjectPtr.from_pyobject_ptr(dictptr.dereference())
        except RuntimeError:
            # Corrupt data somewhere; fail safe
            pass

        # Not found, or some kind of error:
        return None

    def get_keys_values(self):
        '''
        Return a PyKeysValuesPair for the instance's attribute values, or
        None if the type lacks Py_TPFLAGS_MANAGED_DICT or the slot three
        pointers before the object does not have its low bit set.
        '''
        typeobj = self.type()
        has_values =  int_from_int(typeobj.field('tp_flags')) & Py_TPFLAGS_MANAGED_DICT
        if not has_values:
            return None
        charptrptr_t = _type_char_ptr().pointer()
        # Step back three pointer-sized slots from the object's address.
        ptr = self._gdbval.cast(charptrptr_t) - 3
        char_ptr = ptr.dereference()
        if (int(char_ptr) & 1) == 0:
            return None
        # Advance one byte before treating the address as a PyDictValues*
        # (presumably undoing a pointer tag; confirm against CPython's
        # managed-dict layout).
        char_ptr += 1
        values_ptr = char_ptr.cast(gdb.lookup_type("PyDictValues").pointer())
        values = values_ptr['values']
        return PyKeysValuesPair(self.get_cached_keys(), values)

    def get_cached_keys(self):
        '''Return the gdb.Value of ht_cached_keys from this object's type,
        viewed as a PyHeapTypeObject.'''
        typeobj = self.type()
        HeapTypePtr = gdb.lookup_type("PyHeapTypeObject").pointer()
        return typeobj._gdbval.cast(HeapTypePtr)['ht_cached_keys']

    def proxyval(self, visited):
        '''
        Support for classes.

        Currently we just locate the dictionary using a transliteration to
        python of _PyObject_GetDictPtr, ignoring descriptors

        Returns an InstanceProxy, or a ProxyAlreadyVisited('<...>') if this
        object's address is already in "visited".
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('<...>')
        visited.add(self.as_address())

        # Prefer the keys/values pair; fall back to the attribute dict, and
        # finally to an empty dict.
        keys_values = self.get_keys_values()
        if keys_values:
            attr_dict = keys_values.proxyval(visited)
        else:
            pyop_attr_dict = self.get_attr_dict()
            if pyop_attr_dict:
                attr_dict = pyop_attr_dict.proxyval(visited)
            else:
                attr_dict = {}
        tp_name = self.safe_tp_name()

        # Class:
        return InstanceProxy(tp_name, attr_dict, int(self._gdbval))

    def write_repr(self, out, visited):
        '''Write "<TYPENAME(attr=value, ...) at remote 0xADDRESS>" to "out",
        or '<...>' if this object was already visited.'''
        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('<...>')
            return
        visited.add(self.as_address())

        pyop_attrs = self.get_keys_values()
        if not pyop_attrs:
            pyop_attrs = self.get_attr_dict()
        _write_instance_repr(out, visited,
                             self.safe_tp_name(), pyop_attrs, self.as_address())
541
542
class ProxyException(Exception):
    '''Proxy for an exception instance in the inferior: the remote type
    name plus the proxied "args".'''

    def __init__(self, tp_name, args):
        self.tp_name = tp_name
        self.args = args

    def __repr__(self):
        # Rendered like a constructor call: type name followed by repr(args)
        return '%s%s' % (self.tp_name, repr(self.args))
549
550
class PyBaseExceptionObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyBaseExceptionObject*, i.e. an
    exception instance living in the process being debugged.
    """
    _typename = 'PyBaseExceptionObject'

    def proxyval(self, visited):
        """Return a ProxyException built from the type name and 'args'."""
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        args_proxy = self.pyop_field('args').proxyval(visited)
        return ProxyException(self.safe_tp_name(), args_proxy)

    def write_repr(self, out, visited):
        """Write the type name immediately followed by the repr of 'args'."""
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write(self.safe_tp_name())
        self.write_field_repr('args', out, visited)
575
576
class PyClassObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj>
    instance within the process being debugged.

    PyObjectPtr.subclass_from_type() selects this wrapper for objects whose
    tp_name is 'classobj'.
    """
    _typename = 'PyClassObject'
582
583
584
class BuiltInFunctionProxy:
    '''Proxy for a built-in function in the inferior, identified by the
    method definition's name (ml_name).'''

    def __init__(self, ml_name):
        self.ml_name = ml_name

    def __repr__(self):
        return "<built-in function %s>" % (self.ml_name,)
590
591
class BuiltInMethodProxy:
    '''Proxy for a built-in method in the inferior: the method name plus a
    wrapper for the object it is bound to.'''

    def __init__(self, ml_name, pyop_m_self):
        self.ml_name = ml_name
        self.pyop_m_self = pyop_m_self

    def __repr__(self):
        bound_to = self.pyop_m_self
        owner_type = bound_to.safe_tp_name()
        owner_address = bound_to.as_address()
        return ('<built-in method %s of %s object at remote 0x%x>'
                % (self.ml_name, owner_type, owner_address))
602
603
class PyCFunctionObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyCFunctionObject*
    (see Include/methodobject.h and Objects/methodobject.c)
    """
    _typename = 'PyCFunctionObject'

    def proxyval(self, visited):
        """
        Return a BuiltInFunctionProxy when m_self is NULL, otherwise a
        BuiltInMethodProxy carrying the wrapper for m_self.
        """
        method_def = self.field('m_ml')  # m_ml is a (PyMethodDef*)
        try:
            method_name = method_def['ml_name'].string()
        except UnicodeDecodeError:
            # Name could not be decoded: use a recognisable placeholder.
            method_name = '<ml_name:UnicodeDecodeError>'

        bound_self = self.pyop_field('m_self')
        if not bound_self.is_null():
            return BuiltInMethodProxy(method_name, bound_self)
        return BuiltInFunctionProxy(method_name)
622
623
# Python implementation of location table parsing algorithm
624
def read(it):
    '''
    Return the next byte from iterator "it" as an int.

    Accepts both iterators over str/one-element bytes items (converted with
    ord()) and iterators over a bytes object, whose items are already ints.

    Raises StopIteration when "it" is exhausted.
    '''
    item = next(it)
    if isinstance(item, int):
        # Iterating a bytes object yields ints; ord() would reject them.
        return item
    return ord(item)
626
627
def read_varint(it):
    '''
    Decode an unsigned variable-length integer from "it".

    Each byte contributes its low 6 bits, least-significant group first;
    bit 6 (value 64) set means another byte follows.
    '''
    byte = read(it)
    value = byte & 63
    shift = 0
    while byte & 64:
        byte = read(it)
        shift += 6
        value |= (byte & 63) << shift
    return value
636
637
def read_signed_varint(it):
    '''
    Decode a signed variable-length integer from "it".

    The value is read with read_varint(); its lowest bit is the sign
    (1 = negative) and the remaining bits are the magnitude.
    '''
    uval = read_varint(it)
    magnitude = uval >> 1
    return -magnitude if uval & 1 else magnitude
643
644
def parse_location_table(firstlineno, linetable):
    '''
    Generate (start_addr, end_addr, line) triples from a location table.

    firstlineno: line number that the line deltas in the table start from.
    linetable: iterable of the table's bytes.

    For each entry, bits 3-6 of the first byte select the entry form ("code")
    and the low 3 bits plus one give the length added to the running address.
    "line" is None for entries of form 15.  Column information is read from
    the table (so the iterator stays in sync) but is not reported.
    '''
    line = firstlineno
    addr = 0
    it = iter(linetable)
    while True:
        try:
            first_byte = read(it)
        except StopIteration:
            # Clean end of table
            return
        code = (first_byte >> 3) & 15
        length = (first_byte & 7) + 1
        end_addr = addr + length
        if code == 15:
            # Entry without a line number
            yield addr, end_addr, None
            addr = end_addr
            continue
        elif code == 14:  # Long form
            line += read_signed_varint(it)
            # Skip end-line delta, column and end column
            read_varint(it)
            read_varint(it)
            read_varint(it)
        elif code == 13:  # No column
            line += read_signed_varint(it)
        elif code in (10, 11, 12):  # new line
            line += code - 10
            # Skip column and end column
            read(it)
            read(it)
        else:
            assert (0 <= code < 10)
            # Skip the second byte, which carries only column information
            read(it)
        yield addr, end_addr, line
        addr = end_addr
680
681
class PyCodeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance
    within the process being debugged.
    """
    _typename = 'PyCodeObject'

    def addr2line(self, addrq):
        '''
        Get the line number for a given bytecode offset

        Analogous to PyCode_Addr2Line: scans the entries decoded from
        co_linetable by parse_location_table() and returns the line of the
        entry whose [start, end) range contains "addrq".  The result is None
        when that entry carries no line number.

        A negative "addrq" yields co_firstlineno.

        Raises AssertionError if no entry covers "addrq".
        '''
        co_linetable = self.pyop_field('co_linetable').proxyval(set())

        # Initialize lineno to co_firstlineno as per PyCode_Addr2Line
        lineno = int_from_int(self.field('co_firstlineno'))

        if addrq < 0:
            return lineno
        for addr, end_addr, line in parse_location_table(lineno, co_linetable):
            if addr <= addrq and end_addr > addrq:
                return line
        # Raised explicitly (rather than "assert False") so that the failure
        # is still reported when assertions are disabled.
        raise AssertionError("Unreachable")
708
709
710
def items_from_keys_and_values(keys, values):
    '''
    Yield (key, value) PyObjectPtr pairs for a dict whose values are stored
    in a separate array: entry i of the keys table is paired with values[i].
    Slots whose value pointer is NULL are skipped.
    '''
    entries, nentries = PyDictObjectPtr._get_entries(keys)
    for index in safe_range(nentries):
        entry = entries[index]
        pyop_value = PyObjectPtr.from_pyobject_ptr(values[index])
        if pyop_value.is_null():
            continue
        pyop_key = PyObjectPtr.from_pyobject_ptr(entry['me_key'])
        yield (pyop_key, pyop_value)
718
719
class PyKeysValuesPair:
    '''A keys table paired with a separate values array; offers the same
    iteritems()/proxyval() interface as a dict wrapper.'''

    def __init__(self, keys, values):
        self.keys = keys
        self.values = values

    def iteritems(self):
        '''Yield (PyObjectPtr key, PyObjectPtr value) pairs.'''
        return items_from_keys_and_values(self.keys, self.values)

    def proxyval(self, visited):
        '''Return a dict mapping each key's proxy value to its value's proxy.'''
        proxies = {}
        for pyop_key, pyop_value in self.iteritems():
            # The key is proxied before the value ("visited" is shared).
            key_proxy = pyop_key.proxyval(visited)
            proxies[key_proxy] = pyop_value.proxyval(visited)
        return proxies
735
736
class PyDictObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyDictObject*, i.e. a dict instance
    living in the process being debugged.
    """
    _typename = 'PyDictObject'

    def iteritems(self):
        '''
        Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs,
        analogous to dict.iteritems()

        Entries whose value pointer is NULL are skipped.
        '''
        keys = self.field('ma_keys')
        values = self.field('ma_values')
        if int(values):
            # Non-NULL ma_values: values live in a separate array.
            values = values['values']
            for item in items_from_keys_and_values(keys, values):
                yield item
            return

        # Otherwise each entry of the keys table holds its own value.
        entries, nentries = self._get_entries(keys)
        for index in safe_range(nentries):
            entry = entries[index]
            pyop_value = PyObjectPtr.from_pyobject_ptr(entry['me_value'])
            if pyop_value.is_null():
                continue
            pyop_key = PyObjectPtr.from_pyobject_ptr(entry['me_key'])
            yield (pyop_key, pyop_value)

    def proxyval(self, visited):
        '''Return a dict of proxy values, or ProxyAlreadyVisited('{...}') if
        this dict was already visited.'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('{...}')
        visited.add(address)

        proxies = {}
        for pyop_key, pyop_value in self.iteritems():
            # The key is proxied before the value ("visited" is shared).
            key_proxy = pyop_key.proxyval(visited)
            proxies[key_proxy] = pyop_value.proxyval(visited)
        return proxies

    def write_repr(self, out, visited):
        '''Write "{key: value, ...}" to "out", or '{...}' if this dict was
        already visited.'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('{...}')
            return
        visited.add(address)

        out.write('{')
        for position, (pyop_key, pyop_value) in enumerate(self.iteritems()):
            if position:
                out.write(', ')
            pyop_key.write_repr(out, visited)
            out.write(': ')
            pyop_value.write_repr(out, visited)
        out.write('}')

    @staticmethod
    def _get_entries(keys):
        '''
        Locate the entry array of a keys table.

        Returns (entries, nentries): a gdb.Value pointer to the first entry
        and the value of dk_nentries.  The entries start after the index
        table, whose byte size depends on the table size (1, 2, 4 or 8 bytes
        per slot).  The entry type is PyDictKeyEntry when dk_kind is 0 and
        PyDictUnicodeEntry otherwise.
        '''
        entry_count = int(keys['dk_nentries'])
        table_size = 1 << int(keys['dk_log2_size'])

        # Width in bytes of one slot of the index table
        if table_size <= 0xFF:
            index_width = 1
        elif table_size <= 0xFFFF:
            index_width = 2
        elif table_size <= 0xFFFFFFFF:
            index_width = 4
        else:
            index_width = 8
        indices_nbytes = index_width * table_size

        indices_addr = keys['dk_indices'].address
        entries_addr = indices_addr.cast(_type_unsigned_char_ptr()) + indices_nbytes
        if int(keys['dk_kind']) == 0:  # DICT_KEYS_GENERAL
            entry_typename = 'PyDictKeyEntry'
        else:
            entry_typename = 'PyDictUnicodeEntry'
        entry_ptr_type = gdb.lookup_type(entry_typename).pointer()

        return entries_addr.cast(entry_ptr_type), entry_count
819
820
821
class PyListObjectPtr(PyObjectPtr):
    '''Wrapper for a gdb.Value that is a PyListObject*.'''
    _typename = 'PyListObject'

    def __getitem__(self, i):
        # Get the gdb.Value for the (PyObject*) with the given index:
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        '''Return a list of the elements' proxy values, or
        ProxyAlreadyVisited('[...]') if this list was already visited.'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('[...]')
        visited.add(address)

        proxies = []
        for index in safe_range(int_from_int(self.field('ob_size'))):
            element = PyObjectPtr.from_pyobject_ptr(self[index])
            proxies.append(element.proxyval(visited))
        return proxies

    def write_repr(self, out, visited):
        '''Write "[elem, ...]" to "out", or '[...]' if this list was already
        visited.'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('[...]')
            return
        visited.add(address)

        out.write('[')
        for index in safe_range(int_from_int(self.field('ob_size'))):
            if index > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[index]).write_repr(out, visited)
        out.write(']')
853
854
class PyLongObjectPtr(PyObjectPtr):
    '''Wrapper for a gdb.Value that is a PyLongObject*.'''
    _typename = 'PyLongObject'

    def proxyval(self, visited):
        '''
        Return the value of the remote int as a Python int.

        The object embeds a "long_value" struct:

            typedef struct _PyLongValue {
                uintptr_t lv_tag; /* Number of digits, sign and flags */
                digit ob_digit[1];
            } _PyLongValue;

        As decoded here: the digit count is lv_tag >> 3, the number is
        negative when (lv_tag & 3) == 2, and the absolute value is

            SUM(for i=0 through count-1) ob_digit[i] * 2**(SHIFT*i)

        where SHIFT is 15 if the "digit" type is 2 bytes wide, else 30.
        The digit count is capped via safe_range().
        '''
        long_value = self.field('long_value')
        tag = int(long_value['lv_tag'])
        ndigits = tag >> 3
        if ndigits == 0:
            return 0

        digit_array = long_value['ob_digit']

        bits_per_digit = 15 if gdb.lookup_type('digit').sizeof == 2 else 30

        magnitude = 0
        for index in safe_range(ndigits):
            magnitude += int(digit_array[index]) << (bits_per_digit * index)

        return -magnitude if (tag & 3) == 2 else magnitude

    def write_repr(self, out, visited):
        # Write this out as a Python int literal
        out.write("%s" % self.proxyval(visited))
905
906
907
class PyBoolObjectPtr(PyLongObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyBoolObject*, i.e. one of the two
    <bool> instances (Py_True/Py_False) within the process being debugged.
    """
    def proxyval(self, visited):
        # True exactly when the underlying integer value is non-zero.
        return bool(PyLongObjectPtr.proxyval(self, visited))
917
918
class PyNoneStructPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyObject* pointing to the
    singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type
    """
    _typename = 'PyObject'

    def proxyval(self, visited):
        # The remote None is represented by this process's None; nothing is
        # read from the inferior.
        return None
927
928
class PyFrameObjectPtr(PyObjectPtr):
    '''
    Wrapper for a gdb.Value that is a PyFrameObject*.

    All queries are delegated to a PyFramePtr built from the object's
    "f_frame" field.  When the pointer is optimized out, no PyFramePtr is
    created and every method returns a neutral result instead.
    '''
    _typename = 'PyFrameObject'

    def __init__(self, gdbval, cast_to=None):
        PyObjectPtr.__init__(self, gdbval, cast_to)

        # self._frame exists only when the pointer is visible to gdb; every
        # method below checks is_optimized_out() before using it.
        if not self.is_optimized_out():
            self._frame = PyFramePtr(self.field('f_frame'))

    def iter_locals(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the local variables of this frame
        '''
        if self.is_optimized_out():
            # Empty iterable (not None), matching iter_globals() and
            # iter_builtins(), so that callers can always loop over the result.
            return ()
        return self._frame.iter_locals()

    def iter_globals(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the global variables of this frame
        '''
        if self.is_optimized_out():
            return ()
        return self._frame.iter_globals()

    def iter_builtins(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the builtin variables
        '''
        if self.is_optimized_out():
            return ()
        return self._frame.iter_builtins()

    def get_var_by_name(self, name):
        '''
        Look up the variable "name" via the underlying frame.

        Returns (None, None) when the frame is optimized out; otherwise
        whatever PyFramePtr.get_var_by_name() returns.
        '''
        if self.is_optimized_out():
            return None, None
        return self._frame.get_var_by_name(name)

    def filename(self):
        '''Get the path of the current Python source file, as a string'''
        if self.is_optimized_out():
            return FRAME_INFO_OPTIMIZED_OUT
        return self._frame.filename()

    def current_line_num(self):
        '''Get current line number as an integer (1-based)

        Translated from PyFrame_GetLineNumber and PyCode_Addr2Line

        Returns None when the frame is optimized out.
        '''
        if self.is_optimized_out():
            return None
        return self._frame.current_line_num()

    def current_line(self):
        '''Get the text of the current source line as a string, with a trailing
        newline character'''
        if self.is_optimized_out():
            return FRAME_INFO_OPTIMIZED_OUT
        return self._frame.current_line()

    def write_repr(self, out, visited):
        '''Write the frame's representation to "out" (a fixed message when
        the frame is optimized out).'''
        if self.is_optimized_out():
            out.write(FRAME_INFO_OPTIMIZED_OUT)
            return
        return self._frame.write_repr(out, visited)

    def print_traceback(self):
        '''Print this frame's traceback entry (a fixed message on stdout when
        the frame is optimized out).'''
        if self.is_optimized_out():
            sys.stdout.write('  %s\n' % FRAME_INFO_OPTIMIZED_OUT)
            return
        return self._frame.print_traceback()
1005
1006
class PyFramePtr:
    '''
    Wrapper around the gdb.Value of a frame structure (the one carrying the
    "f_executable", "f_globals", "f_builtins", "localsplus", "prev_instr",
    "owner" and "previous" fields), giving access to its code object, its
    variables and its current source line.
    '''

    def __init__(self, gdbval):
        '''
        Wrap "gdbval".  Unless the value is optimized out, cache the code
        object and the code fields the other methods rely on; self._is_code
        records whether that lookup succeeded.
        '''
        self._gdbval = gdbval
        # Define the flag up front so that it exists on every instance,
        # including those wrapping an optimized-out value.
        self._is_code = False

        if self.is_optimized_out():
            return

        try:
            self.co = self._f_code()
            self.co_name = self.co.pyop_field('co_name')
            self.co_filename = self.co.pyop_field('co_filename')

            self.f_lasti = self._f_lasti()
            self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
            pnames = self.co.field('co_localsplusnames')
            self.co_localsplusnames = PyTupleObjectPtr.from_pyobject_ptr(pnames)
        except Exception:
            # Best effort: any failure while reading the code object leaves
            # the frame flagged as "not code".  Catching Exception (rather
            # than using a bare "except:") lets KeyboardInterrupt and
            # SystemExit propagate instead of being silently swallowed.
            self._is_code = False
        else:
            self._is_code = True

    def is_optimized_out(self):
        '''Return the "is_optimized_out" flag of the wrapped gdb value'''
        return self._gdbval.is_optimized_out

    def iter_locals(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the local variables of this frame
        '''
        if self.is_optimized_out():
            return

        obj_ptr_ptr = gdb.lookup_type("PyObject").pointer().pointer()
        localsplus = self._gdbval["localsplus"].cast(obj_ptr_ptr)

        for i in safe_range(self.co_nlocals):
            pyop_value = PyObjectPtr.from_pyobject_ptr(localsplus[i])
            if pyop_value.is_null():
                # Nothing is stored in this slot: skip it
                continue
            pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_localsplusnames[i])
            yield (pyop_name, pyop_value)

    def _f_special(self, name, convert=PyObjectPtr.from_pyobject_ptr):
        '''Read field "name" of the frame and pass it through "convert"'''
        return convert(self._gdbval[name])

    def _f_globals(self):
        return self._f_special("f_globals")

    def _f_builtins(self):
        return self._f_special("f_builtins")

    def _f_code(self):
        # Same field as _f_executable(), but wrapped as a code object
        return self._f_special("f_executable", PyCodeObjectPtr.from_pyobject_ptr)

    def _f_executable(self):
        return self._f_special("f_executable")

    def _f_nlocalsplus(self):
        return self._f_special("nlocalsplus", int_from_int)

    def _f_lasti(self):
        '''Return the distance, in _Py_CODEUNIT units, between "prev_instr"
        and the start of the code object's "co_code_adaptive" array'''
        codeunit_p = gdb.lookup_type("_Py_CODEUNIT").pointer()
        prev_instr = self._gdbval["prev_instr"]
        first_instr = self._f_code().field("co_code_adaptive").cast(codeunit_p)
        return int(prev_instr - first_instr)

    def is_shim(self):
        '''Is the "owner" field of this frame FRAME_OWNED_BY_CSTACK?'''
        return self._f_special("owner", int) == FRAME_OWNED_BY_CSTACK

    def previous(self):
        '''Return the frame referenced by the "previous" field, wrapped in
        a PyFramePtr'''
        return self._f_special("previous", PyFramePtr)

    def iter_globals(self):
        '''
        Return an iterable of (name,value) pairs of PyObjectPtr instances,
        for the global variables of this frame
        '''
        if self.is_optimized_out():
            return ()

        pyop_globals = self._f_globals()
        return pyop_globals.iteritems()

    def iter_builtins(self):
        '''
        Return an iterable of (name,value) pairs of PyObjectPtr instances,
        for the builtin variables
        '''
        if self.is_optimized_out():
            return ()

        pyop_builtins = self._f_builtins()
        return pyop_builtins.iteritems()

    def get_var_by_name(self, name):
        '''
        Look for the named local variable, returning a (PyObjectPtr, scope) pair
        where scope is a string 'local', 'global', 'builtin'

        If not found, return (None, None)
        '''
        # Scopes are searched in this order; each iterator is only created
        # once the previous scope failed to provide the name.
        scopes = (
            (self.iter_locals, 'local'),
            (self.iter_globals, 'global'),
            (self.iter_builtins, 'builtin'),
        )
        for iter_scope, scope in scopes:
            for pyop_name, pyop_value in iter_scope():
                if name == pyop_name.proxyval(set()):
                    return pyop_value, scope
        return None, None

    def filename(self):
        '''Get the path of the current Python source file, as a string'''
        if self.is_optimized_out():
            return FRAME_INFO_OPTIMIZED_OUT
        return self.co_filename.proxyval(set())

    def current_line_num(self):
        '''Get current line number as an integer (1-based)

        Translated from PyFrame_GetLineNumber and PyCode_Addr2Line

        See Objects/lnotab_notes.txt
        '''
        if self.is_optimized_out():
            return None
        try:
            return self.co.addr2line(self.f_lasti)
        except Exception:
            # bpo-34989: addr2line() is a complex function, it can fail in many
            # ways. For example, it fails with a TypeError on "FakeRepr" if
            # gdb fails to load debug symbols. Use a catch-all "except
            # Exception" to make the whole function safe. The caller has to
            # handle None anyway for optimized Python.
            return None

    def current_line(self):
        '''Get the text of the current source line as a string, with a trailing
        newline character

        Return None if the source file cannot be read (or decoded as UTF-8)
        or does not contain that line.'''
        if self.is_optimized_out():
            return FRAME_INFO_OPTIMIZED_OUT

        lineno = self.current_line_num()
        if lineno is None:
            return '(failed to get frame line number)'

        filename = self.filename()
        try:
            with open(os.fsencode(filename), 'r', encoding="utf-8") as fp:
                lines = fp.readlines()
        except (OSError, UnicodeDecodeError):
            # Missing/unreadable file, or a source file that is not valid
            # UTF-8: callers already treat None as "no source available".
            return None

        try:
            # Convert from 1-based current_line_num to 0-based list offset
            return lines[lineno - 1]
        except IndexError:
            return None

    def write_repr(self, out, visited):
        '''Write a one-line description of the frame (address, file, line,
        function name and locals) to the file-like object "out"'''
        if self.is_optimized_out():
            out.write(FRAME_INFO_OPTIMIZED_OUT)
            return
        lineno = self.current_line_num()
        lineno = str(lineno) if lineno is not None else "?"
        out.write('Frame 0x%x, for file %s, line %s, in %s ('
                  % (self.as_address(),
                     self.co_filename.proxyval(visited),
                     lineno,
                     self.co_name.proxyval(visited)))
        first = True
        for pyop_name, pyop_value in self.iter_locals():
            if not first:
                out.write(', ')
            first = False

            out.write(pyop_name.proxyval(visited))
            out.write('=')
            pyop_value.write_repr(out, visited)

        out.write(')')

    def as_address(self):
        '''Return the wrapped gdb value converted to an int'''
        return int(self._gdbval)

    def print_traceback(self):
        '''Write the 'File "...", line ..., in ...' entry for this frame to
        stdout'''
        if self.is_optimized_out():
            sys.stdout.write(' %s\n' % FRAME_INFO_OPTIMIZED_OUT)
            return
        visited = set()
        lineno = self.current_line_num()
        lineno = str(lineno) if lineno is not None else "?"
        sys.stdout.write(' File "%s", line %s, in %s\n'
                         % (self.co_filename.proxyval(visited),
                            lineno,
                            self.co_name.proxyval(visited)))

    def get_truncated_repr(self, maxlen):
        '''
        Get a repr-like string for the data, but truncate it at "maxlen" bytes
        (ending the object graph traversal as soon as you do)
        '''
        out = TruncatedStringIO(maxlen)
        try:
            self.write_repr(out, set())
        except StringTruncated:
            # Truncation occurred:
            return out.getvalue() + '...(truncated)'

        # No truncation occurred:
        return out.getvalue()
1218
1219
class PySetObjectPtr(PyObjectPtr):
    '''Proxy for objects whose struct type is PySetObject; the proxy value
    is a set or a frozenset depending on the type name of the object'''
    _typename = 'PySetObject'

    @classmethod
    def _dummy_key(cls):
        # Value of the global symbol that marks table entries to be skipped
        symbol = gdb.lookup_global_symbol('_PySet_Dummy')
        return symbol.value()

    def __iter__(self):
        '''Yield a PyObjectPtr for every entry of the table whose key is
        neither 0 nor the dummy key'''
        dummy_ptr = self._dummy_key()
        table = self.field('table')
        for slot in safe_range(self.field('mask') + 1):
            key = table[slot]['key']
            if key == 0 or key == dummy_ptr:
                continue
            yield PyObjectPtr.from_pyobject_ptr(key)

    def proxyval(self, visited):
        '''Build a local set/frozenset holding the proxy values of the
        members ("visited" holds the addresses already traversed)'''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name())
        visited.add(self.as_address())

        if self.safe_tp_name() == 'frozenset':
            container = frozenset
        else:
            container = set
        return container(key.proxyval(visited) for key in self)

    def write_repr(self, out, visited):
        '''Write the repr of the set to "out", emulating Python's set_repr'''
        tp_name = self.safe_tp_name()

        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('(...)')
            return
        visited.add(self.as_address())

        # Python's set_repr special-cases the empty set:
        if not self.field('used'):
            out.write(tp_name)
            out.write('()')
            return

        # Python uses {} for set literals; anything else gets wrapped in
        # "tp_name(...)":
        needs_wrapper = tp_name != 'set'
        if needs_wrapper:
            out.write(tp_name)
            out.write('(')

        out.write('{')
        for position, key in enumerate(self):
            if position:
                out.write(', ')
            key.write_repr(out, visited)
        out.write('}')

        if needs_wrapper:
            out.write(')')
1279
1280
1281
class PyBytesObjectPtr(PyObjectPtr):
    '''Proxy for objects whose struct type is PyBytesObject; the proxy value
    is a str holding one character per element of "ob_sval"'''
    _typename = 'PyBytesObject'

    def __str__(self):
        '''Read "ob_size" elements starting at the address of "ob_sval" and
        return them as a str, one chr() per element'''
        length = self.field('ob_size')
        payload = self.field('ob_sval')
        char_ptr = payload.address.cast(_type_unsigned_char_ptr())
        return ''.join(chr(char_ptr[i]) for i in safe_range(length))

    def proxyval(self, visited):
        return str(self)

    def write_repr(self, out, visited):
        '''Write this out as a Python bytes literal, i.e. with a "b" prefix'''
        # Get a PyStringObject* within the Python gdb process:
        proxy = self.proxyval(visited)

        # Transliteration of Python's Objects/bytesobject.c:PyBytes_Repr
        # to Python code:
        if "'" in proxy and '"' not in proxy:
            quote = '"'
        else:
            quote = "'"
        whitespace_escapes = {'\t': '\\t', '\n': '\\n', '\r': '\\r'}

        out.write('b')
        out.write(quote)
        for byte in proxy:
            code = ord(byte)
            if byte in (quote, '\\'):
                out.write('\\')
                out.write(byte)
            elif byte in whitespace_escapes:
                out.write(whitespace_escapes[byte])
            elif code < 0x20 or code >= 0x7f:
                # Everything else outside the printable range: '\xhh'
                out.write('\\x')
                out.write(hexdigits[(code >> 4) & 0xf])
                out.write(hexdigits[code & 0xf])
            else:
                out.write(byte)
        out.write(quote)
1323
1324
class PyTupleObjectPtr(PyObjectPtr):
    '''Proxy for objects whose struct type is PyTupleObject; the proxy value
    is a tuple of the proxy values of the items'''
    _typename = 'PyTupleObject'

    def __getitem__(self, i):
        '''Get the gdb.Value for the (PyObject*) with the given index'''
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        '''Build a local tuple from the "ob_size" items of "ob_item"'''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(self.as_address())

        items = []
        for index in safe_range(int_from_int(self.field('ob_size'))):
            pyop_item = PyObjectPtr.from_pyobject_ptr(self[index])
            items.append(pyop_item.proxyval(visited))
        return tuple(items)

    def write_repr(self, out, visited):
        '''Write the tuple to "out" in Python syntax, including the trailing
        comma of a one-element tuple'''
        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('(...)')
            return
        visited.add(self.as_address())

        out.write('(')
        for index in safe_range(int_from_int(self.field('ob_size'))):
            if index > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[index]).write_repr(out, visited)

        closing = ',)' if self.field('ob_size') == 1 else ')'
        out.write(closing)
1359
1360
class PyTypeObjectPtr(PyObjectPtr):
    '''Proxy for objects whose struct type is PyTypeObject; it adds nothing
    to PyObjectPtr beyond the struct name'''
    _typename = 'PyTypeObject'
1362
1363
1364
def _unichr_is_printable(char):
1365
# Logic adapted from Python's Tools/unicode/makeunicodedata.py
1366
if char == u" ":
1367
return True
1368
import unicodedata
1369
return unicodedata.category(char) not in ("C", "Z")
1370
1371
1372
class PyUnicodeObjectPtr(PyObjectPtr):
    '''Proxy for objects whose struct type is PyUnicodeObject; the proxy
    value is a local str built from the code points of the object'''
    _typename = 'PyUnicodeObject'

    def proxyval(self, visited):
        '''Read the code points of the string and return them as a str'''
        compact = self.field('_base')
        ascii = compact['_base']
        state = ascii['state']
        is_compact_ascii = (int(state['ascii']) and int(state['compact']))
        field_length = int(ascii['length'])

        # Locate the character data: right behind the "ascii" struct, right
        # behind the "compact" struct, or through the "data" pointer.
        if is_compact_ascii:
            field_str = ascii.address + 1
        elif int(state['compact']):
            field_str = compact.address + 1
        else:
            field_str = self.field('data')['any']

        # "kind" selects the width of one code point in the array
        repr_kind = int(state['kind'])
        if repr_kind == 1:
            field_str = field_str.cast(_type_unsigned_char_ptr())
        elif repr_kind == 2:
            field_str = field_str.cast(_type_unsigned_short_ptr())
        elif repr_kind == 4:
            field_str = field_str.cast(_type_unsigned_int_ptr())

        # Gather a list of ints from the code point array; these are either
        # UCS-1, UCS-2 or UCS-4 code points:
        code_points = [int(field_str[i]) for i in safe_range(field_length)]

        # Convert the int code points to unicode characters, and generate a
        # local unicode instance.
        return ''.join(map(chr, code_points))

    def write_repr(self, out, visited):
        '''Write this out as a Python str literal'''
        # Get a PyUnicodeObject* within the Python gdb process:
        proxy = self.proxyval(visited)

        # Transliteration of Python's Object/unicodeobject.c:unicode_repr
        # to Python:
        if "'" in proxy and '"' not in proxy:
            quote = '"'
        else:
            quote = "'"
        out.write(quote)

        # "proxy" is a str, so every item is one complete code point: there
        # are no surrogate pairs to join here.
        for ch in proxy:
            code = ord(ch)

            # Escape quotes and backslashes
            if ch == quote or ch == '\\':
                out.write('\\')
                out.write(ch)

            # Map special whitespace to '\t', '\n', '\r'
            elif ch == '\t':
                out.write('\\t')
            elif ch == '\n':
                out.write('\\n')
            elif ch == '\r':
                out.write('\\r')

            # Map non-printable US ASCII to '\xhh'
            elif ch < ' ' or code == 0x7F:
                out.write('\\x')
                out.write(hexdigits[(code >> 4) & 0x000F])
                out.write(hexdigits[code & 0x000F])

            # Copy ASCII characters as-is
            elif code < 0x7F:
                out.write(ch)

            # Non-ASCII characters
            else:
                # Only copy the character if it is printable *and* can be
                # encoded with ENCODING
                printable = ch.isprintable()
                if printable:
                    try:
                        ch.encode(ENCODING)
                    except UnicodeEncodeError:
                        printable = False

                if printable:
                    # Copy characters as-is
                    out.write(ch)

                # Map Unicode whitespace and control characters
                # (categories Z* and C* except ASCII space)
                elif code <= 0xff:
                    # Map 8-bit characters to '\\xhh'
                    out.write('\\x')
                    for shift in (4, 0):
                        out.write(hexdigits[(code >> shift) & 0x000F])
                elif code >= 0x10000:
                    # Map 21-bit characters to '\U00xxxxxx'
                    out.write('\\U')
                    for shift in (28, 24, 20, 16, 12, 8, 4, 0):
                        out.write(hexdigits[(code >> shift) & 0x0000000F])
                else:
                    # Map 16-bit characters to '\uxxxx'
                    out.write('\\u')
                    for shift in (12, 8, 4, 0):
                        out.write(hexdigits[(code >> shift) & 0x000F])

        out.write(quote)
1500
1501
1502
class wrapperobject(PyObjectPtr):
    '''Proxy for objects whose struct type is wrapperobject; it is rendered
    as "<method-wrapper NAME of TYPE object at ADDRESS>"'''
    _typename = 'wrapperobject'

    def safe_name(self):
        '''Return the repr of the descriptor name, or a placeholder if it
        cannot be read'''
        try:
            name = self.field('descr')['d_base']['name'].string()
        except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError):
            return '<unknown name>'
        return repr(name)

    def safe_tp_name(self):
        '''Return the type name of the wrapped "self" object, or a
        placeholder if it cannot be read'''
        try:
            tp_name = self.field('self')['ob_type']['tp_name'].string()
        except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError):
            return '<unknown tp_name>'
        return tp_name

    def safe_self_addresss(self):
        '''Return the address of the wrapped "self" object formatted in
        hexadecimal, or a placeholder if it cannot be read'''
        try:
            address = int(self.field('self'))
        except (NullPyObjectPtr, RuntimeError):
            return '<failed to get self address>'
        return '%#x' % address

    def proxyval(self, visited):
        # The tuple is evaluated left to right: name, type name, address
        return ("<method-wrapper %s of %s object at %s>"
                % (self.safe_name(),
                   self.safe_tp_name(),
                   self.safe_self_addresss()))

    def write_repr(self, out, visited):
        out.write(self.proxyval(visited))
1535
1536
1537
def int_from_int(gdbval):
    '''Convert "gdbval" to a Python int using int()'''
    as_int = int(gdbval)
    return as_int
1539
1540
1541
def stringify(val):
    '''Return a string representation of "val" (currently its repr)'''
    # TODO: repr() puts everything on one line; pformat can be nicer, but
    # can lead to v.long results; this function isolates the choice
    use_single_line_repr = True
    if not use_single_line_repr:
        from pprint import pformat
        return pformat(val)
    return repr(val)
1549
1550
1551
class PyObjectPtrPrinter:
    "Prints a (PyObject*)"

    def __init__ (self, gdbval):
        # Value to print; it is only wrapped when to_string() is called
        self.gdbval = gdbval

    def to_string (self):
        '''Return the truncated repr (at most MAX_OUTPUT_LEN) of the wrapped
        value'''
        pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval)
        use_truncated_repr = True
        if not use_truncated_repr:
            # Generate full proxy value then stringify it.
            # Doing so could be expensive
            proxyval = pyop.proxyval(set())
            return stringify(proxyval)
        return pyop.get_truncated_repr(MAX_OUTPUT_LEN)
1566
1567
def pretty_printer_lookup(gdbval):
    '''Return a PyObjectPtrPrinter for "gdbval" if it is a pointer to one of
    the handled struct types, and None otherwise'''
    pointer_type = gdbval.type.unqualified()
    if pointer_type.code != gdb.TYPE_CODE_PTR:
        return None

    # Only the unqualified name of the pointed-to type is compared
    target_name = str(pointer_type.target().unqualified())
    handled = ("PyObject", "PyFrameObject", "PyUnicodeObject", "wrapperobject")
    if target_name not in handled:
        return None
    return PyObjectPtrPrinter(gdbval)
1576
1577
"""
1578
During development, I've been manually invoking the code in this way:
1579
(gdb) python
1580
1581
import sys
1582
sys.path.append('/home/david/coding/python-gdb')
1583
import libpython
1584
end
1585
1586
then reloading it after each edit like this:
1587
(gdb) python import importlib; importlib.reload(libpython)
1588
1589
The following code should ensure that the prettyprinter is registered
1590
if the code is autoloaded by gdb when visiting libpython.so, provided
1591
that this python file is installed to the same path as the library (or its
1592
.debug file) plus a "-gdb.py" suffix, e.g:
1593
/usr/lib/libpython3.12.so.1.0-gdb.py
1594
/usr/lib/debug/usr/lib/libpython3.12.so.1.0.debug-gdb.py
1595
"""
1596
def register (obj):
    '''Append pretty_printer_lookup to the "pretty_printers" list of "obj",
    or to that of the gdb module when "obj" is None'''
    target = gdb if obj is None else obj

    # Wire up the pretty-printer
    target.pretty_printers.append(pretty_printer_lookup)

register (gdb.current_objfile ())
1604
1605
1606
1607
# Unfortunately, the exact API exposed by the gdb module varies somewhat
1608
# from build to build
1609
# See http://bugs.python.org/issue8279?#msg102276
1610
1611
class Frame(object):
    '''
    Wrapper for gdb.Frame, adding various methods
    '''
    def __init__(self, gdbframe):
        self._gdbframe = gdbframe

    def older(self):
        '''Return the next older frame wrapped in a Frame, or None'''
        older = self._gdbframe.older()
        if older:
            return Frame(older)
        return None

    def newer(self):
        '''Return the next newer frame wrapped in a Frame, or None'''
        newer = self._gdbframe.newer()
        if newer:
            return Frame(newer)
        return None

    def select(self):
        '''If supported, select this frame and return True; return False if unsupported

        Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12
        onwards, but absent on Ubuntu buildbot'''
        if not hasattr(self._gdbframe, 'select'):
            print ('Unable to select frame: '
                   'this build of gdb does not expose a gdb.Frame.select method')
            return False
        self._gdbframe.select()
        return True

    def get_index(self):
        '''Calculate index of frame, starting at 0 for the newest frame within
        this thread'''
        index = 0
        # Go down until you reach the newest frame, asking each frame for
        # its newer neighbour only once per step:
        newer = self.newer()
        while newer:
            index += 1
            newer = newer.newer()
        return index

    # We divide frames into:
    #   - "python frames":
    #       - "bytecode frames" i.e. PyEval_EvalFrameEx
    #       - "other python frames": things that are of interest from a python
    #         POV, but aren't bytecode (e.g. GC, GIL)
    #   - everything else

    def is_python_frame(self):
        '''Is this a _PyEval_EvalFrameDefault frame, or some other important
        frame? (see is_other_python_frame for what "important" means in this
        context)'''
        if self.is_evalframe():
            return True
        if self.is_other_python_frame():
            return True
        return False

    def is_evalframe(self):
        '''Is this a _PyEval_EvalFrameDefault frame?'''
        if self._gdbframe.name() == EVALFRAME:
            # I believe we also need to filter on the inline
            # struct frame_id.inline_depth, only regarding frames with
            # an inline depth of 0 as actually being this function
            #
            # So we reject those with type gdb.INLINE_FRAME
            if self._gdbframe.type() == gdb.NORMAL_FRAME:
                # We have a _PyEval_EvalFrameDefault frame:
                return True

        return False

    def is_other_python_frame(self):
        '''Is this frame worth displaying in python backtraces?
        Examples:
          - waiting on the GIL
          - garbage-collecting
          - within a CFunction
        If it is, return a descriptive string
        For other frames, return False
        '''
        if self.is_waiting_for_gil():
            return 'Waiting for the GIL'

        if self.is_gc_collect():
            return 'Garbage-collecting'

        # Detect invocations of PyCFunction instances:
        frame = self._gdbframe
        caller = frame.name()
        if not caller:
            return False

        if (caller.startswith('cfunction_vectorcall_') or
            caller == 'cfunction_call'):
            arg_name = 'func'
            # Within that frame:
            #   "func" is the local containing the PyObject* of the
            # PyCFunctionObject instance
            #   "f" is the same value, but cast to (PyCFunctionObject*)
            #   "self" is the (PyObject*) of the 'self'
            try:
                # Use the prettyprinter for the func:
                func = frame.read_var(arg_name)
                return str(func)
            except ValueError:
                return ('PyCFunction invocation (unable to read %s: '
                        'missing debuginfos?)' % arg_name)
            except RuntimeError:
                return 'PyCFunction invocation (unable to read %s)' % arg_name

        if caller == 'wrapper_call':
            arg_name = 'wp'
            try:
                func = frame.read_var(arg_name)
                return str(func)
            except ValueError:
                return ('<wrapper_call invocation (unable to read %s: '
                        'missing debuginfos?)>' % arg_name)
            except RuntimeError:
                return '<wrapper_call invocation (unable to read %s)>' % arg_name

        # This frame isn't worth reporting:
        return False

    def is_waiting_for_gil(self):
        '''Is this frame waiting on the GIL?'''
        # This assumes the _POSIX_THREADS version of Python/ceval_gil.c:
        # A frame without a name compares unequal, so the result is always
        # a real bool.
        return self._gdbframe.name() == 'take_gil'

    def is_gc_collect(self):
        '''Is this frame gc_collect_main() within the garbage-collector?'''
        return self._gdbframe.name() in ('collect', 'gc_collect_main')

    def get_pyop(self):
        '''Return a PyFramePtr for the python frame handled by this gdb
        frame, or None if it cannot be read'''
        try:
            # First choice: the "frame" variable of the gdb frame
            frame = self._gdbframe.read_var('frame')
            frame = PyFramePtr(frame)
            if not frame.is_optimized_out():
                return frame
            # Fall back on the "current_frame" field of "cframe"
            cframe = self._gdbframe.read_var('cframe')
            if cframe is None:
                return None
            frame = PyFramePtr(cframe["current_frame"])
            if frame and not frame.is_optimized_out():
                return frame
            return None
        except ValueError:
            return None

    @classmethod
    def get_selected_frame(cls):
        '''Return the frame currently selected in gdb wrapped in a Frame,
        or None'''
        _gdbframe = gdb.selected_frame()
        if _gdbframe:
            return Frame(_gdbframe)
        return None

    @classmethod
    def get_selected_python_frame(cls):
        '''Try to obtain the Frame for the python-related code in the selected
        frame, or None'''
        try:
            frame = cls.get_selected_frame()
        except gdb.error:
            # No frame: Python didn't start yet
            return None

        while frame:
            if frame.is_python_frame():
                return frame
            frame = frame.older()

        # Not found:
        return None

    @classmethod
    def get_selected_bytecode_frame(cls):
        '''Try to obtain the Frame for the python bytecode interpreter in the
        selected GDB frame, or None'''
        try:
            frame = cls.get_selected_frame()
        except gdb.error:
            # No frame: Python didn't start yet (same handling as in
            # get_selected_python_frame)
            return None

        while frame:
            if frame.is_evalframe():
                return frame
            frame = frame.older()

        # Not found:
        return None

    def print_summary(self):
        '''Write to stdout one "#INDEX ..." entry (plus the source line when
        available) for every python frame handled by this gdb frame'''
        if self.is_evalframe():
            interp_frame = self.get_pyop()
            while True:
                if interp_frame:
                    if interp_frame.is_shim():
                        break
                    line = interp_frame.get_truncated_repr(MAX_OUTPUT_LEN)
                    sys.stdout.write('#%i %s\n' % (self.get_index(), line))
                    if not interp_frame.is_optimized_out():
                        line = interp_frame.current_line()
                        if line is not None:
                            sys.stdout.write(' %s\n' % line.strip())
                else:
                    sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
                    break
                interp_frame = interp_frame.previous()
        else:
            info = self.is_other_python_frame()
            if info:
                sys.stdout.write('#%i %s\n' % (self.get_index(), info))
            else:
                sys.stdout.write('#%i\n' % self.get_index())

    def print_traceback(self):
        '''Write to stdout one traceback entry (plus the source line when
        available) for every python frame handled by this gdb frame'''
        if self.is_evalframe():
            interp_frame = self.get_pyop()
            while True:
                if interp_frame:
                    if interp_frame.is_shim():
                        break
                    interp_frame.print_traceback()
                    if not interp_frame.is_optimized_out():
                        line = interp_frame.current_line()
                        if line is not None:
                            sys.stdout.write(' %s\n' % line.strip())
                else:
                    sys.stdout.write(' (unable to read python frame information)\n')
                    break
                interp_frame = interp_frame.previous()
        else:
            info = self.is_other_python_frame()
            if info:
                sys.stdout.write(' %s\n' % info)
            else:
                sys.stdout.write(' (not a python frame)\n')
1853
1854
class PyList(gdb.Command):
    '''List the current Python source code, if any

    Use
       py-list START
    to list at a different line number within the python source.

    Use
       py-list START, END
    to list a specific range of lines within the python source.
    '''

    def __init__(self):
        super().__init__("py-list", gdb.COMMAND_FILES, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        import re

        start = None
        end = None

        # "START, END" takes precedence over a lone "START" (which lists
        # the ten lines following START)
        single_match = re.match(r'\s*(\d+)\s*', args)
        range_match = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args)
        if range_match:
            start, end = map(int, range_match.groups())
        elif single_match:
            start = int(single_match.group(0))
            end = start + 10

        # py-list requires an actual PyEval_EvalFrameEx frame:
        frame = Frame.get_selected_bytecode_frame()
        if not frame:
            print('Unable to locate gdb frame for python bytecode interpreter')
            return

        pyop = frame.get_pyop()
        if not pyop or pyop.is_optimized_out():
            print(UNABLE_READ_INFO_PYTHON_FRAME)
            return

        filename = pyop.filename()
        lineno = pyop.current_line_num()
        if lineno is None:
            print('Unable to read python frame line number')
            return

        # Without arguments, show a window around the current line
        if start is None:
            start = lineno - 5
            end = lineno + 5

        start = max(start, 1)

        try:
            source = open(os.fsencode(filename), 'r', encoding="utf-8")
        except IOError as err:
            sys.stdout.write('Unable to open %s: %s\n'
                             % (filename, err))
            return
        with source:
            all_lines = source.readlines()
            # start and end are 1-based, all_lines is 0-based;
            # so [start-1:end] as a python slice gives us [start, end] as a
            # closed interval
            for number, line in enumerate(all_lines[start-1:end], start):
                linestr = str(number)
                # Highlight current line:
                if number == lineno:
                    linestr = '>' + linestr
                sys.stdout.write('%4s %s' % (linestr, line))


# ...and register the command:
PyList()
1933
1934
def move_in_stack(move_up):
    '''Move up or down the stack (for the py-up/py-down command)'''
    # Important:
    # The amount of frames that are printed out depends on how many frames are inlined
    # in the same evaluation loop. As this command links directly the C stack with the
    # Python stack, the results are sensitive to the number of inlined frames and this
    # is likely to change between versions and optimizations.
    current = Frame.get_selected_python_frame()
    if not current:
        print('Unable to locate python frame')
        return

    # Step away from the selected frame until a python frame turns up or
    # the stack ends
    candidate = current.older() if move_up else current.newer()
    while candidate:
        if candidate.is_python_frame():
            # Result:
            if candidate.select():
                candidate.print_summary()
            return
        candidate = candidate.older() if move_up else candidate.newer()

    if move_up:
        print('Unable to find an older python frame')
    else:
        print('Unable to find a newer python frame')
1966
1967
1968
class PyUp(gdb.Command):
    'Select and print all python stack frame in the same eval loop starting from the one that called this one (if any)'
    def __init__(self):
        super().__init__("py-up", gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Arguments are ignored: the command always moves one python frame
        # towards the older end of the stack
        move_in_stack(move_up=True)
1979
1980
class PyDown(gdb.Command):
    'Select and print all python stack frame in the same eval loop starting from the one called this one (if any)'
    def __init__(self):
        super().__init__("py-down", gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Arguments are ignored: the command always moves one python frame
        # towards the newer end of the stack
        move_in_stack(move_up=False)
1991
1992
# py-up and py-down need gdb.Frame.select, which not all builds of gdb
# provide: only register the two commands when it is available
if hasattr(gdb.Frame, 'select'):
    PyUp()
    PyDown()
1996
1997
class PyBacktraceFull(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'
    def __init__(self):
        super().__init__("py-bt-full", gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        # Walk towards the oldest frame, summarising every python frame
        # met on the way
        while frame:
            if frame.is_python_frame():
                frame.print_summary()
            frame = frame.older()

PyBacktraceFull()
2018
2019
class PyBacktrace(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'
    def __init__(self):
        super().__init__("py-bt", gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        sys.stdout.write('Traceback (most recent call first):\n')
        # Walk towards the oldest frame, printing a traceback entry for
        # every python frame met on the way
        while frame:
            if frame.is_python_frame():
                frame.print_traceback()
            frame = frame.older()

PyBacktrace()
2041
2042
class PyPrint(gdb.Command):
    'Look up the given python variable name, and print it'
    def __init__(self):
        super().__init__("py-print", gdb.COMMAND_DATA, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # The whole argument string is the variable name
        name = str(args)

        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print(UNABLE_READ_INFO_PYTHON_FRAME)
            return

        pyop_var, scope = pyop_frame.get_var_by_name(name)
        if not pyop_var:
            print('%r not found' % name)
            return

        print('%s %r = %s'
              % (scope,
                 name,
                 pyop_var.get_truncated_repr(MAX_OUTPUT_LEN)))

PyPrint()
2075
2076
class PyLocals(gdb.Command):
    'Print the local variables of the selected python frame, then of each previous frame linked from it'
    def __init__(self):
        gdb.Command.__init__ (self,
                              "py-locals",
                              gdb.COMMAND_DATA,
                              gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # The command takes no arguments: "args" is ignored
        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        while True:
            if not pyop_frame:
                print(UNABLE_READ_INFO_PYTHON_FRAME)
                break
            if pyop_frame.is_shim():
                # A shim frame ends the chain of frames to report
                break

            sys.stdout.write('Locals for %s\n' % (pyop_frame.co_name.proxyval(set())))

            for pyop_name, pyop_value in pyop_frame.iter_locals():
                print('%s = %s'
                      % (pyop_name.proxyval(set()),
                         pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)))

            pyop_frame = pyop_frame.previous()

PyLocals()
2112
2113