Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Tools/patchcheck/reindent.py
12 views
1
#! /usr/bin/env python3
2
3
# Released to the public domain, by Tim Peters, 03 October 2000.
4
5
"""reindent [-d][-r][-v] [ path ... ]
6
7
-d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
8
-r (--recurse) Recurse. Search for all .py files in subdirectories too.
9
-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
10
-v (--verbose) Verbose. Print informative msgs; else no output.
11
(--newline) Newline. Specify the newline character to use (CRLF, LF).
12
Default is the same as the original file.
13
-h (--help) Help. Print this usage information and exit.
14
15
Change Python (.py) files to use 4-space indents and no hard tab characters.
16
Also trim excess spaces and tabs from ends of lines, and remove empty lines
17
at the end of files. Also ensure the last line ends with a newline.
18
19
If no paths are given on the command line, reindent operates as a filter,
20
reading a single source file from standard input and writing the transformed
21
source to standard output. In this case, the -d, -r and -v flags are
22
ignored.
23
24
You can pass one or more file and/or directory paths. When a directory
25
path, all .py files within the directory will be examined, and, if the -r
26
option is given, likewise recursively for subdirectories.
27
28
If output is not to standard output, reindent overwrites files in place,
29
renaming the originals with a .bak extension. If it finds nothing to
30
change, the file is left alone. If reindent does change a file, the changed
31
file is a fixed-point for future runs (i.e., running reindent on the
32
resulting .py file won't change it again).
33
34
The hard part of reindenting is figuring out what to do with comment
35
lines. So long as the input files get a clean bill of health from
36
tabnanny.py, reindent should do a good job.
37
38
The backup file is a copy of the one that is being reindented. The ".bak"
39
file is generated with shutil.copy(), but some corner cases regarding
40
user/group and permissions could leave the backup file more readable than
41
you'd prefer. You can always use the --nobackup option to prevent this.
42
"""
43
44
__version__ = "1"
45
46
import tokenize
47
import os
48
import shutil
49
import sys
50
51
verbose = False
52
recurse = False
53
dryrun = False
54
makebackup = True
55
# A specified newline to be used in the output (set by --newline option)
56
spec_newline = None
57
58
59
def usage(msg=None):
    """Write *msg* to stderr; with no argument, write the module docstring."""
    print(__doc__ if msg is None else msg, file=sys.stderr)
63
64
65
def errprint(*args):
    """Print the space-joined str() of each argument to stderr, newline-terminated."""
    print(" ".join(map(str, args)), file=sys.stderr)
68
69
def main():
    """Parse command-line options, then reindent each named path.

    With no path arguments, acts as a filter: reads one source from stdin
    and writes the reindented result to stdout.  Sets the module-level
    option flags as a side effect.
    """
    import getopt
    global verbose, recurse, dryrun, makebackup, spec_newline
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "drnvh",
            ["dryrun", "recurse", "nobackup", "verbose", "newline=", "help"])
    except getopt.error as msg:
        usage(msg)
        return
    for opt, value in opts:
        if opt in ("-d", "--dryrun"):
            dryrun = True
        elif opt in ("-r", "--recurse"):
            recurse = True
        elif opt in ("-n", "--nobackup"):
            makebackup = False
        elif opt in ("-v", "--verbose"):
            verbose = True
        elif opt == "--newline":
            # Only CRLF and LF are accepted (case-insensitive).
            translations = {"CRLF": "\r\n", "LF": "\n"}
            key = value.upper()
            if key not in translations:
                usage()
                return
            spec_newline = translations[key]
        elif opt in ("-h", "--help"):
            usage()
            return
    if args:
        for path in args:
            check(path)
    else:
        # No paths given: run as a stdin -> stdout filter.
        reindenter = Reindenter(sys.stdin)
        reindenter.run()
        reindenter.write(sys.stdout)
102
103
104
def check(file):
    """Reindent *file*, or every .py file inside it if it is a directory.

    Directories are walked recursively only when the -r flag is set; hidden
    subdirectories and symlinks are skipped.  Honors the module-level option
    flags (verbose, dryrun, makebackup, spec_newline).

    Returns True if the file was rewritten, False if it needed no change,
    and None for directories or on any error (reported to stderr).
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory", file)
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            descend = (recurse and os.path.isdir(fullname)
                       and not os.path.islink(fullname)
                       and not os.path.split(fullname)[1].startswith("."))
            if descend or name.lower().endswith(".py"):
                check(fullname)
        return

    if verbose:
        print("checking", file, "...", end=' ')
    # Sniff the source encoding from the raw bytes (PEP 263 coding cookie / BOM).
    with open(file, 'rb') as f:
        try:
            encoding, _ = tokenize.detect_encoding(f.readline)
        except SyntaxError as se:
            errprint("%s: SyntaxError: %s" % (file, str(se)))
            return
    try:
        with open(file, encoding=encoding) as f:
            r = Reindenter(f)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # --newline overrides whatever the file used; a tuple from f.newlines
    # means the file mixed newline styles, which we refuse to guess about.
    newline = spec_newline if spec_newline else r.newlines
    if isinstance(newline, tuple):
        errprint("%s: mixed newlines detected; cannot continue without --newline" % file)
        return

    if not r.run():
        if verbose:
            print("unchanged.")
        return False

    if verbose:
        print("changed.")
        if dryrun:
            print("But this is a dry run, so leaving it alone.")
    if not dryrun:
        bak = file + ".bak"
        if makebackup:
            shutil.copyfile(file, bak)
            if verbose:
                print("backed up", file, "to", bak)
        with open(file, "w", encoding=encoding, newline=newline) as f:
            r.write(f)
        if verbose:
            print("wrote new", file)
    return True
158
159
160
def _rstrip(line, JUNK='\n \t'):
161
"""Return line stripped of trailing spaces, tabs, newlines.
162
163
Note that line.rstrip() instead also strips sundry control characters,
164
but at least one known Emacs user expects to keep junk like that, not
165
mentioning Barry by name or anything <wink>.
166
"""
167
168
i = len(line)
169
while i > 0 and line[i - 1] in JUNK:
170
i -= 1
171
return line[:i]
172
173
174
class Reindenter:
    """Reindent the Python source read from open file object *f*.

    run() tokenizes the source and computes a 4-space-indented version
    (also trimming trailing whitespace and trailing blank lines); write()
    emits the result.  The tricky part is choosing indentation for comment
    lines, which tokenize reports but assigns no indent level.
    """

    def __init__(self, f):
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

        # Save the newlines found in the file so they can be used to
        # create output without mutating the newlines.
        self.newlines = f.newlines

    def run(self):
        """Compute the reindented source into self.after.

        Returns True iff the result differs from the original raw lines.
        """
        tokens = tokenize.generate_tokens(self.getline)
        for _token in tokens:
            self.tokeneater(*_token)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:          # Maybe it's a hanging
                                          # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + (getlspace(after[jline - 1]) -
                                               getlspace(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            # Never add trailing whitespace to a blank line.
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Shift left, but never remove more than the
                        # leading whitespace that is actually there.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the reindented source (must call run() first) to file object *f*."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((slinecol[0], -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((slinecol[0], self.level))
322
323
324
# Count number of leading blanks.
325
def getlspace(line):
    """Return the number of leading space characters in *line*."""
    for pos, ch in enumerate(line):
        if ch != " ":
            return pos
    return len(line)
330
331
332
# Script entry point: parse argv and reindent the requested files.
if __name__ == '__main__':
    main()
334
335