Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Tools/patchcheck/reindent.py
12 views
1
#! /usr/bin/env python3
2
3
# Released to the public domain, by Tim Peters, 03 October 2000.
4
5
"""reindent [-d][-r][-v] [ path ... ]
6
7
-d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
8
-r (--recurse) Recurse. Search for all .py files in subdirectories too.
9
-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
10
-v (--verbose) Verbose. Print informative msgs; else no output.
11
(--newline) Newline. Specify the newline character to use (CRLF, LF).
12
Default is the same as the original file.
13
-h (--help) Help. Print this usage information and exit.
14
15
Change Python (.py) files to use 4-space indents and no hard tab characters.
16
Also trim excess spaces and tabs from ends of lines, and remove empty lines
17
at the end of files. Also ensure the last line ends with a newline.
18
19
If no paths are given on the command line, reindent operates as a filter,
20
reading a single source file from standard input and writing the transformed
21
source to standard output. In this case, the -d, -r and -v flags are
22
ignored.
23
24
You can pass one or more file and/or directory paths. When a directory
25
path, all .py files within the directory will be examined, and, if the -r
26
option is given, likewise recursively for subdirectories.
27
28
If output is not to standard output, reindent overwrites files in place,
29
renaming the originals with a .bak extension. If it finds nothing to
30
change, the file is left alone. If reindent does change a file, the changed
31
file is a fixed-point for future runs (i.e., running reindent on the
32
resulting .py file won't change it again).
33
34
The hard part of reindenting is figuring out what to do with comment
35
lines. So long as the input files get a clean bill of health from
36
tabnanny.py, reindent should do a good job.
37
38
The backup file is a copy of the one that is being reindented. The ".bak"
39
file is generated with shutil.copy(), but some corner cases regarding
40
user/group and permissions could leave the backup file more readable than
41
you'd prefer. You can always use the --nobackup option to prevent this.
42
"""
43
44
__version__ = "1"
45
46
import tokenize
47
import os
48
import shutil
49
import sys
50
51
verbose = False
52
recurse = False
53
dryrun = False
54
makebackup = True
55
# A specified newline to be used in the output (set by --newline option)
56
spec_newline = None
57
58
59
def usage(msg=None):
    """Write *msg* to stderr; with no argument, write the module docstring."""
    print(__doc__ if msg is None else msg, file=sys.stderr)
63
64
65
def errprint(*args):
    """Print the space-joined str() of each argument to stderr, newline-terminated."""
    print(" ".join(map(str, args)), file=sys.stderr)
68
69
def main():
    """Parse command-line options, then reindent each named path.

    With no path arguments, acts as a filter: reads one source from stdin
    and writes the reindented result to stdout.  Sets the module-level
    option flags as a side effect.
    """
    import getopt
    global verbose, recurse, dryrun, makebackup, spec_newline
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "drnvh",
            ["dryrun", "recurse", "nobackup", "verbose", "newline=", "help"])
    except getopt.error as msg:
        usage(msg)
        return
    for opt, value in opts:
        if opt in ("-d", "--dryrun"):
            dryrun = True
        elif opt in ("-r", "--recurse"):
            recurse = True
        elif opt in ("-n", "--nobackup"):
            makebackup = False
        elif opt in ("-v", "--verbose"):
            verbose = True
        elif opt == "--newline":
            # Only CRLF and LF are accepted (case-insensitive).
            translations = {"CRLF": "\r\n", "LF": "\n"}
            key = value.upper()
            if key not in translations:
                usage()
                return
            spec_newline = translations[key]
        elif opt in ("-h", "--help"):
            usage()
            return
    if args:
        for path in args:
            check(path)
    else:
        # No paths given: run as a stdin -> stdout filter.
        reindenter = Reindenter(sys.stdin)
        reindenter.run()
        reindenter.write(sys.stdout)
102
103
104
def check(file):
    """Reindent *file*, or every .py file inside it if it is a directory.

    Directories are walked recursively only when the -r flag is set; hidden
    subdirectories and symlinks are skipped.  Honors the module-level option
    flags (verbose, dryrun, makebackup, spec_newline).

    Returns True if the file was rewritten, False if it needed no change,
    and None for directories or on any error (reported to stderr).
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory", file)
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            descend = (recurse and os.path.isdir(fullname)
                       and not os.path.islink(fullname)
                       and not os.path.split(fullname)[1].startswith("."))
            if descend or name.lower().endswith(".py"):
                check(fullname)
        return

    if verbose:
        print("checking", file, "...", end=' ')
    # Sniff the source encoding from the raw bytes (PEP 263 coding cookie / BOM).
    with open(file, 'rb') as f:
        try:
            encoding, _ = tokenize.detect_encoding(f.readline)
        except SyntaxError as se:
            errprint("%s: SyntaxError: %s" % (file, str(se)))
            return
    try:
        with open(file, encoding=encoding) as f:
            r = Reindenter(f)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # --newline overrides whatever the file used; a tuple from f.newlines
    # means the file mixed newline styles, which we refuse to guess about.
    newline = spec_newline if spec_newline else r.newlines
    if isinstance(newline, tuple):
        errprint("%s: mixed newlines detected; cannot continue without --newline" % file)
        return

    if not r.run():
        if verbose:
            print("unchanged.")
        return False

    if verbose:
        print("changed.")
        if dryrun:
            print("But this is a dry run, so leaving it alone.")
    if not dryrun:
        bak = file + ".bak"
        if makebackup:
            shutil.copyfile(file, bak)
            if verbose:
                print("backed up", file, "to", bak)
        with open(file, "w", encoding=encoding, newline=newline) as f:
            r.write(f)
        if verbose:
            print("wrote new", file)
    return True
158
159
160
def _rstrip(line, JUNK='\n \t'):
161
"""Return line stripped of trailing spaces, tabs, newlines.
162
163
Note that line.rstrip() instead also strips sundry control characters,
164
but at least one known Emacs user expects to keep junk like that, not
165
mentioning Barry by name or anything <wink>.
166
"""
167
168
i = len(line)
169
while i > 0 and line[i - 1] in JUNK:
170
i -= 1
171
return line[:i]
172
173
174
class Reindenter:
    """Reindent the Python source read from open file object *f*.

    run() tokenizes the source and computes a 4-space-indented version
    (also trimming trailing whitespace and trailing blank lines); write()
    emits the result.  The tricky part is choosing indentation for comment
    lines, which tokenize reports but assigns no indent level.
    """

    def __init__(self, f):
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

        # Save the newlines found in the file so they can be used to
        # create output without mutating the newlines.
        self.newlines = f.newlines

    def run(self):
        """Compute the reindented source into self.after.

        Returns True iff the result differs from the original raw lines.
        """
        tokens = tokenize.generate_tokens(self.getline)
        for _token in tokens:
            self.tokeneater(*_token)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:          # Maybe it's a hanging
                                          # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + (getlspace(after[jline - 1]) -
                                               getlspace(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            # Never add trailing whitespace to a blank line.
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Shift left, but never remove more than the
                        # leading whitespace that is actually there.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the reindented source (must call run() first) to file object *f*."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((slinecol[0], -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((slinecol[0], self.level))
322
323
324
# Count number of leading blanks.
325
def getlspace(line):
    """Return the number of leading space characters in *line*."""
    for pos, ch in enumerate(line):
        if ch != " ":
            return pos
    return len(line)
330
331
332
# Script entry point: parse argv and reindent the requested files.
if __name__ == '__main__':
    main()
334
335