Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/tools/docs/checktransupdate.py
38179 views
1
#!/usr/bin/env python3
2
# SPDX-License-Identifier: GPL-2.0
3
4
"""
5
This script helps track the translation status of the documentation
6
in different locales, e.g., zh_CN. More specifically, it uses `git log`
7
commit to find the latest English commit from the translation commit
8
(ordered by author date) and the latest English commits from HEAD. If
9
differences occur, report the file and commits that need to be updated.
10
11
The usage is as follows:
12
- tools/docs/checktransupdate.py -l zh_CN
13
This will print all the files that need to be updated or translated in the zh_CN locale.
14
- tools/docs/checktransupdate.py Documentation/translations/zh_CN/dev-tools/testing-overview.rst
15
This will only print the status of the specified file.
16
17
The output is something like:
18
Documentation/dev-tools/kfence.rst
19
No translation in the locale of zh_CN
20
21
Documentation/translations/zh_CN/dev-tools/testing-overview.rst
22
commit 42fb9cfd5b18 ("Documentation: dev-tools: Add link to RV docs")
23
1 commits needs resolving in total
24
"""
25
26
import os
27
import re
28
import time
29
import logging
30
from argparse import ArgumentParser, ArgumentTypeError, BooleanOptionalAction
31
from datetime import datetime
32
33
34
def get_origin_path(file_path):
    """Map a translation path back to the path of the original document.

    The "translations" component and the component immediately after it
    (the locale directory) are removed from the "/"-separated path.

    Raises ValueError if the path has no "translations" component.
    """
    parts = file_path.split("/")
    marker = parts.index("translations")
    # Drop "translations" together with the locale directory that follows it.
    del parts[marker : marker + 2]
    return "/".join(parts)
41
42
43
def get_latest_commit_from(file_path, commit):
    """Get the latest commit from the specified commit for the specified file.

    Runs ``git log -1`` starting at ``commit`` and limited to ``file_path``.

    Returns None when git produces at most one line of output (for example
    when no commit is found or git fails). Otherwise returns a dict with:
      - "hash": the full commit hash
      - "author_date" / "commit_date": datetimes parsed from git's %aD / %cD
      - "message": the commit message as a list of lines
    """
    command = f"git log --pretty=format:%H%n%aD%n%cD%n%n%B {commit} -1 -- {file_path}"
    logging.debug(command)
    # Use a context manager so the pipe is closed (and the git child process
    # reaped) as soon as the output has been read.
    with os.popen(command) as pipe:
        result = pipe.read().split("\n")
    if len(result) <= 1:
        return None

    logging.debug("Result: %s", result[0])

    # Output layout: hash, author date, commit date, blank line, message body.
    return {
        "hash": result[0],
        "author_date": datetime.strptime(result[1], "%a, %d %b %Y %H:%M:%S %z"),
        "commit_date": datetime.strptime(result[2], "%a, %d %b %Y %H:%M:%S %z"),
        "message": result[4:],
    }
61
62
63
def get_origin_from_trans(origin_path, t_from_head):
    """Find the origin commit that the translation commit was based on.

    Starting from the translation commit, walk back through the history of
    origin_path until a commit is found whose author date is not later than
    the translation's author date. Returns that commit dict, or None if the
    history is exhausted first.
    """
    candidate = get_latest_commit_from(origin_path, t_from_head["hash"])
    while candidate is not None:
        if candidate["author_date"] <= t_from_head["author_date"]:
            logging.debug("tracked origin commit id: %s", candidate["hash"])
            break
        # Still authored after the translation: step to the parent and retry.
        candidate = get_latest_commit_from(origin_path, candidate["hash"] + "^")
    return candidate
71
72
73
def get_origin_from_trans_smartly(origin_path, t_from_head):
    """Find the origin commit named in a formatted translation commit message.

    Two message formats are recognised:
    (1) update to commit HASH (TITLE)
    (2) Update the translation through commit HASH (TITLE)

    Returns the commit dict looked up for origin_path at HASH, or None when
    no message line matches or the lookup finds nothing.
    """
    # capture group for a 12-character abbreviated commit hash
    hash_group = r'([0-9a-f]{12})'
    # tried in this order on every line: format (1), then format (2)
    patterns = (
        re.compile(rf'update to commit {hash_group}'),
        re.compile(rf'Update the translation through commit {hash_group}'),
    )

    hits = (
        hit
        for line in t_from_head["message"]
        for pattern in patterns
        if (hit := pattern.search(line)) is not None
    )
    first_hit = next(hits, None)
    if first_hit is None:
        return None

    tracked = get_latest_commit_from(origin_path, first_hit.group(1))
    if tracked is not None:
        logging.debug("tracked origin commit id: %s", tracked["hash"])
    return tracked
103
104
105
def get_commits_count_between(opath, commit1, commit2):
    """Get the commits between two commits for the specified file.

    Despite the name, this returns the list of commit hashes (one per
    non-empty output line) printed by ``git log`` for the revision range
    ``commit1...commit2`` limited to ``opath``; callers take the count from it.
    The list is empty when git prints nothing.
    """
    command = f"git log --pretty=format:%H {commit1}...{commit2} -- {opath}"
    logging.debug(command)
    # Close the pipe explicitly so the git child process is reaped promptly.
    with os.popen(command) as pipe:
        output = pipe.read()
    # filter out empty lines
    return [line for line in output.split("\n") if line != ""]
114
115
116
def pretty_output(commit):
    """Pretty print the commit message.

    Returns git's output for the commit formatted as
    ``<abbreviated hash> ("<subject>")``, or an empty string when git prints
    nothing (for example for an unknown commit).
    """
    command = f"git log --pretty='format:%h (\"%s\")' -1 {commit}"
    logging.debug(command)
    # Close the pipe explicitly so the git child process is reaped promptly.
    with os.popen(command) as pipe:
        return pipe.read()
122
123
124
def valid_commit(commit):
    """Tell whether the commit should be reported.

    A commit is accepted unless its pretty-printed form contains "Merge tag".
    """
    return "Merge tag" not in pretty_output(commit)
128
129
def check_per_file(file_path):
    """Check the translation status for the specified file.

    file_path is the path of a translated document. The function finds the
    origin commit that the translation corresponds to, compares it with the
    latest origin commit reachable from HEAD, and logs every origin commit
    that still needs resolving. Problems are logged as errors and the
    function returns without raising. Always returns None.
    """
    try:
        opath = get_origin_path(file_path)
    except ValueError:
        # get_origin_path() requires a "translations" path component.
        logging.error("%s is not under a translations directory", file_path)
        return

    if not os.path.isfile(opath):
        # Use lazy %-formatting so the actual path appears in the message.
        logging.error("Cannot find the origin path for %s", file_path)
        return

    o_from_head = get_latest_commit_from(opath, "HEAD")
    t_from_head = get_latest_commit_from(file_path, "HEAD")

    if o_from_head is None or t_from_head is None:
        logging.error("Cannot find the latest commit for %s", file_path)
        return

    o_from_t = get_origin_from_trans_smartly(opath, t_from_head)
    # notice, o_from_t from get_*_smartly() is always more accurate than from get_*()
    if o_from_t is None:
        o_from_t = get_origin_from_trans(opath, t_from_head)

    if o_from_t is None:
        logging.error("Error: Cannot find the latest origin commit for %s", file_path)
        return

    if o_from_head["hash"] == o_from_t["hash"]:
        logging.debug("No update needed for %s", file_path)
    else:
        logging.info(file_path)
        commits = get_commits_count_between(
            opath, o_from_t["hash"], o_from_head["hash"]
        )
        count = 0
        for commit in commits:
            # commits rejected by valid_commit() are neither printed nor counted
            if valid_commit(commit):
                logging.info("commit %s", pretty_output(commit))
                count += 1
        logging.info("%d commits needs resolving in total\n", count)
166
167
168
def valid_locales(locale):
    """Check if the locale is valid or not.

    A locale is valid when Documentation/translations/<locale> exists as a
    directory under the tree root, which is taken to be two levels above this
    script's directory.

    Returns the locale unchanged; raises ArgumentTypeError otherwise.
    """
    script_path = os.path.dirname(os.path.abspath(__file__))
    linux_path = os.path.join(script_path, "../..")
    if not os.path.isdir(f"{linux_path}/Documentation/translations/{locale}"):
        # f-string so the offending locale is actually shown to the user
        raise ArgumentTypeError(f"Invalid locale: {locale}")
    return locale
175
176
177
def list_files_with_excluding_folders(folder, exclude_folders, include_suffix):
    """Collect files under folder whose path ends with include_suffix.

    The tree is walked iteratively with an explicit stack. Any directory
    whose base name appears in exclude_folders is skipped together with
    everything below it. Returned paths are built by joining onto folder.
    """
    matched = []
    pending = [folder]

    while pending:
        current = pending.pop()
        # excluded directories are never listed, so their subtrees are skipped
        if os.path.basename(current) in exclude_folders:
            continue
        for name in os.listdir(current):
            full = os.path.join(current, name)
            if os.path.isdir(full):
                pending.append(full)
            elif full.endswith(include_suffix):
                matched.append(full)

    return matched
197
198
199
class DmesgFormatter(logging.Formatter):
    """Logging formatter that prefixes each message with a bracketed timestamp"""

    def format(self, record):
        # The stamp is the wall-clock time at formatting, in seconds with six
        # decimals, right-aligned to a minimum width of ten characters.
        now = time.time()
        return "[{:>10.6f}] {}".format(now, record.getMessage())
206
207
208
def config_logging(log_level, log_file="checktransupdate.log"):
    """Set up the root logger to write to the console and to log_file.

    The root logger and both handlers are set to log_level, and both handlers
    share one DmesgFormatter instance.
    """
    root = logging.getLogger()
    root.setLevel(log_level)

    # console handler first, then the file handler
    handlers = (logging.StreamHandler(), logging.FileHandler(log_file))
    dmesg_style = DmesgFormatter()

    for handler in handlers:
        handler.setLevel(log_level)
        handler.setFormatter(dmesg_style)

    for handler in handlers:
        root.addHandler(handler)
230
231
232
def main():
    """Main function of the script.

    Parses the command line, configures logging, builds the list of
    translation files to check (the given files, or every translated
    counterpart of the .rst documents under Documentation/ for the chosen
    locale), then checks each one from the tree root.
    """
    script_path = os.path.dirname(os.path.abspath(__file__))
    linux_path = os.path.join(script_path, "../..")

    parser = ArgumentParser(description="Check the translation update")
    parser.add_argument(
        "-l",
        "--locale",
        default="zh_CN",
        type=valid_locales,
        help="Locale to check when files are not specified",
    )

    parser.add_argument(
        "--print-missing-translations",
        action=BooleanOptionalAction,
        default=True,
        help="Print files that do not have translations",
    )

    parser.add_argument(
        '--log',
        default='INFO',
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Set the logging level')

    parser.add_argument(
        '--logfile',
        default='checktransupdate.log',
        help='Set the logging file (default: checktransupdate.log)')

    parser.add_argument(
        "files", nargs="*", help="Files to check, if not specified, check all files"
    )
    args = parser.parse_args()

    # Configure logging based on the --log and --logfile arguments
    log_level = getattr(logging, args.log.upper(), logging.INFO)
    config_logging(log_level, args.logfile)

    # Get files related to linux path
    files = args.files
    if not files:
        doc_root = os.path.join(linux_path, "Documentation")
        official_files = list_files_with_excluding_folders(
            doc_root, ["translations", "output"], "rst"
        )

        for file in official_files:
            # Build the translated counterpart relative to the Documentation
            # root, so a parent directory that happens to be called
            # "Documentation" cannot confuse the mapping.
            rel_to_doc = os.path.relpath(file, doc_root)
            new_file = os.path.join(doc_root, "translations", args.locale, rel_to_doc)
            if os.path.isfile(new_file):
                files.append(new_file)
            elif args.print_missing_translations:
                logging.info(os.path.relpath(os.path.abspath(file), linux_path))
                logging.info("No translation in the locale of %s\n", args.locale)

    # Paths must be made relative to the tree root before changing directory.
    files = [os.path.relpath(os.path.abspath(x), linux_path) for x in files]

    # cd to linux root directory
    os.chdir(linux_path)

    for file in files:
        check_per_file(file)
304
305
306
# Run the checker only when this file is executed as a script.
if __name__ == "__main__":
    main()
308
309