Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
duyuefeng0708
GitHub Repository: duyuefeng0708/Cryptography-From-First-Principle
Path: blob/main/scripts/clean_print_formatting.py
483 views
unlisted
1
#!/usr/bin/env python3
2
"""Clean up ugly ASCII-table formatting in Jupyter notebook code cells.
3
4
Targets:
5
1. Separator bars: print("=" * N) and print("-" * N) for N >= 20
6
2. Column-header lines: print(f"{'header':>N} | ...")
7
3. Excessive width specifiers: {:>N}, {:<N}, {:^N} for N >= 5
8
"""
9
10
import json
11
import re
12
import sys
13
from pathlib import Path
14
15
16
def is_separator_line(line: str) -> bool:
    """Check if a line is a pure separator bar like print("=" * 55).

    Only bars at least 20 characters wide count. The opening and closing
    quote around the bar character must match (the previous pattern used
    two independent quote classes, so mismatched quotes like
    print("=' * 30) were wrongly accepted).
    """
    stripped = line.strip()
    # print("=" * 55) or print('-' * 55); capture the width directly
    # instead of re-scanning the string a second time.
    match = re.fullmatch(r'print\((["\'])[=\-]\1 \* (\d+)\)', stripped)
    return match is not None and int(match.group(2)) >= 20
24
25
26
def is_column_header_line(line: str) -> bool:
    """Return True for print(f"...") lines that are pure column headers.

    A header line looks like: print(f"{'p':>5} | {'|G|=p-1':>8} | ...")
    i.e. an f-string print containing at least three alignment specs
    (:>N, :<N or :^N) and at least two " | " column separators.
    """
    text = line.strip()
    # Anything that is not an f-string print cannot be a header.
    if not (text.startswith('print(f"') or text.startswith("print(f'")):
        return False
    # Count alignment specs and pipe separators independently.
    spec_count = len(re.findall(r":\s*[><\^]\d+\}", text))
    separator_count = text.count(' | ')
    return spec_count >= 3 and separator_count >= 2
38
39
40
def clean_width_specifiers(line: str) -> str:
    """Remove width specifiers from f-string alignment specs in a line.

    {:>8}    -> {}
    {val:>8} -> {val}
    {x:>1}   -> {x:>1}  (width 1 is kept)

    Any width of 2 or more is stripped.
    NOTE(review): the module docstring says N >= 5 and an earlier version
    of this docstring said N >= 4 / "keep :>2 for digit alignment"; the
    code strips any width >= 2 -- confirm the intended threshold.
    """
    # Match f-string width specs like :>N}, :<N}, :^N} where N >= 2
    def replace_spec(m):
        prefix = m.group(1)  # everything before the colon
        width = int(m.group(3))
        suffix = m.group(4)  # closing brace
        if width >= 2:
            return prefix + suffix
        return m.group(0)  # keep :>1 etc (unlikely)

    # Pattern: {expr:>N} or {expr:<N} or {expr:^N}
    result = re.sub(r'(\{[^}]*?)(:[><\^])(\d+)(\})', replace_spec, line)
    return result
59
60
61
def clean_constant_fstrings(line: str) -> str:
    """Replace f"{'constant'}" patterns with just the constant in print lines.

    print(f'{"message"} {"encrypted"}') -> print('message encrypted')

    If removing the constant expressions leaves no {...} placeholders at
    all, the f prefix is dropped so the line becomes a plain string.
    """
    if 'print(f' not in line:
        return line

    # Replace {"string_literal"} and {'string_literal'} with just the string.
    result = re.sub(r'\{"([^"{}]*)"\}', r'\1', line)
    result = re.sub(r"\{'([^'{}]*)'\}", r'\1', result)

    # Only consider downgrading the f-string if we actually changed something.
    if result != line:
        # Locate the print(f"..." / print(f'...' call. re.DOTALL lets the
        # trailing group capture the line's final newline: notebook source
        # lines are newline-terminated, and without DOTALL the rebuilt line
        # silently dropped its '\n', merging it with the following line.
        match = re.match(r"(\s*print\(f)(['\"])(.*)\2\)(.*)$",
                         result, flags=re.DOTALL)
        if match:
            prefix, quote, body, suffix = match.groups()
            # If no { remain in body, downgrade f"..." to a plain "...".
            if '{' not in body:
                result = f"{prefix[:-1]}{quote}{body}{quote}){suffix}"

    return result
88
89
90
def clean_str_concat_in_fstring(line: str) -> str:
    """Replace {"text" + str(var) + "text"} with text{var}text in f-strings.

    Example:
        f'{"a^" + str(p-1) + " mod " + str(p)}' -> f'a^{p-1} mod {p}'

    Operates purely textually: a small hand-rolled parser splits the
    concatenation into quoted literals and str(...) calls, then rebuilds
    them as literal text plus {expr} placeholders.
    """
    # Fast exit: nothing to do unless the line contains a str() call.
    if 'str(' not in line:
        return line

    # Pattern: {"literal" + str(expr) + "literal" ...}
    # This can chain: {"a" + str(x) + " b " + str(y) + "c"}
    # Strategy: find expressions like {"..." + str(...) + "..."} and simplify

    def simplify_concat(m):
        """Simplify a single {concat_expression}."""
        inner = m.group(1)

        # Split on ' + ' or " + " while respecting quotes
        # Simple approach: try to parse the concatenation
        # parts accumulates ('str', literal_text) and ('expr', expression)
        # tuples in source order.
        parts = []
        remaining = inner.strip()

        while remaining:
            remaining = remaining.strip()
            if remaining.startswith('"'):
                # String literal with double quotes.
                # NOTE(review): index() raises ValueError if the closing
                # quote is missing; the guarding regexes below make that
                # unlikely (the captured group starts and ends with a
                # quote) but it is not impossible -- confirm.
                end = remaining.index('"', 1)
                parts.append(('str', remaining[1:end]))
                remaining = remaining[end+1:].strip()
                # Consume the '+' joining this piece to the next one.
                if remaining.startswith('+'):
                    remaining = remaining[1:].strip()
            elif remaining.startswith("'"):
                # String literal with single quotes (same handling).
                end = remaining.index("'", 1)
                parts.append(('str', remaining[1:end]))
                remaining = remaining[end+1:].strip()
                if remaining.startswith('+'):
                    remaining = remaining[1:].strip()
            elif remaining.startswith('str('):
                # str(expr) call: scan forward tracking parenthesis depth
                # so nested calls like str(f(x)) are consumed whole.
                depth = 0  # NOTE(review): immediately overwritten below; redundant
                i = 4  # skip 'str('
                depth = 1
                while i < len(remaining) and depth > 0:
                    if remaining[i] == '(':
                        depth += 1
                    elif remaining[i] == ')':
                        depth -= 1
                    i += 1
                # Everything between 'str(' and its matching ')' is the expr.
                expr = remaining[4:i-1]
                parts.append(('expr', expr))
                remaining = remaining[i:].strip()
                if remaining.startswith('+'):
                    remaining = remaining[1:].strip()
            else:
                # Can't parse, bail out: return the whole match unchanged.
                return m.group(0)

        if not parts:
            return m.group(0)

        # Rebuild as plain text + {expr} sequences
        result = ''
        for kind, val in parts:
            if kind == 'str':
                result += val
            elif kind == 'expr':
                result += '{' + val + '}'

        return result

    # Find all {expression} blocks in f-strings that contain str() plus
    # concatenation. Two passes: one for double-quoted, one for
    # single-quoted leading literals (the captured group must begin and
    # end with the same quote character).
    result = re.sub(r'\{(".*?str\(.*?")\}', simplify_concat, line)
    result = re.sub(r"\{('.*?str\(.*?')\}", simplify_concat, result)

    return result
166
167
168
def clean_cell_source(source_lines: list[str]) -> tuple[list[str], int]:
    """Clean formatting in a single cell's source lines.

    Separator bars and pure column-header lines are dropped entirely;
    every other line is run through the three textual cleaners in turn.
    Each dropped line and each cleaner that changes a line counts as one
    fix.

    Returns (cleaned_lines, fix_count).
    """
    fix_count = 0
    kept: list[str] = []

    # The per-line cleaners, applied in this order; each feeds the next.
    transforms = (
        clean_width_specifiers,
        clean_constant_fstrings,
        clean_str_concat_in_fstring,
    )

    for raw in source_lines:
        # Whole-line removals: separator bars and column headers.
        if is_separator_line(raw) or is_column_header_line(raw):
            fix_count += 1
            continue

        current = raw
        for transform in transforms:
            updated = transform(current)
            if updated != current:
                fix_count += 1
                current = updated

        kept.append(current)

    return kept, fix_count
208
209
210
def process_notebook(path: Path, dry_run: bool = False) -> int:
    """Clean all code cells of a single notebook.

    Args:
        path: Notebook file (.ipynb) to process.
        dry_run: When True, count fixes but do not write the file back.

    Returns:
        Total number of fixes found (and applied, unless dry_run).
    """
    # Notebooks are UTF-8 JSON by spec; be explicit so reading/writing does
    # not depend on the locale's default encoding (previously it did, which
    # breaks with ensure_ascii=False output on e.g. Windows cp1252).
    with open(path, encoding='utf-8') as f:
        nb = json.load(f)

    total_fixes = 0
    modified = False

    for cell in nb.get('cells', []):
        # Only code cells carry print formatting worth cleaning.
        if cell.get('cell_type') != 'code':
            continue

        source = cell.get('source', [])
        if not source:
            continue

        cleaned, fixes = clean_cell_source(source)
        if fixes > 0:
            total_fixes += fixes
            if not dry_run:
                cell['source'] = cleaned
                modified = True

    # Write back only when something changed and we are not in dry-run mode.
    if modified and not dry_run:
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(nb, f, indent=1, ensure_ascii=False)
            f.write('\n')

    return total_fixes
239
240
241
def main():
    """Run the cleaner over every notebook in the repository.

    Pass --dry-run to count fixes without modifying any file. Prints a
    per-notebook fix count followed by a summary line.
    """
    dry_run = '--dry-run' in sys.argv
    repo_root = Path(__file__).parent.parent

    # Collect notebooks, skipping Jupyter checkpoint copies.
    notebooks = [
        path
        for path in sorted(repo_root.glob('**/*.ipynb'))
        if '.ipynb_checkpoints' not in str(path)
    ]

    total_fixes = 0
    files_changed = 0

    for notebook in notebooks:
        fix_count = process_notebook(notebook, dry_run=dry_run)
        if fix_count > 0:
            rel = notebook.relative_to(repo_root)
            print(f"  {rel}: {fix_count} fixes")
            total_fixes += fix_count
            files_changed += 1

    mode = "DRY RUN" if dry_run else "APPLIED"
    print(f"\n{mode}: {total_fixes} fixes across {files_changed} files")
262
263
264
# Entry point when run directly: python scripts/clean_print_formatting.py [--dry-run]
if __name__ == '__main__':
    main()
266
267