Path: blob/main/scripts/clean_print_formatting.py
483 views
unlisted
#!/usr/bin/env python3
"""Clean up ugly ASCII-table formatting in Jupyter notebook code cells.

Targets:
1. Separator bars: print("=" * N) and print("-" * N) for N >= 20
2. Column-header lines: print(f"{'header':>N} | ...")
3. Excessive width specifiers: {:>N}, {:<N}, {:^N}
"""

import json
import re
import sys
from pathlib import Path


def is_separator_line(line: str) -> bool:
    """Return True if *line* is a pure separator bar like print("=" * 55).

    Only bars of 20+ characters count as decoration worth removing; shorter
    repeats may be intentional output.
    """
    # Backreference \1 requires the same quote on both sides (the original
    # pattern accepted mismatched quotes, which is never valid Python anyway).
    m = re.match(r'^print\((["\'])[=\-]\1 \* (\d+)\)$', line.strip())
    return m is not None and int(m.group(2)) >= 20


def is_column_header_line(line: str) -> bool:
    """Return True if *line* is a pure column-header print.

    A header looks like: print(f"{'p':>5} | {'|G|=p-1':>8} | ...") --
    an f-string of aligned constants joined by " | " separators.
    """
    stripped = line.strip()
    if not (stripped.startswith('print(f"') or stripped.startswith("print(f'")):
        return False
    # Heuristic: 3+ alignment specs (e.g. ":>5}") and 2+ " | " separators.
    align_specs = len(re.findall(r":\s*[><\^]\d+\}", stripped))
    pipes = stripped.count(' | ')
    return align_specs >= 3 and pipes >= 2


def clean_width_specifiers(line: str, min_width: int = 2) -> str:
    """Remove wide alignment specifiers from f-string expressions in *line*.

    {val:>8} -> {val}     (8 >= min_width: dropped)
    {val:>1} -> {val:>1}  (below min_width: kept)

    NOTE(review): the original docstrings claimed thresholds of 4 and 5 while
    the code removed any width >= 2; the code's actual behavior is kept as the
    default and the threshold is now an explicit, overridable parameter.
    """
    def replace_spec(m: re.Match) -> str:
        prefix = m.group(1)        # "{expr" portion before the colon
        width = int(m.group(3))
        suffix = m.group(4)        # closing brace
        return prefix + suffix if width >= min_width else m.group(0)

    # Matches {expr:>N}, {expr:<N} or {expr:^N}.
    return re.sub(r'(\{[^}]*?)(:[><\^])(\d+)(\})', replace_spec, line)


def clean_constant_fstrings(line: str) -> str:
    """Replace f"{'constant'}" patterns with the bare constant in print lines.

    print(f'{"message"} {"encrypted"}') -> print('message encrypted')

    If every {} expression was a string literal, the f-prefix is dropped too.
    """
    if 'print(f' not in line:
        return line

    # Substitute {"literal"} / {'literal'} with the literal text itself.
    result = re.sub(r'\{"([^"{}]*)"\}', r'\1', line)
    result = re.sub(r"\{'([^'{}]*)'\}", r'\1', result)

    if result != line:
        # If no {} expressions remain in the f-string body, downgrade the
        # f-string to a plain string by stripping the trailing 'f' from
        # the "print(f" prefix.
        match = re.match(r"(\s*print\(f)(['\"])(.*)\2\)(.*)$", result)
        if match:
            prefix, quote, body, suffix = match.groups()
            if '{' not in body:
                result = f"{prefix[:-1]}{quote}{body}{quote}){suffix}"

    return result


def clean_str_concat_in_fstring(line: str) -> str:
    """Replace {"text" + str(var) + "text"} with text{var}text in f-strings.

    Example:
        f'{"a^" + str(p-1) + " mod " + str(p)}' -> f'a^{p-1} mod {p}'

    The chain may start with a literal and end with either a literal or a
    str(...) call; anything the parser cannot understand is left untouched.
    """
    if 'str(' not in line:
        return line

    def simplify_concat(m: re.Match) -> str:
        """Rewrite one {literal + str(expr) + ...} chain; bail out intact on
        anything unparseable (unexpected token, unterminated literal, etc.)."""
        remaining = m.group(1).strip()
        parts: list[tuple[str, str]] = []

        try:
            while remaining:
                remaining = remaining.lstrip()
                if remaining[0] in ('"', "'"):
                    # Quoted string literal (no escape handling needed: the
                    # enclosing f-string could not contain its own quote).
                    quote = remaining[0]
                    end = remaining.index(quote, 1)   # ValueError -> bail
                    parts.append(('str', remaining[1:end]))
                    remaining = remaining[end + 1:].lstrip()
                elif remaining.startswith('str('):
                    # Scan to the matching close paren of str(...).
                    depth, i = 1, 4
                    while i < len(remaining) and depth > 0:
                        if remaining[i] == '(':
                            depth += 1
                        elif remaining[i] == ')':
                            depth -= 1
                        i += 1
                    if depth != 0:
                        return m.group(0)             # unbalanced parens
                    parts.append(('expr', remaining[4:i - 1]))
                    remaining = remaining[i:].lstrip()
                else:
                    return m.group(0)                 # unexpected token
                if remaining.startswith('+'):
                    remaining = remaining[1:].lstrip()
        except ValueError:
            return m.group(0)                         # unterminated literal

        if not parts:
            return m.group(0)

        # Rebuild as plain text with {expr} substitutions.
        return ''.join(text if kind == 'str' else '{' + text + '}'
                       for kind, text in parts)

    # Any brace group whose body starts with a string literal, contains a
    # str( call and has no nested braces.  (The original pattern required
    # the chain to *end* with a string literal too, so chains ending in
    # str(...) -- including the docstring example -- were never matched.)
    return re.sub(r'\{(["\'][^{}]*str\([^{}]*)\}', simplify_concat, line)


def clean_cell_source(source_lines: list[str]) -> tuple[list[str], int]:
    """Clean formatting in a single cell's source lines.

    Returns (cleaned_lines, fix_count).  A line changed by several cleaners
    counts once per cleaner, matching the per-pattern fix accounting.
    """
    fixes = 0
    cleaned: list[str] = []

    for line in source_lines:
        # Drop pure separator bars and pure column-header lines entirely.
        if is_separator_line(line) or is_column_header_line(line):
            fixes += 1
            continue

        # Apply the in-place cleaners in sequence; each may feed the next.
        for cleaner in (clean_width_specifiers,
                        clean_constant_fstrings,
                        clean_str_concat_in_fstring):
            new_line = cleaner(line)
            if new_line != line:
                fixes += 1
                line = new_line

        cleaned.append(line)

    return cleaned, fixes


def process_notebook(path: Path, dry_run: bool = False) -> int:
    """Process a single notebook file. Returns the number of fixes found.

    In dry-run mode fixes are counted but the file is never rewritten.
    """
    # Notebooks are UTF-8 JSON; be explicit so the locale default (e.g.
    # cp1252 on Windows) cannot corrupt the round-trip.
    with open(path, encoding='utf-8') as f:
        nb = json.load(f)

    total_fixes = 0
    modified = False

    for cell in nb.get('cells', []):
        if cell.get('cell_type') != 'code':
            continue

        source = cell.get('source', [])
        if not source:
            continue

        cleaned, fixes = clean_cell_source(source)
        if fixes > 0:
            total_fixes += fixes
            if not dry_run:
                cell['source'] = cleaned
                modified = True

    if modified and not dry_run:
        with open(path, 'w', encoding='utf-8') as f:
            # indent=1 matches the conventional .ipynb on-disk format.
            json.dump(nb, f, indent=1, ensure_ascii=False)
            f.write('\n')

    return total_fixes


def main() -> None:
    """Walk the repository for notebooks and clean each one.

    Pass --dry-run to report fix counts without modifying any file.
    """
    dry_run = '--dry-run' in sys.argv
    repo = Path(__file__).parent.parent

    notebooks = sorted(repo.glob('**/*.ipynb'))
    # Skip Jupyter autosave copies.
    notebooks = [nb for nb in notebooks if '.ipynb_checkpoints' not in str(nb)]

    total = 0
    changed_files = 0

    for nb_path in notebooks:
        fixes = process_notebook(nb_path, dry_run=dry_run)
        if fixes > 0:
            rel = nb_path.relative_to(repo)
            print(f"  {rel}: {fixes} fixes")
            total += fixes
            changed_files += 1

    mode = "DRY RUN" if dry_run else "APPLIED"
    print(f"\n{mode}: {total} fixes across {changed_files} files")


if __name__ == '__main__':
    main()