Path: blob/main/scripts/remove_dashes.py
483 views
unlisted
#!/usr/bin/env python3
"""Replace ' -- ' (double-hyphen dashes) in notebook markdown cells and code comments.

Rules:
- In markdown cells: replace ' -- ' with '. ' or ', '
- In code cells: replace ' -- ' only in comments (lines starting with #) and strings
- Skip table separator lines
- Skip code fences (the fence marker lines and everything between them)
"""

import json
import re
from pathlib import Path
from typing import List

# The dash being removed: a double hyphen with a single space on each side.
_DASH_RE = re.compile(r' -- ')
# Lines made only of whitespace, pipes, colons and hyphens (markdown table rules).
_TABLE_SEPARATOR_RE = re.compile(r'^[\s|:-]+$')
# Quoted spans (single / double quotes) that contain the dash.
_SINGLE_QUOTED_RE = re.compile(r"'[^']*? -- [^']*?'")
_DOUBLE_QUOTED_RE = re.compile(r'"[^"]*? -- [^"]*?"')

_FENCE_MARKER = '```'


def fix_dashes_in_text(text: str) -> str:
    """Replace every ' -- ' in one line of text with '. ' or ', '.

    A dash becomes '. ' when the character right after it is uppercase or a
    quote character (the dash is then treated as a sentence break); in every
    other case, including a dash at the very end of the text, it becomes ', '.

    Table separator lines and lines that start a code fence are returned
    unchanged.
    """
    if _TABLE_SEPARATOR_RE.match(text):
        return text
    if text.strip().startswith(_FENCE_MARKER):
        return text

    def replacer(m):
        # Slicing past the end of the string yields '', so no bounds check is needed.
        following = m.string[m.end():m.end() + 1]
        # Tuple membership (not substring membership) so '' never matches.
        if following.isupper() or following in ('"', "'"):
            return '. '
        return ', '

    return _DASH_RE.sub(replacer, text)


def fix_dashes_in_code_line(line: str) -> str:
    """Replace ' -- ' in one line of code, in comments and quoted strings only.

    A line whose first non-blank character is '#' is rewritten as plain text.
    On any other line only the quoted portions that contain ' -- ' are
    rewritten (single-quoted spans first, then double-quoted spans); code
    outside quotes is left untouched.
    """
    if ' -- ' not in line:
        return line

    # Comment lines (# ...)
    if line.lstrip().startswith('#'):
        return fix_dashes_in_text(line)

    def fix_string(m):
        return fix_dashes_in_text(m.group(0))

    line = _SINGLE_QUOTED_RE.sub(fix_string, line)
    return _DOUBLE_QUOTED_RE.sub(fix_string, line)


def _fix_markdown_lines(lines: List[str]) -> List[str]:
    """Fix the lines of one markdown cell, leaving fenced code blocks untouched."""
    fixed = []
    in_fence = False
    for line in lines:
        if line.strip().startswith(_FENCE_MARKER):
            # A fence marker opens or closes a block; the marker itself is kept as is.
            in_fence = not in_fence
            fixed.append(line)
        elif in_fence:
            # Content of a fenced block is code (e.g. `git checkout -- file`).
            fixed.append(line)
        else:
            fixed.append(fix_dashes_in_text(line))
    return fixed


def _fix_cell_lines(cell_type: str, lines: List[str]) -> List[str]:
    """Apply the rule set matching *cell_type*; other cell types are copied unchanged."""
    if cell_type == 'markdown':
        return _fix_markdown_lines(lines)
    if cell_type == 'code':
        return [fix_dashes_in_code_line(line) for line in lines]
    return list(lines)


def process_notebook(path: Path) -> int:
    """Fix dashes in the notebook at *path* and return the number of changed lines.

    The file is rewritten (indent of 1, non-ASCII characters kept as is,
    trailing newline) only when at least one line changed. A cell's source
    may be a list of lines or a single string; its form is preserved.
    """
    # Read and write as UTF-8 explicitly: the output is written with
    # ensure_ascii=False, so relying on the platform default encoding could
    # fail or corrupt non-ASCII text.
    with open(path, encoding='utf-8') as f:
        nb = json.load(f)

    changes = 0
    for cell in nb.get('cells', []):
        source = cell.get('source') or []
        source_is_string = isinstance(source, str)
        # keepends=True so that joining the pieces reproduces the text exactly.
        lines = source.splitlines(keepends=True) if source_is_string else source

        fixed = _fix_cell_lines(cell.get('cell_type', ''), lines)
        cell_changes = sum(1 for old, new in zip(lines, fixed) if old != new)
        if cell_changes:
            cell['source'] = ''.join(fixed) if source_is_string else fixed
            changes += cell_changes

    if changes > 0:
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(nb, f, indent=1, ensure_ascii=False)
            f.write('\n')

    return changes


def main():
    """Fix every notebook under the parent of this script's directory and print a summary."""
    root = Path(__file__).resolve().parent.parent
    total = 0
    files_changed = 0

    for nb_path in sorted(root.rglob('*.ipynb')):
        # Skip Jupyter's autosave copies.
        if '.ipynb_checkpoints' in nb_path.parts:
            continue
        n = process_notebook(nb_path)
        if n > 0:
            rel = nb_path.relative_to(root)
            print(f" {rel}: {n} fixes")
            total += n
            files_changed += 1

    print(f"\nTotal: {total} fixes across {files_changed} files")


if __name__ == '__main__':
    main()