Path: blob/main/reorganize_files.py
51 views
unlisted
#!/usr/bin/env python3
"""
File Reorganization Script
Reorganizes notebooks, plots, and posts into subdirectories.

Every notebook ``<name>.ipynb`` found directly inside the notebooks directory
gets a subdirectory ``<name>/`` which receives the notebook itself, its PNG
plots and its ``<name>_posts.txt`` file from the posts directory. Posts that
remain in the posts directory afterwards are moved to ``templates/``.
"""

import glob
import os
import shutil
import subprocess
from pathlib import Path

# Directories
NOTEBOOKS_DIR = Path("/home/user/computational-pipeline/notebooks/published")
POSTS_DIR = Path("/home/user/computational-pipeline/social-media-automation/repo-data/output/social_posts")


def _resolve_dir(override, default):
    """Return *override* as a Path, or *default* when no override was given."""
    return default if override is None else Path(override)


def _notebook_stems(notebooks_root):
    """Return the stems of all notebooks, including ones moved by an earlier run."""
    stems = {nb.stem for nb in notebooks_root.glob("*.ipynb")}
    # A notebook that was already reorganized lives in <stem>/<stem>.ipynb.
    stems.update(
        nb.stem
        for nb in notebooks_root.glob("*/*.ipynb")
        if nb.parent.name == nb.stem
    )
    return stems


def get_notebook_list(notebooks_dir=None):
    """Get list of all notebooks.

    Args:
        notebooks_dir: Directory to scan; defaults to ``NOTEBOOKS_DIR``.

    Returns:
        Sorted list of ``*.ipynb`` paths located directly in the directory.
    """
    return sorted(_resolve_dir(notebooks_dir, NOTEBOOKS_DIR).glob("*.ipynb"))


def get_plot_for_notebook(notebook_name, notebooks_dir=None):
    """Find plot files for a notebook.

    The conventional file names are checked first, then any other PNG whose
    name starts with the notebook name is added. A PNG is skipped when its
    name also starts with the name of a *longer-named* notebook (for example
    ``fourier_transform_analysis.png`` is not a plot of ``fourier`` when
    ``fourier_transform.ipynb`` exists), so notebooks sharing a prefix do not
    take each other's plots.

    Args:
        notebook_name: Notebook file name without the ``.ipynb`` suffix.
        notebooks_dir: Directory to search; defaults to ``NOTEBOOKS_DIR``.

    Returns:
        List of existing plot paths, conventional names first.
    """
    notebooks_root = _resolve_dir(notebooks_dir, NOTEBOOKS_DIR)

    # Most common patterns
    conventional_names = [
        f"{notebook_name}_analysis.png",
        f"{notebook_name}_comprehensive_analysis.png",
        f"{notebook_name}.png",
    ]
    plots = [
        notebooks_root / name
        for name in conventional_names
        if (notebooks_root / name).exists()
    ]

    longer_stems = [
        stem
        for stem in _notebook_stems(notebooks_root)
        if stem != notebook_name and stem.startswith(notebook_name)
    ]

    # Also check for any PNG with notebook name prefix. The name is escaped so
    # characters such as "[" or "*" in it are matched literally.
    prefix_pattern = f"{glob.escape(notebook_name)}*.png"
    for png in sorted(notebooks_root.glob(prefix_pattern)):
        if png in plots:
            continue
        if any(png.name.startswith(stem) for stem in longer_stems):
            continue
        plots.append(png)

    return plots


def get_post_for_notebook(notebook_name, posts_dir=None):
    """Find post file for a notebook.

    Args:
        notebook_name: Notebook file name without the ``.ipynb`` suffix.
        posts_dir: Directory to search; defaults to ``POSTS_DIR``.

    Returns:
        Path of ``<notebook_name>_posts.txt`` if it exists, otherwise None.
    """
    post_file = _resolve_dir(posts_dir, POSTS_DIR) / f"{notebook_name}_posts.txt"
    return post_file if post_file.exists() else None


def create_subdirectory(notebook_path, notebooks_dir=None):
    """Create subdirectory for notebook.

    Args:
        notebook_path: Path of the notebook; its stem names the subdirectory.
        notebooks_dir: Parent directory; defaults to ``NOTEBOOKS_DIR``.

    Returns:
        Path of the (possibly pre-existing) subdirectory.
    """
    subdir = _resolve_dir(notebooks_dir, NOTEBOOKS_DIR) / notebook_path.stem
    subdir.mkdir(exist_ok=True)
    return subdir


def move_file_with_git(source, dest):
    """Move file using git mv to preserve history.

    When ``git mv`` cannot be used (the file is untracked, the directory is
    not a repository, or git itself cannot be started) the file is moved with
    ``shutil.move`` instead.

    Args:
        source: Path of the file to move.
        dest: Destination path.

    Returns:
        True if the file was moved by either method, False otherwise.
    """
    try:
        subprocess.run(
            ["git", "mv", str(source), str(dest)],
            check=True,
            cwd=source.parent,
            capture_output=True,
        )
        return True
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing git executable or an unusable cwd; both
        # should still reach the plain-move fallback below.
        stderr = getattr(e, "stderr", None)
        reason = stderr.decode(errors="replace").strip() if stderr else str(e)
        print(f" Git mv failed for {source.name}, trying regular move: {reason}")

    try:
        shutil.move(str(source), str(dest))
        return True
    except Exception as e2:
        print(f" ERROR moving {source.name}: {e2}")
        return False


def _place_file(source, subdir, moved_msg, in_place_msg, errors):
    """Move *source* into *subdir*; return True if it is there afterwards."""
    dest = subdir / source.name
    if dest.exists():
        print(in_place_msg)
        return True
    if move_file_with_git(source, dest):
        print(moved_msg)
        return True
    # Record the failure so the final summary does not report zero errors.
    errors.append(f"Failed to move {source.name} into {subdir.name}/")
    return False


def reorganize_notebooks(notebooks_dir=None, posts_dir=None):
    """Main reorganization function.

    Args:
        notebooks_dir: Directory holding notebooks and plots; defaults to
            ``NOTEBOOKS_DIR``.
        posts_dir: Directory holding post files; defaults to ``POSTS_DIR``.

    Returns:
        Dict with the counters ``notebooks_processed``, ``plots_moved`` and
        ``posts_moved`` (files already in place are counted too) and the list
        ``errors`` of error messages.
    """
    notebooks_root = _resolve_dir(notebooks_dir, NOTEBOOKS_DIR)
    posts_root = _resolve_dir(posts_dir, POSTS_DIR)
    notebooks = get_notebook_list(notebooks_root)

    stats = {
        'notebooks_processed': 0,
        'plots_moved': 0,
        'posts_moved': 0,
        'errors': [],
    }
    errors = stats['errors']

    print(f"Found {len(notebooks)} notebooks to reorganize\n")

    for i, notebook_path in enumerate(notebooks, 1):
        notebook_name = notebook_path.stem
        print(f"[{i}/{len(notebooks)}] Processing: {notebook_name}")

        try:
            # Create subdirectory
            subdir = create_subdirectory(notebook_path, notebooks_root)
            print(f" Created/verified: {subdir.name}/")

            # Move notebook
            if _place_file(
                notebook_path,
                subdir,
                " ✓ Moved notebook",
                " ✓ Notebook already in place",
                errors,
            ):
                stats['notebooks_processed'] += 1

            # Move plots
            for plot in get_plot_for_notebook(notebook_name, notebooks_root):
                if _place_file(
                    plot,
                    subdir,
                    f" ✓ Moved plot: {plot.name}",
                    f" ✓ Plot already in place: {plot.name}",
                    errors,
                ):
                    stats['plots_moved'] += 1

            # Move post
            post = get_post_for_notebook(notebook_name, posts_root)
            if post is None:
                print(f" ⚠ No post file found (expected: {notebook_name}_posts.txt)")
            elif _place_file(
                post,
                subdir,
                " ✓ Moved post",
                " ✓ Post already in place",
                errors,
            ):
                stats['posts_moved'] += 1

        except Exception as e:
            # Per-notebook boundary: record the problem and continue with the
            # remaining notebooks instead of aborting the whole run.
            error_msg = f"Error processing {notebook_name}: {e}"
            print(f" ✗ {error_msg}")
            errors.append(error_msg)

    return stats


def organize_template_posts(posts_dir=None):
    """Move template posts to templates/ subdirectory.

    Every ``*_posts.txt`` file still located directly in the posts directory
    is treated as a template post.

    Args:
        posts_dir: Directory holding post files; defaults to ``POSTS_DIR``.

    Returns:
        Dict with the counter ``moved`` and the list ``errors``.
    """
    posts_root = _resolve_dir(posts_dir, POSTS_DIR)
    templates_dir = posts_root / "templates"
    stats = {'moved': 0, 'errors': []}

    try:
        templates_dir.mkdir(exist_ok=True)
    except OSError as e:
        # Without the target directory nothing can be moved; report it rather
        # than crashing before the summary is printed.
        error_msg = f"Error creating {templates_dir}: {e}"
        print(f" ✗ {error_msg}")
        stats['errors'].append(error_msg)
        return stats

    print("\n" + "="*60)
    print("Organizing template posts")
    print("="*60)

    # Get all remaining posts (should be template posts)
    for post in sorted(posts_root.glob("*_posts.txt")):
        dest = templates_dir / post.name
        if dest.exists():
            continue
        try:
            if move_file_with_git(post, dest):
                print(f" ✓ Moved: {post.name}")
                stats['moved'] += 1
            else:
                stats['errors'].append(f"Failed to move {post.name} into templates/")
        except Exception as e:
            error_msg = f"Error moving {post.name}: {e}"
            print(f" ✗ {error_msg}")
            stats['errors'].append(error_msg)

    return stats


def print_summary(notebook_stats, template_stats):
    """Print reorganization summary.

    Args:
        notebook_stats: Dict returned by ``reorganize_notebooks``.
        template_stats: Dict returned by ``organize_template_posts``.
    """
    print("\n" + "="*60)
    print("REORGANIZATION SUMMARY")
    print("="*60)
    print("\nNotebooks:")
    print(f" Processed: {notebook_stats['notebooks_processed']}")
    print(f" Plots moved: {notebook_stats['plots_moved']}")
    print(f" Posts moved: {notebook_stats['posts_moved']}")

    print("\nTemplate Posts:")
    print(f" Moved to templates/: {template_stats['moved']}")

    total_errors = len(notebook_stats['errors']) + len(template_stats['errors'])
    print(f"\nErrors: {total_errors}")

    if notebook_stats['errors']:
        print("\nNotebook errors:")
        for error in notebook_stats['errors']:
            print(f" - {error}")

    if template_stats['errors']:
        print("\nTemplate errors:")
        for error in template_stats['errors']:
            print(f" - {error}")

    print("="*60)


def main():
    """Run the full reorganization and print the summary."""
    print("COMPUTATIONAL PIPELINE FILE REORGANIZATION")
    print("="*60)
    print("\nThis script will:")
    print("1. Create subdirectories for each notebook")
    print("2. Move notebooks, plots, and posts into subdirectories")
    print("3. Move template posts to templates/ subdirectory")
    print("4. Preserve git history using 'git mv' where possible")
    print("\n" + "="*60 + "\n")

    # Reorganize notebooks
    notebook_stats = reorganize_notebooks()

    # Organize template posts
    template_stats = organize_template_posts()

    # Print summary
    print_summary(notebook_stats, template_stats)


if __name__ == "__main__":
    main()