Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Ok-landscape
GitHub Repository: Ok-landscape/computational-pipeline
Path: blob/main/reorganize_files.py
51 views
unlisted
1
#!/usr/bin/env python3
2
"""
3
File Reorganization Script
4
Reorganizes notebooks, plots, and posts into subdirectories.
5
"""
6
7
import os
8
import shutil
9
from pathlib import Path
10
import subprocess
11
12
# Directories
# Both locations live under the same repository checkout, so they are
# derived from one shared root instead of repeating the prefix.
_REPO_ROOT = Path("/home/user/computational-pipeline")

# Published notebooks (and the plots that sit next to them).
NOTEBOOKS_DIR = _REPO_ROOT / "notebooks" / "published"

# Generated social-media post text files.
POSTS_DIR = (
    _REPO_ROOT
    / "social-media-automation"
    / "repo-data"
    / "output"
    / "social_posts"
)
15
16
def get_notebook_list(notebooks_dir=None):
    """Return the ``*.ipynb`` files located directly in the notebooks directory.

    Args:
        notebooks_dir: Directory to scan (``str`` or ``Path``). When omitted
            or ``None`` the module-level ``NOTEBOOKS_DIR`` is used.

    Returns:
        A sorted list of ``Path`` objects. Only direct children are
        considered (the pattern is not recursive), and entries that are not
        regular files are skipped.
    """
    base = NOTEBOOKS_DIR if notebooks_dir is None else Path(notebooks_dir)
    # is_file() keeps a directory that merely happens to be named
    # "something.ipynb" from being treated as a notebook.
    return sorted(path for path in base.glob("*.ipynb") if path.is_file())
19
20
def get_plot_for_notebook(notebook_name, notebooks_dir=None):
    """Find the PNG plot files that belong to a notebook.

    A plot belongs to the notebook when its file name starts with
    ``notebook_name`` and ends with ``.png``. Matching uses plain string
    comparison, so characters that are special in glob patterns (``[``,
    ``*``, ``?``) inside the notebook name are treated literally.

    When another notebook in the same directory has a longer name that
    starts with ``notebook_name`` (e.g. ``wave`` vs ``wave_equation``), PNGs
    carrying that longer prefix are left for the other notebook instead of
    being claimed here (longest prefix wins).

    Args:
        notebook_name: Notebook file name without the ``.ipynb`` suffix.
        notebooks_dir: Directory to search (``str`` or ``Path``). When
            omitted or ``None`` the module-level ``NOTEBOOKS_DIR`` is used.

    Returns:
        A list of ``Path`` objects. The three conventional names
        (``*_analysis.png``, ``*_comprehensive_analysis.png``, ``*.png``)
        come first in that order, followed by any other matches sorted by
        file name. Empty when the directory is missing or nothing matches.
    """
    base = NOTEBOOKS_DIR if notebooks_dir is None else Path(notebooks_dir)
    if not base.is_dir():
        return []

    entries = list(base.iterdir())

    # Collect names of sibling notebooks whose name extends this one. A
    # sibling is either a loose "<stem>.ipynb" file or a "<stem>/" folder
    # that already contains "<stem>.ipynb".
    longer_stems = set()
    for entry in entries:
        if entry.suffix == ".ipynb" and entry.is_file():
            stem = entry.stem
        elif entry.is_dir() and (entry / f"{entry.name}.ipynb").is_file():
            stem = entry.name
        else:
            continue
        if stem != notebook_name and stem.startswith(notebook_name):
            longer_stems.add(stem)
    claimed_prefixes = tuple(longer_stems)

    candidates = {}
    for entry in entries:
        name = entry.name
        if not (name.startswith(notebook_name) and name.endswith(".png")):
            continue
        if not entry.is_file():
            continue
        # str.startswith with an empty tuple is False, so nothing is
        # filtered out when there are no longer-named siblings.
        if name.startswith(claimed_prefixes):
            continue
        candidates[name] = entry

    # Most common patterns first, in a fixed order.
    preferred_names = (
        f"{notebook_name}_analysis.png",
        f"{notebook_name}_comprehensive_analysis.png",
        f"{notebook_name}.png",
    )
    plots = [candidates.pop(name) for name in preferred_names if name in candidates]
    # Remaining prefix matches, sorted so the result does not depend on
    # directory listing order.
    plots.extend(candidates[name] for name in sorted(candidates))
    return plots
42
43
def get_post_for_notebook(notebook_name, posts_dir=None):
    """Locate the social-post text file that belongs to a notebook.

    Args:
        notebook_name: Notebook file name without the ``.ipynb`` suffix.
        posts_dir: Directory to look in (``str`` or ``Path``). When omitted
            or ``None`` the module-level ``POSTS_DIR`` is used.

    Returns:
        The ``Path`` of ``<notebook_name>_posts.txt`` inside the posts
        directory when it is a regular file, otherwise ``None``.
    """
    base = POSTS_DIR if posts_dir is None else Path(posts_dir)
    candidate = base / f"{notebook_name}_posts.txt"
    # is_file() rather than exists(): a directory with that name is not a
    # post that can be moved as a file.
    return candidate if candidate.is_file() else None
47
48
def create_subdirectory(notebook_path, notebooks_dir=None):
    """Create (or reuse) the folder that will hold a notebook's files.

    The folder is named after the notebook file without its suffix and is
    placed inside the notebooks directory, regardless of where
    ``notebook_path`` itself points.

    Args:
        notebook_path: Path of the notebook (``str`` or ``Path``); only its
            stem is used.
        notebooks_dir: Parent directory for the new folder. When omitted or
            ``None`` the module-level ``NOTEBOOKS_DIR`` is used.

    Returns:
        The ``Path`` of the folder, which exists when the call returns.

    Raises:
        FileExistsError: If a non-directory already occupies that path.
    """
    base = NOTEBOOKS_DIR if notebooks_dir is None else Path(notebooks_dir)
    subdir = base / Path(notebook_path).stem
    # parents=True so a not-yet-existing base directory does not abort the
    # call; exist_ok=True makes repeated runs harmless.
    subdir.mkdir(parents=True, exist_ok=True)
    return subdir
54
55
def move_file_with_git(source, dest):
    """Move a file with ``git mv``, falling back to a plain filesystem move.

    ``git mv`` is attempted first so the rename is recorded by git. If that
    fails for any reason -- git exits non-zero, the ``git`` executable is not
    installed, or the working directory cannot be entered -- the file is
    moved with ``shutil.move`` instead.

    Args:
        source: Path of the file to move (``str`` or ``Path``).
        dest: Destination path (``str`` or ``Path``).

    Returns:
        ``True`` when the file was moved by either method, ``False`` when
        both attempts failed. Failures are reported on stdout rather than
        raised.
    """
    source = Path(source)
    dest = Path(dest)

    try:
        subprocess.run(
            ["git", "mv", str(source), str(dest)],
            check=True,
            cwd=source.parent,
            capture_output=True,
        )
        return True
    except (subprocess.CalledProcessError, OSError) as git_error:
        # OSError covers a missing git binary or missing cwd; without it the
        # fallback below would never run in those situations.
        captured = getattr(git_error, "stderr", None)
        if captured:
            # Output was captured as bytes; git's own message is more
            # informative than the generic "returned non-zero exit status".
            detail = captured.decode(errors="replace").strip()
        else:
            detail = str(git_error)
        print(f" Git mv failed for {source.name}, trying regular move: {detail}")

    try:
        shutil.move(str(source), str(dest))
        return True
    except OSError as move_error:
        # shutil.Error is an OSError subclass, so it is handled here too.
        print(f" ERROR moving {source.name}: {move_error}")
        return False
69
70
def _relocate_into(source, subdir):
    """Move *source* into *subdir* unless a same-named entry is already there.

    Returns one of ``"present"`` (destination already exists, nothing done),
    ``"moved"`` (move succeeded) or ``"failed"`` (move reported failure).
    """
    target = subdir / source.name
    if target.exists():
        return "present"
    return "moved" if move_file_with_git(source, target) else "failed"


def reorganize_notebooks():
    """Move every notebook, its plots and its post file into its own folder.

    For each notebook returned by ``get_notebook_list()`` a folder named
    after the notebook is created, then the notebook, the plots found by
    ``get_plot_for_notebook()`` and the post found by
    ``get_post_for_notebook()`` are moved into it. Files whose destination
    already exists are left alone and counted as done, so the function can
    be run repeatedly.

    Returns:
        A dict with the keys ``'notebooks_processed'``, ``'plots_moved'``,
        ``'posts_moved'`` (counts that include files already in place) and
        ``'errors'`` (a list of messages). A move that fails is recorded in
        ``'errors'`` so it shows up in the final summary; an exception
        while handling one notebook is recorded the same way and processing
        continues with the next notebook.
    """
    notebooks = get_notebook_list()

    stats = {
        'notebooks_processed': 0,
        'plots_moved': 0,
        'posts_moved': 0,
        'errors': [],
    }

    total = len(notebooks)
    print(f"Found {total} notebooks to reorganize\n")

    for i, notebook_path in enumerate(notebooks, 1):
        notebook_name = notebook_path.stem
        print(f"[{i}/{total}] Processing: {notebook_name}")

        try:
            # Create subdirectory
            subdir = create_subdirectory(notebook_path)
            print(f" Created/verified: {subdir.name}/")

            # Move notebook
            outcome = _relocate_into(notebook_path, subdir)
            if outcome == "failed":
                stats['errors'].append(
                    f"Failed to move notebook {notebook_path.name}"
                )
            else:
                if outcome == "moved":
                    print(" ✓ Moved notebook")
                else:
                    print(" ✓ Notebook already in place")
                stats['notebooks_processed'] += 1

            # Move plots
            for plot in get_plot_for_notebook(notebook_name):
                outcome = _relocate_into(plot, subdir)
                if outcome == "failed":
                    stats['errors'].append(
                        f"Failed to move plot {plot.name} for {notebook_name}"
                    )
                    continue
                if outcome == "moved":
                    print(f" ✓ Moved plot: {plot.name}")
                else:
                    print(f" ✓ Plot already in place: {plot.name}")
                stats['plots_moved'] += 1

            # Move post
            post = get_post_for_notebook(notebook_name)
            if post is None:
                print(f" ⚠ No post file found (expected: {notebook_name}_posts.txt)")
                continue

            outcome = _relocate_into(post, subdir)
            if outcome == "failed":
                stats['errors'].append(
                    f"Failed to move post {post.name} for {notebook_name}"
                )
            else:
                if outcome == "moved":
                    print(" ✓ Moved post")
                else:
                    print(" ✓ Post already in place")
                stats['posts_moved'] += 1

        except Exception as e:
            # Per-notebook boundary: one broken notebook must not abort the
            # whole batch, so the problem is recorded and the loop goes on.
            error_msg = f"Error processing {notebook_name}: {e}"
            print(f" ✗ {error_msg}")
            stats['errors'].append(error_msg)

    return stats
134
135
def organize_template_posts():
    """Move the post files still left in the posts directory into ``templates/``.

    Every ``*_posts.txt`` file directly inside ``POSTS_DIR`` is moved to
    ``POSTS_DIR/templates``. Files whose destination already exists are
    skipped silently.

    NOTE(review): the script treats whatever is left at this point as a
    template post; a notebook post that could not be moved earlier would be
    swept up here as well -- confirm that this is acceptable.

    Returns:
        A dict with ``'moved'`` (number of files moved) and ``'errors'``
        (list of messages for moves that failed or raised).
    """
    templates_dir = POSTS_DIR / "templates"
    templates_dir.mkdir(exist_ok=True)

    print("\n" + "="*60)
    print("Organizing template posts")
    print("="*60)

    stats = {'moved': 0, 'errors': []}

    # Get all remaining posts (should be template posts). The pattern only
    # matches names ending in "_posts.txt", so ".backup" files never show up
    # here and need no separate filter. Sorted for a stable processing order.
    for post in sorted(POSTS_DIR.glob("*_posts.txt")):
        dest = templates_dir / post.name
        if dest.exists():
            continue

        try:
            moved = move_file_with_git(post, dest)
        except Exception as e:
            error_msg = f"Error moving {post.name}: {e}"
            print(f" ✗ {error_msg}")
            stats['errors'].append(error_msg)
            continue

        if moved:
            print(f" ✓ Moved: {post.name}")
            stats['moved'] += 1
        else:
            # The move helper reports failure by returning False; record it
            # so the summary's error count reflects it.
            stats['errors'].append(f"Failed to move {post.name}")

    return stats
163
164
def print_summary(notebook_stats, template_stats):
    """Write the final report of the reorganization run to stdout.

    Args:
        notebook_stats: Dict with the keys ``'notebooks_processed'``,
            ``'plots_moved'``, ``'posts_moved'`` and ``'errors'``.
        template_stats: Dict with the keys ``'moved'`` and ``'errors'``.

    The report lists the counters, the combined number of errors and then
    each error message grouped by origin.
    """
    rule = "=" * 60

    print("\n" + rule, "REORGANIZATION SUMMARY", rule, "\nNotebooks:", sep="\n")
    print(f" Processed: {notebook_stats['notebooks_processed']}")
    print(f" Plots moved: {notebook_stats['plots_moved']}")
    print(f" Posts moved: {notebook_stats['posts_moved']}")

    print("\nTemplate Posts:")
    print(f" Moved to templates/: {template_stats['moved']}")

    notebook_errors = notebook_stats['errors']
    template_errors = template_stats['errors']
    print(f"\nErrors: {len(notebook_errors) + len(template_errors)}")

    # One titled section per error source; empty sources are omitted.
    sections = (
        ("\nNotebook errors:", notebook_errors),
        ("\nTemplate errors:", template_errors),
    )
    for title, messages in sections:
        if not messages:
            continue
        print(title)
        for message in messages:
            print(f" - {message}")

    print(rule)
191
192
if __name__ == "__main__":
    # Announce what the run is going to do before any file is touched.
    divider = "=" * 60
    banner_lines = (
        "COMPUTATIONAL PIPELINE FILE REORGANIZATION",
        divider,
        "\nThis script will:",
        "1. Create subdirectories for each notebook",
        "2. Move notebooks, plots, and posts into subdirectories",
        "3. Move template posts to templates/ subdirectory",
        "4. Preserve git history using 'git mv' where possible",
        "\n" + divider + "\n",
    )
    for banner_line in banner_lines:
        print(banner_line)

    # Notebook folders are handled first; only afterwards are the posts
    # still remaining in the posts directory moved to templates/.
    per_notebook_result = reorganize_notebooks()
    leftover_posts_result = organize_template_posts()

    # Final report covering both phases.
    print_summary(per_notebook_result, leftover_posts_result)
210
211