Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Path: blob/master/src/scripts/multi-diffbundler
Views: 687
#!/usr/bin/env python
"""Create and extract "multi-diffbundles" of nested git repositories.

A directory tree is walked recursively; for every directory that contains a
``.git`` entry the sibling ``diffbundler`` script is invoked to create (or
extract) bundle files named ``<n>.diffbundle`` in a mirrored directory tree.

The code runs unchanged on Python 2 and Python 3.
"""

import json
import os
import shutil
import sys
from subprocess import Popen, PIPE

# Names exported by a star-import.  The destructive self-test ``test`` (it
# runs ``rm -rf /tmp/test``) is deliberately left out.
__all__ = [
    'cmd',
    'cleanup_path',
    'newest_bundle_filename',
    'create_multidiffbundle',
    'extract_multidiffbundle',
]

join = os.path.join
isdir = os.path.isdir
islink = os.path.islink
listdir = os.listdir
path_exists = os.path.exists
splitext = os.path.splitext
getsize = os.path.getsize

# Absolute path of this script and of the `diffbundler` script that is
# expected to live in the same directory.
multi_diffbundler = os.path.abspath(os.path.realpath(__file__))
diffbundler = join(os.path.split(multi_diffbundler)[0], 'diffbundler')

# Set to True by the --verbose command line flag.
verbose = False


def _to_text(data):
    """Return subprocess output as ``str`` (decodes bytes on Python 3)."""
    if isinstance(data, str):
        return data
    return data.decode('utf-8', 'replace')


def cmd(args, dry_run=False, wait=True, ignore_errors=False):
    """Run an external command.

    Parameters:
        args: a string (run through the shell) or a list of arguments
            (run directly, without a shell).
        dry_run: if true, only echo the command (when verbose) and return
            None without running anything.
        wait: if true, block until the command finishes and return its
            output; otherwise return the running ``Popen`` object.
        ignore_errors: if false, a non-zero exit status terminates this
            process via ``sys.exit`` with that same status.

    Returns:
        ``{'stdout': ..., 'stderr': ...}`` when ``wait`` is true, the
        ``Popen`` object when ``wait`` is false, None for a dry run.
    """
    shell = isinstance(args, str)
    if verbose:
        print(args if shell else ' '.join(args))
    if dry_run:
        return None

    proc = Popen(args, stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=shell)
    if not wait:
        return proc

    # communicate() drains both pipes while waiting.  Calling wait() first
    # and reading afterwards deadlocks as soon as the child fills a pipe
    # buffer.  It also closes the child's stdin, so a child that reads
    # stdin sees EOF instead of hanging.
    stdout, stderr = proc.communicate()
    stdout = _to_text(stdout)
    stderr = _to_text(stderr)
    status = proc.returncode

    if verbose:
        sys.stdout.write(stdout)
        sys.stdout.write(stderr)
    elif status and not ignore_errors:
        # Do not exit silently: surface the failing command's diagnostics.
        sys.stderr.write(stderr)

    if status and not ignore_errors:
        sys.exit(status)
    return {'stdout': stdout, 'stderr': stderr}


def test():
    """Exercise bundle creation on throw-away nested repos in /tmp/test.

    WARNING: removes /tmp/test recursively before starting.
    """
    target = '/tmp/test'  # stupid
    bundles = join(target, '.git/salvus/bundles')
    cmd(["rm", "-rf", target])
    cmd(["mkdir", "-p", target])
    os.chdir(target)
    cmd("git init; echo 'foo'>123; git add 123; git commit -a -m 'init'")
    cmd("mkdir x; cd x; git init; touch abc; git add abc; git commit -a -m 'init'; git branch test")
    cmd("mkdir x/y; cd x/y; git init; touch abc; git add abc; git commit -a -m 'init'")
    cmd("git add .; git commit -a -m 'add subdirs'")
    cmd("mkdir -p .git/salvus/bundles/")
    cmd([multi_diffbundler, 'create', target, bundles])
    cmd("touch abc; git add abc; git commit -a -m 'abc'")
    cmd([multi_diffbundler, 'create', target, bundles])

    # Now make a big random file in x/y/ and add to repo
    cmd("cd x/y; head -c 15000000 /dev/urandom > big_file; git add big_file; git commit -a -m 'big file added'")
    cmd([multi_diffbundler, 'create', target, bundles])

    # And make another, which must force creation of a new bundle 1.diffbundle.
    cmd("cd x/y; head -c 15000000 /dev/urandom > big_file; git add big_file; git commit -a -m 'big file added'")
    cmd([multi_diffbundler, 'create', target, bundles])

    # Now immediately do another save; now the 1.diffbundle should not be considered new.
    cmd([multi_diffbundler, 'create', target, bundles])


def cleanup_path(path):
    """Ensure `path` is an existing directory and return its real path.

    The directory (including parents) is created when missing.

    Raises:
        ValueError: if `path` exists but is not a directory.
    """
    if not os.path.exists(path):
        os.makedirs(path)
    elif not os.path.isdir(path):
        raise ValueError("'%s' is not a directory"%path)
    return os.path.realpath(path)


def newest_bundle_filename(path):
    """Return the highest-numbered ``<n>.diffbundle`` file in `path`.

    Only regular entries named ``<integer>.diffbundle`` are considered;
    sub-directories and any other files are ignored rather than causing a
    ValueError.

    Returns:
        The joined path of the newest bundle, or None when `path` does not
        exist or holds no bundle.
    """
    if not os.path.exists(path):
        return None
    numbers = []
    for filename in listdir(path):
        if isdir(join(path, filename)):
            continue
        stem, ext = splitext(filename)
        if ext != '.diffbundle':
            continue
        try:
            numbers.append(int(stem))
        except ValueError:
            # Not one of our numbered bundles; skip it.
            continue
    if not numbers:
        return None
    return join(path, "%s.diffbundle"%max(numbers))


def create_multidiffbundle(working_dir, bundler_dir, thresh, new_bundles):
    """Bundle `working_dir` and, recursively, every repository below it.

    Parameters:
        working_dir: directory to process; bundled only if it contains a
            ``.git`` entry.
        bundler_dir: directory receiving the bundles of `working_dir`;
            sub-directories use the correspondingly named sub-directory.
        thresh: size threshold in megabytes (10**6 bytes).  When positive
            and the previously newest bundle is smaller than this, that
            bundle and the freshly created one are deleted and re-created
            as one.
        new_bundles: list that is appended to in place with the path of
            each bundle created or rewritten.

    Side effect: changes the current working directory to each repository
    that is bundled.
    """
    if path_exists(join(working_dir, '.git')):
        last_bundlename = newest_bundle_filename(bundler_dir)
        # "git gc" before bundling is *very* important, since
        # otherwise the bundles mushroom for no good reason.
        os.chdir(working_dir)
        cmd(['git', 'commit', '-a', '-m', 'Salvus save'], ignore_errors=True)
        cmd(['git', 'gc'])
        # Try making a new bundle first
        cmd([diffbundler, 'create', working_dir, bundler_dir], dry_run=False)
        new_bundlename = newest_bundle_filename(bundler_dir)
        if new_bundlename != last_bundlename:
            # There were changes to the repo that we had not already bundled up.
            # First, check if we should merge the last two bundles.
            if (thresh > 0 and last_bundlename is not None
                    and getsize(last_bundlename)/1000000.0 < thresh):
                os.unlink(last_bundlename)
                os.unlink(new_bundlename)
                # NOTE(review): presumably diffbundler now writes one bundle
                # under the old name `last_bundlename` -- confirm.
                cmd([diffbundler, 'create', working_dir, bundler_dir], dry_run=False)
                new_bundles.append(last_bundlename)
            else:
                new_bundles.append(new_bundlename)

    for path in listdir(working_dir):
        if path == '.git':
            # Git's own metadata never contains a working repository, so
            # there is nothing to bundle in there.
            continue
        p = join(working_dir, path)
        if isdir(p) and not islink(p):
            create_multidiffbundle(p, join(bundler_dir, path), thresh, new_bundles)


def extract_multidiffbundle(bundle_dir, working_dir):
    """Extract `bundle_dir` into `working_dir`, then recurse into sub-dirs.

    ``diffbundler extract`` is only run when `working_dir` already exists.
    Every sub-directory of `bundle_dir` is extracted into the sub-directory
    of `working_dir` with the same name.
    """
    print("%s %s" % (bundle_dir, working_dir))
    if os.path.exists(working_dir):
        cmd([diffbundler, 'extract', bundle_dir, working_dir], dry_run=False)
    for path in listdir(bundle_dir):
        if isdir(join(bundle_dir, path)):
            extract_multidiffbundle(join(bundle_dir, path), join(working_dir, path))


#########################################################################################################

if __name__ == '__main__':

    import argparse
    parser = argparse.ArgumentParser(
        description='Utility script for managing git repositories that may contain other git repositories, stored in diffbundles')

    parser.add_argument('--thresh', help='threshhold in megabytes for creating new bundles',
                        default=5, type=int, dest='thresh')
    parser.add_argument('--verbose', help='be more verbose (default: False)',
                        action="store_true")

    subparsers = parser.add_subparsers()

    test_parser = subparsers.add_parser('test', help='run a test (involves creating a directory /tmp/test/)')

    create_parser = subparsers.add_parser('create', help='update new multi-diffbundle path')
    create_parser.add_argument('repodir', type=str, help='repository to bundle')
    create_parser.add_argument('bundledir', type=str, help='directory to place new diffbundle')

    extract_parser = subparsers.add_parser(
        'extract', help='extract a repository with nested git repos from a multi-diffbundle path')
    extract_parser.add_argument('bundledir', type=str, help='directory with diffbundles')
    extract_parser.add_argument('extractdir', type=str, help='output directory')

    args = parser.parse_args()
    if args.verbose:
        verbose = True

    # The sub-command is recognised by which positional arguments it defined.
    if 'repodir' in args:
        bundledir = cleanup_path(args.bundledir)
        new_bundles = []
        create_multidiffbundle(cleanup_path(args.repodir), bundledir, args.thresh, new_bundles)
        # Report the bundles relative to the bundle directory, as compact JSON.
        new_bundles = [os.path.relpath(f, bundledir) for f in new_bundles]
        print(json.dumps(new_bundles, separators=(',', ':')))
    elif 'extractdir' in args:
        bundledir = cleanup_path(args.bundledir)
        extract_multidiffbundle(bundledir, cleanup_path(args.extractdir))
    elif 'test' in sys.argv:
        test()
    else:
        raise RuntimeError('command line arguments not properly processed')