#!/usr/bin/env python
import json, os, shutil, sys
# Short module-level aliases for the os / os.path helpers used throughout
# this script.
join = os.path.join
isdir = os.path.isdir
islink = os.path.islink
listdir = os.listdir
path_exists = os.path.exists
splitext = os.path.splitext
getsize = os.path.getsize
from subprocess import Popen, PIPE
# Absolute, symlink-resolved path of this script; test() re-invokes it as a
# subprocess.
multi_diffbundler = os.path.abspath(os.path.realpath(__file__))
# Path of the 'diffbundler' program, looked up in the same directory as this
# script.
diffbundler = join(os.path.split(multi_diffbundler)[0], 'diffbundler')
# Global verbosity flag read by cmd(); switched on by the --verbose option.
verbose = False
def cmd(args, dry_run=False, wait=True, ignore_errors=False):
    """
    Run an external command.

    INPUT:

    - ``args`` -- a string (executed through the shell) or a list of
      strings (executed directly, without a shell)
    - ``dry_run`` -- if true, the command is only echoed (when the global
      ``verbose`` flag is set) and nothing is executed
    - ``wait`` -- if true, block until the command finishes; if false,
      return the running ``Popen`` object immediately
    - ``ignore_errors`` -- if false, a nonzero exit status of the command
      terminates this script via ``sys.exit`` with that same status

    OUTPUT:

    - ``None`` for a dry run
    - the ``Popen`` object when ``wait`` is false
    - otherwise a dict ``{'stdout': ..., 'stderr': ...}`` holding the
      captured output of the command
    """
    shell = isinstance(args, str)
    if verbose:
        print(args if shell else ' '.join(args))
    if dry_run:
        return
    out = Popen(args, stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=shell)
    if not wait:
        return out
    # communicate() drains stdout and stderr *while* waiting for the child.
    # Calling wait() first and reading afterwards deadlocks as soon as the
    # child writes more than the OS pipe buffer holds.  It also closes the
    # child's stdin, so a command that reads input sees EOF instead of
    # hanging.
    stdout, stderr = out.communicate()
    if verbose:
        sys.stdout.write(stdout)
        sys.stdout.write(stderr)
    if out.returncode and not ignore_errors:
        sys.exit(out.returncode)
    return {'stdout': stdout, 'stderr': stderr}
def test():
    """
    Smoke test: build a throw-away repository with two nested repositories
    under /tmp/test and run this script's 'create' command on it after each
    round of changes.
    """
    target = '/tmp/test' # stupid
    # Re-invoke this very script to bundle up the current state of target.
    save = [multi_diffbundler, 'create', target, join(target, '.git/salvus/bundles')]
    add_big_file = "cd x/y; head -c 15000000 /dev/urandom > big_file; git add big_file; git commit -a -m 'big file added'"

    # Start from an empty target directory.
    cmd(["rm","-rf", target])
    cmd(["mkdir", "-p", target])
    os.chdir(target)

    steps = [
        # Outer repository with nested repositories x and x/y.
        "git init; echo 'foo'>123; git add 123; git commit -a -m 'init'",
        "mkdir x; cd x; git init; touch abc; git add abc; git commit -a -m 'init'; git branch test",
        "mkdir x/y; cd x/y; git init; touch abc; git add abc; git commit -a -m 'init'",
        "git add .; git commit -a -m 'add subdirs'",
        "mkdir -p .git/salvus/bundles/",
        save,
        "touch abc; git add abc; git commit -a -m 'abc'",
        save,
        # Now make a big random file in x/y/ and add to repo
        add_big_file,
        save,
        # And make another, which must force creation of a new bundle 1.diffbundle.
        add_big_file,
        save,
        # Now immediately do another save; now the 1.diffbundle should not be considered new.
        save,
    ]
    for step in steps:
        cmd(step)
def cleanup_path(path):
    """
    Make sure ``path`` is a directory and return its canonical form.

    A missing path is created (including intermediate directories).  A path
    that exists but is not a directory raises ``ValueError``.  The return
    value is ``os.path.realpath(path)``.
    """
    if os.path.exists(path):
        if not os.path.isdir(path):
            raise ValueError("'%s' is not a directory"%path)
    else:
        os.makedirs(path)
    return os.path.realpath(path)
def newest_bundle_filename(path):
    """
    Return the filename of the highest-numbered bundle in ``path``.

    Every non-directory entry of ``path`` whose name, minus its extension,
    parses as an integer is treated as a bundle number.  The result is
    ``<path>/<largest number>.diffbundle``.

    Returns ``None`` if ``path`` does not exist or holds no numbered files.
    Entries with non-numeric names (editor backups, dotfiles, ...) are
    ignored rather than aborting the whole run with a ``ValueError``.
    """
    if not os.path.exists(path):
        return None
    numbers = []
    for filename in listdir(path):
        if isdir(join(path, filename)):
            continue
        try:
            numbers.append(int(splitext(filename)[0]))
        except ValueError:
            # Not a numbered bundle file; skip it.
            continue
    if not numbers:
        return None
    return join(path, "%s.diffbundle" % max(numbers))
def create_multidiffbundle(working_dir, bundler_dir, thresh, new_bundles):
    """
    Bundle the repository at ``working_dir`` (if it is one) into
    ``bundler_dir``, then do the same for every subdirectory, mirroring the
    directory layout below ``bundler_dir``.

    INPUT:

    - ``working_dir`` -- directory to process
    - ``bundler_dir`` -- directory that receives the bundles for
      ``working_dir``
    - ``thresh`` -- size limit in megabytes (1 MB = 1000000 bytes); if the
      previously newest bundle is smaller than this, it is deleted together
      with the freshly written one and a single replacement is created.
      A value <= 0 disables this.
    - ``new_bundles`` -- list that is appended to in place with the filename
      of each bundle written or rewritten by this call

    Side effects: changes the process working directory, commits any
    pending changes in each repository and runs "git gc" on it.
    """
    if path_exists(join(working_dir, '.git')):
        create_cmd = [diffbundler, 'create', working_dir, bundler_dir]
        previous = newest_bundle_filename(bundler_dir)

        # "git gc" before bundling is *very* important, since
        # otherwise the bundles mushroom for no good reason.
        os.chdir(working_dir)
        cmd(['git', 'commit', '-a', '-m', 'Salvus save'], ignore_errors=True)
        cmd(['git', 'gc'])

        # Try making a new bundle first
        cmd(create_cmd, dry_run=False)
        latest = newest_bundle_filename(bundler_dir)

        if latest != previous:
            # There were changes to the repo that we had not already bundled up.
            # First, check if we should merge the last two bundles.
            mergeable = (thresh > 0 and previous is not None
                         and getsize(previous)/1000000.0 < thresh)
            if not mergeable:
                new_bundles.append(latest)
            else:
                # Drop both bundles and bundle again; the replacement is
                # reported under the previous bundle's filename.
                os.unlink(previous)
                os.unlink(latest)
                cmd(create_cmd, dry_run=False)
                new_bundles.append(previous)

    # NOTE(review): subdirectories are walked whether or not working_dir is
    # itself a repository -- confirm this is the intended nesting.
    for name in listdir(working_dir):
        child = join(working_dir, name)
        if islink(child) or not isdir(child):
            continue
        create_multidiffbundle(child, join(bundler_dir, name), thresh, new_bundles)
def extract_multidiffbundle(bundle_dir, working_dir):
    """
    Run "diffbundler extract" from ``bundle_dir`` into ``working_dir`` when
    ``working_dir`` exists, then recurse into every subdirectory of
    ``bundle_dir`` using the same relative path below ``working_dir``.

    Each (bundle_dir, working_dir) pair visited is printed to stdout.
    """
    print("%s %s" % (bundle_dir, working_dir))
    if os.path.exists(working_dir):
        cmd([diffbundler, 'extract', bundle_dir, working_dir], dry_run=False)
    # NOTE(review): subdirectories of bundle_dir are visited even when
    # working_dir is missing -- confirm this is the intended nesting.
    for name in listdir(bundle_dir):
        sub_bundle_dir = join(bundle_dir, name)
        if not isdir(sub_bundle_dir):
            continue
        extract_multidiffbundle(sub_bundle_dir, join(working_dir, name))
#########################################################################################################
if __name__ == '__main__':
    # Command-line entry point.  Three sub-commands are offered:
    #   test                          -- run test()
    #   create  <repodir> <bundledir> -- bundle a repository tree and print
    #                                    the new bundle paths as JSON
    #   extract <bundledir> <extractdir> -- unpack bundles into a directory
    import argparse

    parser = argparse.ArgumentParser(
        description='Utility script for managing git repositories that may contain other git repositories, stored in diffbundles')
    parser.add_argument('--thresh', dest='thresh', type=int, default=5,
                        help='threshhold in megabytes for creating new bundles')
    parser.add_argument('--verbose', action="store_true",
                        help='be more verbose (default: False)')

    subparsers = parser.add_subparsers()

    test_parser = subparsers.add_parser(
        'test', help='run a test (involves creating a directory /tmp/test/)')

    create_parser = subparsers.add_parser(
        'create', help='update new multi-diffbundle path')
    create_parser.add_argument('repodir', type=str, help='repository to bundle')
    create_parser.add_argument('bundledir', type=str, help='directory to place new diffbundle')

    extract_parser = subparsers.add_parser(
        'extract', help='extract a repository with nested git repos from a multi-diffbundle path')
    extract_parser.add_argument('bundledir', type=str, help='directory with diffbundles')
    extract_parser.add_argument('extractdir', type=str, help='output directory')

    args = parser.parse_args()

    if args.verbose:
        verbose = True

    # The sub-command is recognised by which positional arguments ended up
    # in the parsed namespace.
    if hasattr(args, 'repodir'):
        bundledir = cleanup_path(args.bundledir)
        new_bundles = []
        create_multidiffbundle(cleanup_path(args.repodir), bundledir, args.thresh, new_bundles)
        # Report the bundle paths relative to bundledir: drop the directory
        # prefix and the path separator that follows it.
        prefix_len = len(bundledir) + 1
        new_bundles = [f[prefix_len:] for f in new_bundles]
        print(json.dumps(new_bundles, separators=(',',':')))
    elif hasattr(args, 'extractdir'):
        bundledir = cleanup_path(args.bundledir)
        extract_multidiffbundle(bundledir, cleanup_path(args.extractdir))
    elif 'test' in sys.argv:
        test()
    else:
        raise RuntimeError('command line arguments not properly processed')