#!/usr/bin/env python ''' Copyright (C) 2013 R. Andrew Ohana Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Usage: # creates a new bundle that includes the commits missing from # bundle-dir (NOTE: bundle-dir must not exist!): $ diffbundler create repo-dir bundle-dir # This extracts the bundles into a repository in out-dir: $ diffbundler extract bundle-dir extract-dir ''' import os, shutil, subprocess, tarfile, tempfile REFSHEADS = os.path.join('refs', 'heads') REFSTAGS = os.path.join('refs', 'tags') def cleanup_path(path): if not os.path.exists(path): os.makedirs(path) elif not os.path.isdir(path): raise ValueError("'%s' is not a directory"%path) return os.path.realpath(path) def flattened_iter(*args): for arg in args: if hasattr(arg, '__iter__'): for a in flattened_iter(*arg): yield a else: yield arg _curdir = None def git_popen_unclean(directory, *args): global _curdir if _curdir != directory: os.chdir(directory) _curdir = directory ret = subprocess.Popen(flattened_iter('git', *args), stdout=subprocess.PIPE, stderr=subprocess.PIPE) return ret def git_popen(directory, *args): args = tuple(args) ret = git_popen_unclean(directory, *args) if ret.wait(): _, stderr = ret.communicate() print(stderr) raise RuntimeError("an error occurred running '%s'"%' '.join( flattened_iter('git', *args))) return ret def git_retcode(*args): p = git_popen_unclean(*args) return p.wait() def git_stdout(*args): p = git_popen(*args) stdout, _ = p.communicate() ret = tuple(bt.decode('utf-8') for bt in stdout.splitlines()) if len(ret) == 1: return ret[0] else: return ret def get_shas(refs_dir): ret = set() for f in ('HEAD', 'heads', 'tags'): try: f = open(f, 'r') except IOError: continue for l in f: ret.add(l.split(' ')[0]) f.close() return list(sorted(ret)) class SelfCleaningTempDir(str): def __new__(self): path = tempfile.mkdtemp() return str.__new__(self, path) def __enter__(self): return self def __exit__(self, *args): shutil.rmtree(self) class DiffBundle(object): def __init__(self, path): self._tarf = tarfile.open(path, 'r') def extract(self, extract_dir): try: tari = self._tarf.getmember('bundle') except KeyError: return with SelfCleaningTempDir() as tmpd: self._tarf.extract(tari, tmpd) git_popen(extract_dir, 'bundle', 'unbundle', os.path.join(tmpd, tari.name)) def extract_refs(self, extract_dir): try: tari = self._tarf.getmember('refs') except KeyError: return with SelfCleaningTempDir() as tmpd: self._tarf.extract(tari, tmpd) git_popen(extract_dir, 'checkout', '-f', git_stdout(extract_dir, 'bundle', 'unbundle', os.path.join(tmpd, tari.name)).split(' ')[0]) class DiffBundler(object): def __init__(self, bundle_dir): self.bundle_dir = bundle_dir def __len__(self): try: return self._len except AttributeError: pass self._len = 0 for f in sorted(os.listdir(self.bundle_dir)): try: if f.endswith('.diffbundle'): self._len = max(self._len, int(f[:-11])+1) except ValueError: pass return self._len def __getitem__(self, i): if not isinstance(i, int): raise TypeError('list indices must be integers') if i < 0: i += len(self) if i < 0 or i >= len(self): raise IndexError('list index out of range') try: return self._bundles[i] except AttributeError: self._bundles = {} except KeyError: pass ret = self._bundles[i] = DiffBundle( os.path.join(self.bundle_dir, str(i)+'.diffbundle')) return ret def __iter__(self): for i in range(len(self)): yield self[i] def __nonzero__(self): return len(self) def extract(self, extract_dir): git_popen(extract_dir, 'init') for bundle in self: bundle.extract(extract_dir) with SelfCleaningTempDir() as tmpd: self._extract_refs(tmpd) for fn, cmd in (('tags', 'tag'), ('heads', 'branch')): f = open(os.path.join(tmpd, fn), 'r') for ref in f: ref = ref.strip('\n') sha, name = ref.split(' ') git_popen(extract_dir, cmd, name, sha) f.close() # checkout the appropriate HEAD f = open(os.path.join(tmpd, 'HEAD'), 'r') sha, name = f.read().split(' ') f.close() if name == 'HEAD': git_popen(extract_dir, 'checkout', '-f', sha) else: git_popen(extract_dir, 'checkout', '-f', name) def _extract_refs(self, extract_dir): git_popen(extract_dir, 'init') for bundle in self: bundle.extract_refs(extract_dir) def create_diffbundle(self, repo_dir): with SelfCleaningTempDir() as tmpd: self._extract_refs(tmpd) old_shas = get_shas(tmpd) HEADf = open(os.path.join(tmpd, 'HEAD'), 'w') HEADf.write(git_stdout(repo_dir, 'rev-parse', 'HEAD')) HEADf.write(' ') HEADf.write(git_stdout(repo_dir, 'rev-parse', '--abbrev-ref', 'HEAD')) HEADf.close() args = [] headsf = open(os.path.join(tmpd, 'heads'), 'w') tagsf = open(os.path.join(tmpd, 'tags'), 'w') for ref in sorted(git_stdout(repo_dir, 'show-ref', '--head')): sha, name = ref.split(' ') if name.startswith(REFSHEADS): name = name[len(REFSHEADS)+1:] headsf.write(sha+' '+name+'\n') elif name.startswith(REFSTAGS): name = name[len(REFSTAGS)+1:] tagsf.write(sha+' '+name+'\n') m = -1 for old_sha in old_shas: tmb = git_stdout(repo_dir, 'merge-base', old_sha, sha) tm = len(git_stdout(repo_dir, 'rev-list', tmb+'..'+sha)) if m < 0 or tm < m: merge_base = tmb m = tm if not m: break if m > 0: args.append(merge_base+'..'+name) headsf.close() tagsf.close() if not self: args = ['--all'] # disable making a new (useless) diffbundle if there is a stash. args = [x for x in args if not x.endswith('..refs/stash')] bundle = bool(args) if self: refs = bool(git_retcode(tmpd, 'diff', '--quiet')) else: refs = True if not (bundle or refs): print 'WARNING: there were no changes in the repository' return if bundle: git_popen(repo_dir, 'bundle', 'create', os.path.join(tmpd, 'bundle'), args) if refs: git_popen(tmpd, 'add', 'HEAD', 'heads', 'tags') git_popen(tmpd, 'commit', "--message='%s.diffbundle"%len(self)) if not self: git_popen(tmpd, 'checkout', '-f', git_stdout(tmpd, 'show-ref', '--hash')) git_popen(tmpd, 'branch', '--delete', 'master') arg = 'HEAD^..HEAD' if self else '--all' git_popen(tmpd, 'bundle', 'create', 'refs', arg) new_bundle_path = os.path.join( self.bundle_dir, str(len(self))+'.diffbundle') try: tarf = tarfile.open(new_bundle_path, 'w') os.chdir(tmpd) if bundle: tarf.add('bundle') if refs: tarf.add('refs') tarf.close() except: try: os.remove(new_bundle_path) except: pass raise self._len = len(self) + 1 if __name__ == '__main__': import argparse parser = argparse.ArgumentParser(description= 'Utility script for managing git repositories stored in diffbundles') subparsers = parser.add_subparsers() create_parser = subparsers.add_parser('create', help='create a new diffbundle') create_parser.add_argument('repodir', type=str, help='repository to bundle') create_parser.add_argument('bundledir', type=str, help='directory to place new diffbundle') extract_parser = subparsers.add_parser('extract', help='extract a repository from diffbundles') extract_parser.add_argument('bundledir', type=str, help='directory with diffbundles') extract_parser.add_argument('extractdir', type=str, help='output directory') args = parser.parse_args() diffbundler = DiffBundler(cleanup_path(args.bundledir)) if 'repodir' in args: diffbundler.create_diffbundle(cleanup_path(args.repodir)) elif 'extractdir' in args: diffbundler.extract(cleanup_path(args.extractdir)) else: raise RuntimeError( 'command line arguments were not properly processed')