Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Avatar for KuCalc : devops.
Download
50629 views
#!/usr/bin/env python
'''
Copyright (C) 2013 R. Andrew Ohana

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Usage:

# creates a new bundle that includes the commits missing from
# bundle-dir (NOTE: bundle-dir must not exist!):
$ diffbundler create repo-dir bundle-dir

# extracts the bundles from bundle-dir into a repository in extract-dir:
$ diffbundler extract bundle-dir extract-dir

'''

import os, shutil, subprocess, tarfile, tempfile

REFSHEADS = os.path.join('refs', 'heads')
REFSTAGS = os.path.join('refs', 'tags')

def cleanup_path(path):
    '''
    Make sure ``path`` names a directory and return its canonical form.

    A missing path is created (including intermediate directories).  A
    path that exists but is not a directory raises ``ValueError``.  The
    return value is ``os.path.realpath(path)``.
    '''
    if os.path.exists(path):
        if not os.path.isdir(path):
            raise ValueError("'%s' is not a directory"%path)
    else:
        os.makedirs(path)
    return os.path.realpath(path)

def flattened_iter(*args):
    '''
    Yield the leaves of arbitrarily nested iterables, depth first.

    Every argument that is iterable (has ``__iter__``) is expanded
    recursively; everything else is yielded unchanged.  Strings and bytes
    are always treated as leaves: on Python 3 ``str`` defines ``__iter__``
    and each character is itself a ``str``, so expanding them would
    recurse without end.
    '''
    for arg in args:
        if isinstance(arg, (str, bytes)) or not hasattr(arg, '__iter__'):
            yield arg
        else:
            for leaf in flattened_iter(*arg):
                yield leaf

# Directory most recently entered by git_popen_unclean (None before the
# first call); used to skip redundant chdir calls.
_curdir = None

def git_popen_unclean(directory, *args):
    '''
    Start ``git <args>`` from ``directory`` without waiting for it.

    The process working directory is switched to ``directory`` unless the
    previous call already switched to the same value.  ``args`` may be
    nested; they are flattened with ``flattened_iter``.  Returns the
    ``subprocess.Popen`` object with stdout and stderr attached to pipes;
    the exit status is not checked here.
    '''
    global _curdir
    if directory != _curdir:
        os.chdir(directory)
        _curdir = directory
    command = flattened_iter('git', *args)
    return subprocess.Popen(command,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)

def git_popen(directory, *args):
    '''
    Run ``git <args>`` in ``directory`` and wait for it to finish.

    Returns the finished ``subprocess.Popen`` object on a zero exit
    status.  On a non-zero status the captured stderr is printed and
    ``RuntimeError`` is raised with the command line in the message.

    NOTE(review): the process is waited on before its pipes are read; the
    subprocess documentation warns this can block if the child produces
    enough output to fill a pipe.
    '''
    proc = git_popen_unclean(directory, *args)
    if proc.wait() == 0:
        return proc
    _, stderr = proc.communicate()
    print(stderr)
    command_line = ' '.join(flattened_iter('git', *args))
    raise RuntimeError("an error occurred running '%s'"%command_line)

def git_retcode(*args):
    '''
    Run git via ``git_popen_unclean(*args)`` and return its exit status.

    Unlike ``git_popen`` a non-zero status is returned, not raised.
    '''
    return git_popen_unclean(*args).wait()

def git_stdout(*args):
    '''
    Run git via ``git_popen(*args)`` and return its decoded stdout.

    The output is split into lines and decoded as UTF-8.  Exactly one
    line is returned as a plain string; any other number of lines
    (including none) is returned as a tuple of strings.
    '''
    raw, _ = git_popen(*args).communicate()
    lines = tuple(chunk.decode('utf-8') for chunk in raw.splitlines())
    return lines[0] if len(lines) == 1 else lines

def get_shas(refs_dir):
    '''
    Collect the shas recorded in the ref files inside ``refs_dir``.

    Reads the files ``HEAD``, ``heads`` and ``tags`` from ``refs_dir``
    (each line is expected to start with a sha followed by a space) and
    returns the distinct shas as a sorted list.  Files that cannot be
    opened are skipped, so a directory without any of them yields ``[]``.
    '''
    shas = set()
    for fname in ('HEAD', 'heads', 'tags'):
        # Resolve against refs_dir explicitly rather than relying on the
        # current working directory happening to be refs_dir.
        try:
            ref_file = open(os.path.join(refs_dir, fname), 'r')
        except IOError:
            continue
        with ref_file:
            for line in ref_file:
                sha = line.split(' ', 1)[0].strip()
                if sha:
                    shas.add(sha)
    return sorted(shas)

class SelfCleaningTempDir(str):
    '''
    A freshly created temporary directory, usable as its own path string.

    Instantiating the class creates the directory with
    ``tempfile.mkdtemp``; the instance *is* the path.  Used as a context
    manager it yields itself and removes the whole directory tree on exit.
    '''

    def __new__(cls):
        return super(SelfCleaningTempDir, cls).__new__(cls, tempfile.mkdtemp())

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        # Runs whether or not the with-body raised.
        shutil.rmtree(self)

class DiffBundle(object):
    '''
    A single ``.diffbundle`` file: a tar archive that may contain a git
    bundle named ``bundle`` and/or a git bundle named ``refs``.

    The archive is opened only for the duration of each operation, so
    holding many DiffBundle objects does not keep file descriptors open.
    '''

    def __init__(self, path):
        # Open once up front so a missing or unreadable archive is still
        # reported at construction time, then release the handle.
        with tarfile.open(path, 'r'):
            pass
        self._path = path

    def _with_member(self, member_name, consume):
        '''
        Extract ``member_name`` into a temporary directory and call
        ``consume(extracted_path)``.  Does nothing if the archive has no
        such member.  The temporary directory is removed afterwards.
        '''
        with tarfile.open(self._path, 'r') as tarf:
            try:
                member = tarf.getmember(member_name)
            except KeyError:
                return
            with SelfCleaningTempDir() as tmpd:
                tarf.extract(member, tmpd)
                consume(os.path.join(tmpd, member.name))

    def extract(self, extract_dir):
        '''
        Unbundle the ``bundle`` member (if present) into the git
        repository at ``extract_dir``.
        '''
        def unbundle(bundle_path):
            git_popen(extract_dir, 'bundle', 'unbundle', bundle_path)
        self._with_member('bundle', unbundle)

    def extract_refs(self, extract_dir):
        '''
        Unbundle the ``refs`` member (if present) into the git repository
        at ``extract_dir`` and force-checkout the first sha that
        ``git bundle unbundle`` reports.
        '''
        def unbundle_and_checkout(bundle_path):
            listing = git_stdout(extract_dir, 'bundle', 'unbundle',
                    bundle_path)
            git_popen(extract_dir, 'checkout', '-f', listing.split(' ')[0])
        self._with_member('refs', unbundle_and_checkout)

class DiffBundler(object):
    '''
    Sequence-like view of the numbered ``N.diffbundle`` files in a
    directory, with operations to rebuild a repository from them and to
    append a new bundle holding what changed since the last one.

    ``len()`` is one more than the highest bundle number found, indexing
    returns (and caches) ``DiffBundle`` objects, and the object is falsy
    while the directory holds no bundles.
    '''

    def __init__(self, bundle_dir):
        self.bundle_dir = bundle_dir

    def __len__(self):
        try:
            return self._len
        except AttributeError:
            pass
        suffix = '.diffbundle'
        count = 0
        for fname in os.listdir(self.bundle_dir):
            if not fname.endswith(suffix):
                continue
            try:
                index = int(fname[:-len(suffix)])
            except ValueError:
                # Not a numbered bundle (e.g. 'notes.diffbundle').
                continue
            count = max(count, index + 1)
        self._len = count
        return count

    def __getitem__(self, i):
        if not isinstance(i, int):
            raise TypeError('list indices must be integers')
        if i < 0:
            i += len(self)
        if i < 0 or i >= len(self):
            raise IndexError('list index out of range')
        try:
            cache = self._bundles
        except AttributeError:
            cache = self._bundles = {}
        if i not in cache:
            cache[i] = DiffBundle(
                    os.path.join(self.bundle_dir, str(i)+'.diffbundle'))
        return cache[i]

    def __iter__(self):
        for i in range(len(self)):
            yield self[i]

    def __nonzero__(self):
        return len(self) > 0

    # Python 3 spelling of the truth-value hook (must return a bool).
    __bool__ = __nonzero__

    @staticmethod
    def _as_lines(output):
        '''
        Normalise a ``git_stdout`` result to a tuple of lines.

        ``git_stdout`` returns a bare string when git printed exactly one
        line; iterating or measuring that string would operate on its
        characters instead of on lines.
        '''
        if isinstance(output, tuple):
            return output
        return (output,)

    def extract(self, extract_dir):
        '''
        Rebuild the repository in ``extract_dir``: unbundle every bundle
        in order, recreate the recorded tags and branches, then check out
        the recorded HEAD (by sha if it was detached, else by name).
        '''
        git_popen(extract_dir, 'init')
        for bundle in self:
            bundle.extract(extract_dir)
        with SelfCleaningTempDir() as tmpd:
            self._extract_refs(tmpd)
            for fn, cmd in (('tags', 'tag'), ('heads', 'branch')):
                with open(os.path.join(tmpd, fn), 'r') as f:
                    for ref in f:
                        sha, name = ref.strip('\n').split(' ')
                        git_popen(extract_dir, cmd, name, sha)
            # checkout the appropriate HEAD
            with open(os.path.join(tmpd, 'HEAD'), 'r') as f:
                sha, name = f.read().split(' ')
            if name == 'HEAD':
                git_popen(extract_dir, 'checkout', '-f', sha)
            else:
                git_popen(extract_dir, 'checkout', '-f', name)

    def _extract_refs(self, extract_dir):
        '''
        Initialise a git repository in ``extract_dir`` and replay the
        ``refs`` member of every bundle into it.
        '''
        git_popen(extract_dir, 'init')
        for bundle in self:
            bundle.extract_refs(extract_dir)

    def create_diffbundle(self, repo_dir):
        '''
        Write the next ``N.diffbundle`` into ``bundle_dir`` containing the
        commits of ``repo_dir`` not reachable from previously recorded
        shas, plus an updated record of HEAD, branches and tags.

        Prints a warning and writes nothing when neither commits nor refs
        changed.  A partially written archive is removed on failure.
        '''
        with SelfCleaningTempDir() as tmpd:
            self._extract_refs(tmpd)

            old_shas = get_shas(tmpd)

            with open(os.path.join(tmpd, 'HEAD'), 'w') as head_file:
                head_file.write(git_stdout(repo_dir, 'rev-parse', 'HEAD'))
                head_file.write(' ')
                head_file.write(git_stdout(repo_dir,
                        'rev-parse', '--abbrev-ref', 'HEAD'))

            args = []

            current_refs = self._as_lines(
                    git_stdout(repo_dir, 'show-ref', '--head'))

            with open(os.path.join(tmpd, 'heads'), 'w') as headsf, \
                    open(os.path.join(tmpd, 'tags'), 'w') as tagsf:
                for ref in sorted(current_refs):
                    sha, name = ref.split(' ')

                    if name.startswith(REFSHEADS):
                        name = name[len(REFSHEADS)+1:]
                        headsf.write(sha+' '+name+'\n')
                    elif name.startswith(REFSTAGS):
                        name = name[len(REFSTAGS)+1:]
                        tagsf.write(sha+' '+name+'\n')

                    # Find the already-bundled sha closest to this ref,
                    # measured in commits between their merge base and
                    # the ref; m stays -1 if there is nothing to compare.
                    m = -1
                    for old_sha in old_shas:
                        tmb = git_stdout(repo_dir, 'merge-base', old_sha, sha)
                        tm = len(self._as_lines(git_stdout(repo_dir,
                                'rev-list', tmb+'..'+sha)))
                        if m < 0 or tm < m:
                            merge_base = tmb
                            m = tm
                            if not m:
                                break
                    if m > 0:
                        args.append(merge_base+'..'+name)

            if not self:
                # First bundle: ship everything.
                args = ['--all']

            # disable making a new (useless) diffbundle if there is a stash.
            args = [x for x in args if not x.endswith('..refs/stash')]
            bundle = bool(args)

            if self:
                # Non-zero exit from `git diff --quiet` means the ref
                # files written above differ from what was recorded.
                refs = bool(git_retcode(tmpd, 'diff', '--quiet'))
            else:
                refs = True

            if not (bundle or refs):
                print('WARNING: there were no changes in the repository')
                return

            if bundle:
                git_popen(repo_dir, 'bundle', 'create',
                        os.path.join(tmpd, 'bundle'), args)

            if refs:
                git_popen(tmpd, 'add', 'HEAD', 'heads', 'tags')
                # NOTE(review): the message carries a stray leading quote;
                # left as-is so commit messages stay unchanged.
                git_popen(tmpd, 'commit', "--message='%s.diffbundle"%len(self))
                if not self:
                    git_popen(tmpd, 'checkout', '-f',
                            git_stdout(tmpd, 'show-ref', '--hash'))
                    git_popen(tmpd, 'branch', '--delete', 'master')
                arg = 'HEAD^..HEAD' if self else '--all'
                git_popen(tmpd, 'bundle', 'create',
                        os.path.join(tmpd, 'refs'), arg)

            new_bundle_path = os.path.join(
                    self.bundle_dir, str(len(self))+'.diffbundle')
            try:
                # Members are added by absolute path with an explicit
                # archive name, so the working directory is not changed.
                with tarfile.open(new_bundle_path, 'w') as tarf:
                    if bundle:
                        tarf.add(os.path.join(tmpd, 'bundle'),
                                arcname='bundle')
                    if refs:
                        tarf.add(os.path.join(tmpd, 'refs'),
                                arcname='refs')
            except BaseException:
                # Do not leave a truncated archive behind.
                try:
                    os.remove(new_bundle_path)
                except OSError:
                    pass
                raise
            self._len = len(self) + 1

if __name__ == '__main__':
    # Command-line entry point with two sub-commands, `create` and
    # `extract`; which one ran is detected from the attributes that the
    # chosen sub-parser put on the parsed namespace.
    import argparse

    parser = argparse.ArgumentParser(
            description='Utility script for managing git repositories stored in diffbundles')

    subparsers = parser.add_subparsers()

    create_parser = subparsers.add_parser(
            'create', help='create a new diffbundle')
    create_parser.add_argument(
            'repodir', type=str, help='repository to bundle')
    create_parser.add_argument(
            'bundledir', type=str, help='directory to place new diffbundle')

    extract_parser = subparsers.add_parser(
            'extract', help='extract a repository from diffbundles')
    extract_parser.add_argument(
            'bundledir', type=str, help='directory with diffbundles')
    extract_parser.add_argument(
            'extractdir', type=str, help='output directory')

    args = parser.parse_args()

    # Both sub-commands operate on the bundle directory.
    diffbundler = DiffBundler(cleanup_path(args.bundledir))
    if 'repodir' not in args and 'extractdir' not in args:
        raise RuntimeError(
                'command line arguments were not properly processed')
    if 'repodir' in args:
        diffbundler.create_diffbundle(cleanup_path(args.repodir))
    else:
        diffbundler.extract(cleanup_path(args.extractdir))