CoCalc -- storage

GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/scripts/storage_gluster.py
Views: ²⁷⁵
1
#!/usr/bin/env python
2
###############################################################################
3
#
4
#    CoCalc: Collaborative Calculation
5
#
6
#    Copyright (C) 2016, Sagemath Inc.
7
#
8
#    This program is free software: you can redistribute it and/or modify
9
#    it under the terms of the GNU General Public License as published by
10
#    the Free Software Foundation, either version 3 of the License, or
11
#    (at your option) any later version.
12
#
13
#    This program is distributed in the hope that it will be useful,
14
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
15
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
#    GNU General Public License for more details.
17
#
18
#    You should have received a copy of the GNU General Public License
19
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
20
#
21
###############################################################################
22

23
import argparse, cPickle, hashlib, json, logging, os, sys, time, random
24
from uuid import UUID
25

26
log = None
27

28
# This is so we can import salvus/salvus/daemon.py
29
sys.path.append('/home/salvus/salvus/salvus/')
30

31

32
def check_uuid(uuid):
    """
    Ensure that ``uuid`` is the string form of a version 4 UUID.

    Raises RuntimeError("invalid uuid") when the value parses as a UUID of
    any other version.  Whatever exception the UUID constructor raises for
    unparseable input is propagated unchanged.
    """
    parsed = UUID(uuid)
    if parsed.version == 4:
        return
    raise RuntimeError("invalid uuid")
35

36

37
def uid(uuid):
    """
    Derive a numeric user/group id from a project uuid.

    We take the sha-512 of the uuid just to make it harder to force a
    collision.  Thus even if a user could somehow generate an account id of
    their choosing, this wouldn't help them get the same uid as another user.

    The result always lies in the range 1001 .. 4294967294 inclusive.
    """
    # 2^32-2=max uid, as keith determined by a program + experimentation.
    max_uid = 4294967294
    digest = hashlib.sha512(uuid).digest()
    # NOTE(review): the builtin hash() of the digest is what determines the
    # final value, so results depend on the interpreter's string hashing.
    offset = hash(digest) % (max_uid - 1000)
    return offset + 1001
45

46

47
def cmd(s, exit_on_error=True):
    """
    Run the shell command ``s`` via os.system, logging it and its run time.

    s             -- the command line, passed to the shell as is
    exit_on_error -- when true, a nonzero exit status raises RuntimeError;
                     when false, failures are ignored

    Returns None.  The elapsed time is only logged when no error is raised.
    """
    log.debug(s)
    started = time.time()
    status = os.system(s)
    if status and exit_on_error:
        raise RuntimeError("Error running '%s'" % s)
    log.debug("time: %s seconds" % (time.time() - started))
55

56

57
def cmd2(s):
    """
    Run a command and capture everything it prints.

    s -- either a shell command string (run through the shell) or a list of
         arguments (run directly, without a shell)

    Returns a pair (output, exit_code), where output is the command's stdout
    followed by its stderr.  Both the command and its output are logged at
    debug level.
    """
    from subprocess import Popen, PIPE
    log.debug(s)
    proc = Popen(
        s, stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=not isinstance(s, list))
    # communicate() drains stdout and stderr while waiting for the process.
    # Calling wait() first and reading afterwards can deadlock as soon as the
    # child writes more than fits in the OS pipe buffer.  It also closes
    # stdin, so a child that reads from stdin sees EOF.
    stdout, stderr = proc.communicate()
    x = stdout + stderr
    log.debug(x)
    return x, proc.returncode
66

67

68
def path_to_project(storage, project_id):
    """
    Return the directory under ``storage`` that holds the given project.

    Projects are sharded two levels deep using the first four characters of
    the id: <storage>/<id[0:2]>/<id[2:4]>/<id>.
    """
    first_level = project_id[:2]
    second_level = project_id[2:4]
    return os.path.join(storage, first_level, second_level, project_id)
70

71

72
def migrate_project_to_storage(src, storage, min_size_mb, max_size_mb,
                               new_only):
    """
    Copy the project whose home directory is ``src`` into the storage pool.

    src         -- current project home directory; must contain
                   .sagemathcloud/info.json with a 'project_id' entry,
                   otherwise the project is skipped
    storage     -- directory under which project image directories live
    min_size_mb -- lower bound for the size of a newly created image
    max_size_mb -- upper bound for the size of a newly created image
    new_only    -- if true and the project's image directory already
                   exists, do nothing at all

    If the image directory already exists the pool is imported; otherwise a
    sparse image file and a zpool (compression, dedup and visible snapshots
    enabled) are created.  The data is then rsync'ed into /home/<projectid>,
    chown'ed to the project's uid, and the project is unmounted again.
    """
    info_json = os.path.join(src, '.sagemathcloud', 'info.json')
    if not os.path.exists(info_json):
        log.debug("Skipping since %s does not exist" % info_json)
        return
    # Close the file deterministically instead of leaking the handle.
    with open(info_json) as f:
        project_id = json.loads(f.read())['project_id']
    projectid = project_id.replace('-', '')
    target = path_to_project(storage, project_id)
    already_exists = os.path.exists(target)

    # This check must happen *outside* the try/finally below: skipping a
    # project must not run unmount_project on it (which kills the project's
    # processes, deletes its user and exports its pool).
    if already_exists and new_only:
        log.debug(
            "skipping %s (%s) since it already exists (and new_only=True)"
            % (src, project_id))
        return

    try:
        if already_exists:
            mount_project(storage=storage, project_id=project_id, force=False)
        else:
            # create
            os.makedirs(target)
            os.chdir(target)
            current_size_mb = int(
                os.popen("du -s '%s'" % src).read().split()[0]) // 1000 + 1
            size = min(max_size_mb, max(min_size_mb, current_size_mb))

            # Using many small img files might seem like a good idea.  It
            # isn't, since mount takes massively longer, etc.  So we use
            # exactly one image file.
            image = '%s/%s.img' % (target, 0)
            cmd("truncate -s %sM %s" % (size, image))

            cmd("zpool create -m /home/%s project-%s %s" %
                (projectid, project_id, image))
            cmd("zfs set compression=gzip project-%s" % project_id)
            cmd("zfs set dedup=on project-%s" % project_id)
            cmd("zfs set snapdir=visible project-%s" % project_id)

        # rsync data over
        double_verbose = False
        cmd("time rsync -axH%s --delete --exclude .forever --exclude .bup %s/ /home/%s/"
            % ('v' if double_verbose else '', src, projectid),
            exit_on_error=False)
        numeric_id = uid(project_id)
        cmd("chown %s:%s -R /home/%s/" % (numeric_id, numeric_id, projectid))
        cmd("df -h /home/%s; zfs get compressratio project-%s; zpool get dedupratio project-%s"
            % (projectid, project_id, project_id))
    finally:
        unmount_project(project_id=project_id)
124

125

126
def mount_project(storage, project_id, force):
    """
    Import the project's zpool and make sure its Unix group and user exist.

    storage    -- directory under which project image directories live
    project_id -- version 4 uuid of the project (validated first)
    force      -- if true, pass -f to 'zpool import'

    Prints a message and exits the process with status 1 if the pool can
    not be imported for any reason other than already being imported.
    """
    check_uuid(project_id)
    numeric_id = uid(project_id)
    pool_dir = path_to_project(storage, project_id)
    force_flag = '-f' if force else ''
    out, e = cmd2("zpool import %s project-%s -d %s" % (force_flag,
                                                        project_id, pool_dir))
    # A pool that is already imported is not a problem; anything else is.
    if e and 'a pool with that name is already created' not in out:
        print("could not get pool")
        sys.exit(1)

    projectid = project_id.replace('-', '')
    # the -o makes it so in the incredibly unlikely event of a collision, no big deal.
    group_cmd = "groupadd -g %s -o %s" % (numeric_id, projectid)
    cmd(group_cmd, exit_on_error=False)
    # error if user already exists is fine.
    user_cmd = "useradd -u %s -g %s -o -d /home/%s/  %s" % (
        numeric_id, numeric_id, projectid, projectid)
    cmd(user_cmd, exit_on_error=False)
145

146

147
def unmount_project(project_id):
    """
    Kill the project's processes, delete its Unix user and export its zpool.

    project_id -- version 4 uuid of the project (validated first)

    Prints a message and exits the process with status 1 if exporting the
    pool fails for any reason other than the pool not being present.
    """
    check_uuid(project_id)
    projectid = project_id.replace('-', '')
    for command in ("pkill -9 -u %s" % projectid,
                    "deluser --force %s" % projectid):
        cmd(command, exit_on_error=False)
    time.sleep(.5)
    out, e = cmd2("zpool export project-%s" % project_id)
    if not e:
        return
    if 'no such pool' in out:
        # just a problem due to pool not being mounted.
        return
    print("Error unmounting pool -- %s" % out)
    sys.exit(1)
159

160

161
def tinc_address():
    """
    Return the address of the tun0 interface as reported by ifconfig.

    Takes the second whitespace separated token of the "inet addr" line
    (of the form "addr:<ip>") and returns the part after the colon.
    """
    output = os.popen('ifconfig tun0|grep "inet addr"').read()
    addr_field = output.split()[1]
    return addr_field.split(':')[1].strip()
164

165

166
def info_json(path):
    """
    Write a missing .sagemathcloud/info.json for each given project directory.

    path -- list of project home directories (old non-pooled layout); the
            last component of each directory is taken to be the username

    Requires a file 'locations.dat' in the current directory, containing
    lines of the form "<location json> | <project_id>".  Only entries whose
    location host equals this machine's tinc address are considered.  If
    locations.dat is missing, an explanation is written to stderr and the
    process exits with status 1.

    Directories without a .sagemathcloud subdirectory, directories that
    already have an info.json, and usernames not found in locations.dat are
    skipped.
    """
    import stat

    if not os.path.exists('locations.dat'):
        sys.stderr.write(
            'Please run this from a node with db access to create locations.dat\n\t\techo "select location,project_id from projects limit 30000;" | cqlsh_connect 10.1.3.2 |grep "{" > locations.dat'
        )
        sys.exit(1)
    db = {}
    host = tinc_address()
    log.info("parsing database...")
    with open('locations.dat') as locations:
        for x in locations:
            if not x.strip():
                continue
            location, project_id = x.split('|')
            location = json.loads(location.strip())
            project_id = project_id.strip()
            if location['host'] != host:
                continue
            if location['username'] in db:
                log.warning("WARNING: collision -- %s, %s" % (location,
                                                              project_id))
            db[location['username']] = {
                'location': location,
                'project_id': project_id,
                'base_url': ''
            }

    v = [os.path.abspath(x) for x in path]
    for i, project_path in enumerate(v):
        log.info("** %s of %s" % (i + 1, len(v)))
        smc = os.path.join(project_path, '.sagemathcloud')
        if not os.path.exists(smc):
            log.warning(
                "Skipping '%s' since no .sagemathcloud directory" %
                project_path)
            continue
        f = os.path.join(smc, 'info.json')
        if os.path.exists(f):
            continue
        username = os.path.split(project_path)[-1]
        if username not in db:
            log.warning("Skipping '%s' since not in database!" % username)
            continue
        s = json.dumps(db[username], separators=(',', ':'))
        log.info("writing '%s': '%s'" % (f, s))
        with open(f, 'w') as out:
            out.write(s)
        # Equivalent of "chmod a+rw", done without the shell so that paths
        # containing spaces or shell metacharacters are handled correctly.
        os.chmod(f, stat.S_IMODE(os.stat(f).st_mode) | 0o666)
207

208

209
def modtime(f):
    """
    Return the modification time of ``f`` as an integer number of seconds.

    Returns 0 (i.e., 1970) if the file can not be stat'ed, e.g., because it
    does not exist or ``f`` is not a usable path.
    """
    try:
        return int(os.stat(f).st_mtime)
    except (OSError, TypeError, ValueError):
        # Deliberately best-effort, but without the bare "except:" that
        # would also swallow KeyboardInterrupt and SystemExit.
        return 0  # 1970...
214

215

216
def copy_file_efficiently(src, dest):
    """
    Copy a possibly sparse file from a brick to a mounted glusterfs volume, if the dest is older.

    This for now -- later we might use a different method when the file is above a certain
    size threshold (?).  However, I can't think of any possible better method, really; anything
    involving computing a diff between the two files would require *reading* them, so already
    takes way too long (in sharp contrast to the ever-clever bup, which uses a bloom filter!).

    The file is first copied to a temporary ".glusterfs-tmp-..." file next to dest, guarded
    by a ".glusterfs-lock-<name>" file, and then moved into place.

    Returns the number of seconds spent copying, or None if the copy was skipped (dest is a
    .glusterfs* file, dest is not older than src, a recent lock exists, or src is a gluster
    link file).

    This will raise a RuntimeError if one of the touch/cp/mv shell commands fails.
    """
    import uuid

    # 33280 == 0o101000: a regular file whose only mode bit is the sticky
    # bit; the rest of this file treats that as a gluster special link file.
    gluster_link_mode = 33280

    s0, s1 = os.path.split(dest)
    if s1.startswith('.glusterfs'):
        # never copy around/sync any of the temp files we create below.
        return

    # The clock of the destination is used when doing this copy, so it's
    # *critical* that the clocks be in sync.  Run ntp!!!!!
    dest_modtime = modtime(dest)
    if dest_modtime >= modtime(src):
        return

    if not os.path.exists(s0):
        os.makedirs(s0)
    lock = os.path.join(s0, ".glusterfs-lock-%s" % s1)
    dest0 = os.path.join(s0, ".glusterfs-tmp-%s-%s" % (str(uuid.uuid4()), s1))

    now = time.time()
    recent = now - 5 * 60  # recent time = 5 minutes ago
    if os.path.exists(lock):
        log.debug(
            "another daemon is either copying the same file right now (or died)."
        )
        # If mod time of the lock is recent, just give up.
        t = modtime(lock)
        if t >= recent:
            return  # recent lock
        # check that dest0 exists and has mod time < 5 minutes; otherwise, take control.
        # NOTE(review): dest0 embeds a freshly generated uuid4, so it will
        # practically never exist at this point and this check looks dead.
        if os.path.exists(dest0) and modtime(dest0) >= recent:
            return

    if os.stat(src).st_mode == gluster_link_mode:
        log.info(
            "skipping copy since source '%s' suddenly became special link file",
            src)
        return

    log.info("sync: %s --> %s" % (src, dest))
    t = time.time()
    try:
        log.info(cmd2('ls -lhs "%s"' % src)[0])
        cmd("touch '%s'; cp -av '%s' '%s'" % (lock, src, dest0),
            exit_on_error=True)
        # check that modtime of dest is *still* older, i.e., that somehow somebody didn't
        # just step in and change it.
        if modtime(dest) == dest_modtime:
            # modtime was unchanged.
            cmd("mv -v '%s' '%s'" % (dest0, dest), exit_on_error=True)

    finally:
        # remove the tmp file instead of leaving it there all corrupted,
        # and release the lock.  Removal is best-effort, but only file
        # system errors are ignored (not KeyboardInterrupt/SystemExit).
        for leftover in (dest0, lock):
            if os.path.exists(leftover):
                try:
                    os.unlink(leftover)
                except OSError:
                    pass

    total_time = time.time() - t
    log.info("time: %s" % total_time)
    return total_time
292

293

294
def sync(src, dest):
    """
    copy all older files from src/ to dest/.

    -- src/ = underlying *brick* path for some glusterfs host
    -- dest/ = remote mounted glusterfs filesystem

    A cache mapping each copied source file to its mtime (and approximate
    size) is kept in /var/lib/glusterd/glustersync/cache.pickle, keyed by
    dest, so that unchanged files are not considered again on the next run.
    The cache is written back only after the whole tree has been walked.

    Note that this changes the current working directory to src.
    """
    import stat

    src = os.path.abspath(src)
    dest = os.path.abspath(dest)

    cache_dir = "/var/lib/glusterd/glustersync"
    cache_file = "/var/lib/glusterd/glustersync/cache.pickle"
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
    if os.path.exists(cache_file):
        # NOTE: unpickling executes whatever the file describes; this is
        # only acceptable because the cache file is written by this script.
        with open(cache_file) as f:
            cache_all = cPickle.loads(f.read())
    else:
        cache_all = {}
    cache = cache_all.setdefault(dest, {})

    log.info("sync: '%s' --> '%s'" % (src, dest))

    # mode of a gluster special link file; see the check in walktree.
    gluster_link_mode = 33280

    def walktree(top):
        # top is a path relative to src (we chdir to src below).
        v = os.listdir(top)
        # random order
        random.shuffle(v)
        for i, f in enumerate(v):
            if f == '.glusterfs':
                # skip the glusterfs meta-data
                continue
            if len(v) > 10:
                log.debug("%s(%s/%s): %s", top, i + 1, len(v), f)
            pathname = os.path.join(top, f)

            src_name = os.path.join(src, pathname)
            dest_name = os.path.join(dest, pathname)

            st = os.stat(src_name)

            if st.st_mode == gluster_link_mode:
                # glusterfs meta-info file to indicate a moved file...
                continue

            if stat.S_ISDIR(st.st_mode):
                # It's a directory: recurse.  We deliberately do not create
                # it in the target; that is potentially expensive and isn't
                # needed for our application.
                try:
                    walktree(pathname)
                except OSError as mesg:
                    log.warning("error walking '%s': %s", pathname, mesg)

            elif stat.S_ISREG(st.st_mode):
                mtime = int(st.st_mtime)
                if cache.get(src_name, {'mtime': 0})['mtime'] >= mtime:
                    continue
                try:
                    # checks dest modtime before actually doing copy.
                    copy_file_efficiently(src_name, dest_name)
                    cache[src_name] = {
                        'mtime': mtime,
                        'size_mb': st.st_blocks // 2000
                    }
                except RuntimeError:
                    log.warning("error copying %s to %s; skipping.", src_name,
                                dest_name)

            else:
                # Unknown file type, print a message
                log.warning("unknown file type: %s", pathname)

    os.chdir(src)
    walktree('.')

    s = cPickle.dumps(cache_all)
    # Close (and hence flush) the cache file deterministically.
    with open(cache_file, 'w') as f:
        f.write(s)
379

380

381
def sync_watch(sources, dests, min_sync_time):
    ### WARNING -- this code does not work very well, and is sort of pointless.  AVOID!
    """
    Watch filesystem trees and on modification or creation, cp file, possibly creating directories.
    The frequency of copying is limited in various ways.

    This uses inotify so that it is event driven.   You must increase the number of watched files
    that are allowed!  "sudo sysctl fs.inotify.max_user_watches=10000000" and in /etc/sysctl.conf:
        fs.inotify.max_user_watches=10000000

    - sources   = list of underlying *brick* path for some glusterfs host
    - dests = list of paths of remote mounted glusterfs filesystems
    - min_sync_time = never sync a file more frequently than this many seconds; no matter what, we
      also wait at least twice the time it takes to sync out the file before syncing it again.

    This function loops forever; it only returns by raising an exception.
    """
    import pyinotify

    sources = [os.path.abspath(src) for src in sources]
    dests = [os.path.abspath(dest) for dest in dests]

    next_sync = {}  # soonest time when may again sync a given file

    modified_files = set([])
    received_files = set([])

    def add(pathname):
        # Record an inotify event for pathname.
        try:
            if os.stat(pathname).st_mode == 33280:
                # ignore gluster special files
                log.debug("ignoring gluster special file: '%s'", pathname)
                return
        except:
            # best effort, exactly as before: a failing stat is ignored.
            pass
        log.debug("inotify: %s" % pathname)
        s = os.path.split(pathname)
        if s[1].startswith('.glusterfs-lock-'):
            # a lock file means somebody is copying this file *to* us.
            received_files.add(
                os.path.join(s[0], s[1][len('.glusterfs-lock-'):]))
        elif s[1].startswith('.glusterfs'):
            return
        elif os.path.isfile(pathname):
            modified_files.add(pathname)

    def relative_to_source(path):
        # Return path relative to the source directory containing it, or
        # None if it is under no source.  The prefix must end at a path
        # separator, so that source '/a/b' does not claim '/a/bc/x'.
        for s in sources:
            prefix = s if s.endswith(os.sep) else s + os.sep
            if path.startswith(prefix):
                return path[len(prefix):]
        return None

    def handle_modified_files():
        if not modified_files:
            return
        log.debug("handling modified_files=%s", modified_files)
        log.debug("received_files=%s", received_files)
        now = time.time()
        do_later = []
        for path in modified_files:
            if path in sources:  # ignore changes to the sources directories
                continue
            if path in received_files:  # recently copied to us.
                continue
            if path in next_sync and now < next_sync[path]:
                # too frequent -- wait until later to sync this one.
                do_later.append(path)
                continue
            rel = relative_to_source(path)
            if rel is None:
                log.warning(
                    "not copying '%s' -- must be under a source: %s" %
                    (path, sources))
                continue
            t0 = time.time()
            for dest in dests:
                dest_path = os.path.join(dest, rel)
                log.info("copy('%s', '%s')" % (path, dest_path))
                try:
                    copy_file_efficiently(path, dest_path)
                except Exception as msg:
                    log.warning("problem syncing %s to %s! -- %s" %
                                (path, dest_path, msg))
            # no matter what, we wait at least twice the time (from now) that it takes to sync out the file before syncing it again.
            next_sync[path] = time.time() + max(2 * (time.time() - t0),
                                                min_sync_time)
        modified_files.clear()
        received_files.clear()
        modified_files.update(do_later)

    wm = pyinotify.WatchManager()  # Watch Manager
    mask = pyinotify.IN_CREATE | pyinotify.IN_MOVED_TO | pyinotify.IN_MODIFY | pyinotify.IN_CLOSE_WRITE

    class EventHandler(pyinotify.ProcessEvent):
        def process_IN_CREATE(self, event):
            print("Creating: %s" % event.pathname)
            if os.path.isdir(event.pathname):
                # created a directory -- add it to the watch list
                watchers.append(wm.add_watch(event.pathname, mask))
            add(event.pathname)

        def process_IN_MOVED_TO(self, event):
            print("File moved to: %s" % event.pathname)
            add(event.pathname)

        def process_IN_MODIFY(self, event):
            print("Modified: %s" % event.pathname)
            add(event.pathname)

        def process_IN_CLOSE_WRITE(self, event):
            print("Close write: %s" % event.pathname)
            add(event.pathname)

    handler = EventHandler()

    # we get inotify events for *at most* timeout seconds, then handle them all
    notifier = pyinotify.Notifier(wm, handler, timeout=1)

    t = time.time()

    watchers = []
    for src in sources:
        log.info("adding watches to '%s' (this could take several minutes)..."
                 % src)
        dot_gluster = os.path.join(src, '.glusterfs')
        watchers.append(
            wm.add_watch(
                src,
                mask,
                rec=True,
                exclude_filter=pyinotify.ExcludeFilter(['^' + dot_gluster])))
        log.info("watch added (%s seconds): listening for file events..." %
                 (time.time() - t))

    def check_for_events():
        notifier.process_events()
        # loop in case more events appear while we are processing
        while notifier.check_events():
            notifier.read_events()
            notifier.process_events()

    while True:
        check_for_events()
        handle_modified_files()
        time.sleep(1)
522

523

524
def volume_info():
    """
    Parse the output of 'gluster volume info' into a python object.

    Returns a dict mapping each volume name to a dict with
      - 'bricks': list of the values of all "Brick...: <value>" lines
      - one entry per other "<key>: <value>" line that has a nonempty value

    Raises RuntimeError (carrying the exit code) if the gluster command fails.
    """
    s, e = cmd2('unset PYTHONPATH; unset PYTHONHOME; gluster volume info')
    if e:
        raise RuntimeError(e)
    v = {}
    # Prepending a newline makes a "Volume Name: " header on the very first
    # line match the separator too.  The first chunk is then by construction
    # whatever precedes the first header (nothing, blank lines, or stray
    # messages) and never describes a volume, so it is dropped.
    sections = ("\n" + s).split("\nVolume Name: ")[1:]
    for section in sections:
        z = section.strip().splitlines()
        if not z:
            continue
        name = z[0]
        m = {'bricks': []}
        for k in z[1:]:
            # split at the *first* colon only; brick values contain colons.
            i = k.find(':')
            if i == -1:
                continue
            key = k[:i].strip()
            val = k[i + 1:].strip()
            if not val:
                continue
            if key.startswith('Brick'):
                m['bricks'].append(val)
            else:
                m[key] = val
        v[name] = m
    return v
548

549

550
def ip_address(dest):
    """
    Return the ip address that is used to communicate with the given
    destination, as computed by misc.local_ip_address.
    """
    # imported lazily, as before, so the module loads without misc.
    from misc import local_ip_address
    return local_ip_address(dest)
554

555

556
def mount_target_volumes(volume_name):
    """
    Mount all *remote* gluster volumes for ``volume_name``; return their mount points.

    A volume is considered when its name starts with 'dc' and the second
    '-' separated component of the name equals volume_name (e.g.
    'dc1-projects' for volume_name 'projects').  Volumes with at least one
    brick on this computer are not targets and are skipped.  Each remaining
    volume is mounted at /mnt/<name> unless 'mnt/<name>' already appears in
    the output of 'mount'.

    Returns the list of mount points '/mnt/<name>'.
    """
    # Query gluster once and reuse the result (the gluster command used to
    # be run twice here, with the first result thrown away).
    info = volume_info()
    dests = []
    mount = cmd2('mount')[0]
    for name, data in info.items():
        if not name.startswith('dc'):
            continue
        v = name.split('-')
        if len(v) < 2 or v[1] != volume_name:
            continue
        hosted_here = False
        for brick in data['bricks']:
            brick_ip, path = brick.split(':')
            if ip_address(brick_ip) == brick_ip:
                # this volume is partly hosted on this computer, hence not a target.
                hosted_here = True
                break
        if hosted_here:
            continue
        # ensure volume is mounted and add to list
        if 'mnt/%s' % name not in mount:
            cmd("mkdir -p '/mnt/%s'; mount -t glusterfs localhost:'/%s' '/mnt/%s'"
                % (name, name, name))
        dests.append('/mnt/%s' % name)
    return dests
579

580

581
def find_bricks(volume_name):
    """
    Return the paths of all bricks on this computer that belong to a
    gluster volume for ``volume_name``.

    A volume is considered when its name starts with 'dc' and the second
    '-' separated component of the name equals volume_name.  A brick
    "<ip>:<path>" is local when ip_address(ip) equals that ip.
    """
    local_paths = []
    for name, data in volume_info().items():
        if not name.startswith('dc'):
            continue
        parts = name.split('-')
        if len(parts) < 2 or parts[1] != volume_name:
            continue
        for brick in data['bricks']:
            brick_ip, path = brick.split(':')
            if ip_address(brick_ip) == brick_ip:
                local_paths.append(path)
    return local_paths
593

594

595
def setup_log(loglevel='DEBUG', logfile=''):
    """
    Initialize the module-level logger ``log`` (the 'storage' logger).

    loglevel -- name of a logging level such as 'INFO', 'WARNING' or
                'DEBUG' (case insensitive); if empty, the level is not set
    logfile  -- if nonempty, log messages are additionally written to this file
    """
    global log
    logging.basicConfig()
    log = logging.getLogger('storage')
    if loglevel:
        log.setLevel(getattr(logging, loglevel.upper()))
    if logfile:
        log.addHandler(logging.FileHandler(logfile))
    log.info("logger started")
607

608

609
if __name__ == "__main__":
    # Command line interface: one sub-command per operation.  Each
    # sub-parser stores its handler in "func", which is called at the end.

    STORAGE_HELP = "the directory where project image directories are stored (default: $SALVUS_STORAGE enviro var)"

    def _default_storage(args):
        # Fall back to $SALVUS_STORAGE when --storage was not given.
        if not args.storage:
            args.storage = os.environ['SALVUS_STORAGE']

    parser = argparse.ArgumentParser(description="Project storage")
    parser.add_argument(
        "--loglevel",
        dest='loglevel',
        type=str,
        default='INFO',
        help="log level: useful options include INFO, WARNING and DEBUG")
    parser.add_argument(
        "--logfile",
        dest="logfile",
        type=str,
        default='',
        help="store log in this file (default: '' = don't log to a file)")

    subparsers = parser.add_subparsers(help='sub-command help')

    # ---- migrate ----
    def migrate(args):
        _default_storage(args)
        sources = [os.path.abspath(x) for x in args.src]
        total = len(sources)
        for i, src in enumerate(sources):
            log.info("\n** %s of %s" % (i + 1, total))
            migrate_project_to_storage(
                src=src,
                storage=args.storage,
                min_size_mb=args.min_size_mb,
                max_size_mb=10000,
                new_only=args.new_only)

    parser_migrate = subparsers.add_parser(
        'migrate', help='migrate to or update project in storage pool')
    parser_migrate.add_argument(
        "--storage", help=STORAGE_HELP, type=str, default='')
    parser_migrate.add_argument(
        "--min_size_mb",
        help="min size of zfs image in megabytes (default: 512)",
        type=int,
        default=512)
    parser_migrate.add_argument(
        "--new_only",
        help="if image already created, do nothing (default: False)",
        action="store_true")
    parser_migrate.add_argument(
        "src", help="the current project home directory", type=str, nargs="+")
    parser_migrate.set_defaults(func=migrate)

    # ---- mount ----
    def mount(args):
        _default_storage(args)
        mount_project(
            storage=args.storage, project_id=args.project_id, force=args.f)

    parser_mount = subparsers.add_parser(
        'mount', help='mount a project that is available in the storage pool')
    parser_mount.add_argument(
        "--storage", help=STORAGE_HELP, type=str, default='')
    parser_mount.add_argument("project_id", help="the project id", type=str)
    parser_mount.add_argument(
        "-f", help="force (default: False)", action="store_true")
    parser_mount.set_defaults(func=mount)

    # ---- umount ----
    def unmount(args):
        unmount_project(project_id=args.project_id)

    parser_unmount = subparsers.add_parser(
        'umount',
        help='unmount a project that is available in the storage pool')
    parser_unmount.add_argument("project_id", help="the project id", type=str)
    parser_unmount.set_defaults(func=unmount)

    # ---- info_json ----
    def _info_json(args):
        info_json(path=args.path)

    parser_info_json = subparsers.add_parser(
        'info_json',
        help='query database, then write info.json file if there is none')
    parser_info_json.add_argument(
        "path",
        help="path to a project home directory (old non-pooled)",
        type=str,
        nargs="+")
    parser_info_json.set_defaults(func=_info_json)

    # ---- sync ----
    def _sync(args):
        # Without explicit --dest/--src, derive them from the gluster volumes.
        if not args.dest:
            args.dest = ','.join(mount_target_volumes(args.volume))
        if not args.src:
            args.src = ','.join(find_bricks(args.volume))

        def run_once():
            sources = args.src.split(',')
            dests = args.dest.split(',')
            if args.watch:
                sync_watch(
                    sources=sources,
                    dests=dests,
                    min_sync_time=args.min_sync_time)
                return
            for src in sources:
                for dest in dests:
                    sync(src=src, dest=dest)

        def main():
            # In daemon mode keep syncing forever, pausing 5 seconds between
            # rounds; otherwise do a single round.  Errors are printed and
            # do not stop the daemon; Ctrl-C stops everything.
            while True:
                try:
                    run_once()
                except KeyboardInterrupt:
                    return
                except Exception as mesg:
                    print(mesg)
                if not args.daemon:
                    return
                time.sleep(5)

        if args.daemon:
            if not args.pidfile:
                raise RuntimeError(
                    "in --daemon mode you *must* specify --pidfile")
            import daemon
            daemon.daemonize(args.pidfile)
        main()

    parser_sync = subparsers.add_parser(
        'sync',
        help=
        'Cross data center project sync: simply uses the local "cp" command and local mounts of the glusterfs, but provides massive speedups due to sparseness of image files'
    )
    parser_sync.add_argument(
        "--watch",
        help=
        "after running once, use inotify to watch for changes to the src filesystem and cp when they occur",
        action="store_true")
    parser_sync.add_argument(
        "--min_sync_time",
        help=
        "never copy a file more frequently than this (default: 30 seconds)",
        type=int,
        default=30)
    parser_sync.add_argument(
        "--daemon",
        help="daemon mode; will repeatedly sync",
        dest="daemon",
        action="store_true")
    parser_sync.add_argument(
        "--pidfile",
        dest="pidfile",
        type=str,
        default='',
        help="store pid in this file when daemonized")
    parser_sync.add_argument(
        "--dest",
        help=
        "comma separated list of destinations; if not given, all remote gluster volumes with name dc[n]-volume are mounted and targeted",
        type=str,
        default='')
    parser_sync.add_argument(
        "--src",
        help=
        "comma separated paths to bricks; if not given, local bricks for dc[n]-volume are used",
        type=str,
        default='')
    parser_sync.add_argument(
        "--volume",
        help=
        "if there are volumes dc0-projects, dc1-projects, dc2-projects, then pass option --volume=projects (default: 'projects')",
        default='projects')
    parser_sync.set_defaults(func=_sync)

    args = parser.parse_args()

    setup_log(loglevel=args.loglevel, logfile=args.logfile)

    args.func(args)

else:
    # When imported as a module, still provide a working logger.
    setup_log()
801

802