Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemath
GitHub Repository: sagemath/sage
Path: blob/develop/src/bin/sage-cleaner
4052 views
#!/usr/bin/env python3

#*****************************************************************************
# This is the sage monitor *daemon*, which cleans up after Sage.
# Some things that it cleans up:
#   * $DOT_SAGE/temp/HOSTNAME/pid directories
#   * Processes that Sage spawns.  If a copy of Sage isn't
#     running, then any process it spawned should have its
#     process group killed
#*****************************************************************************
#       Copyright (C) 2005, 2006, 2007 William Stein <[email protected]>
#
#  Distributed under the terms of the GNU General Public License (GPL)
#  as published by the Free Software Foundation; either version 2 of
#  the License, or (at your option) any later version.
#                  http://www.gnu.org/licenses/
#*****************************************************************************


import atexit
import errno
import os
import shutil
import signal
import socket
import sys
import time

# Host name used to build the per-host temp directory.  The HOSTNAME
# environment variable takes precedence over socket.gethostname().
HOSTNAME = os.environ.get('HOSTNAME', socket.gethostname())
# DOT_SAGE must be set in the environment; a KeyError is raised otherwise.
DOT_SAGE = os.environ['DOT_SAGE']

# Directory scanned by the cleaner; it also holds the cleaner's own
# log file and pid file.
SAGE_TMP_ROOT = os.path.join(DOT_SAGE, 'temp', HOSTNAME)
logfile = os.path.join(SAGE_TMP_ROOT, 'cleaner.log')
pidfile = os.path.join(SAGE_TMP_ROOT, 'cleaner.pid')
# pidfile used by earlier versions of sage-cleaner (before #15457)
old_pidfile = os.path.join(DOT_SAGE, 'temp', 'cleaner-%s.pid'%HOSTNAME)

import logging

# Module-level logger; handlers and levels are attached in setup_daemon().
logger = logging.getLogger(__name__)


def mkdir_p(path):
    """
    Create ``path`` together with any missing parent directories.

    Behaves like ``mkdir -p``: an ``OSError`` raised by ``os.makedirs``
    is swallowed when ``path`` turns out to be a directory, and is
    re-raised in every other case.
    """
    try:
        os.makedirs(path)
    except OSError:
        if os.path.isdir(path):
            # Already there (or created concurrently): nothing to do.
            return
        raise


def rm_rf(file_or_path):
    """
    Best-effort recursive delete of a file or directory tree.

    The path is first unlinked as a plain file.  If that fails with
    ``EISDIR`` or ``EPERM``, the path is removed with ``shutil.rmtree``
    with errors ignored.  Any other ``OSError`` (for example a missing
    path) is silently dropped.
    """
    try:
        os.unlink(file_or_path)
    except OSError as exc:
        if exc.errno in (errno.EISDIR, errno.EPERM):
            shutil.rmtree(file_or_path, ignore_errors=True)

def is_running(pid):
    """
    Report whether signal 0 can be delivered to process ``pid``.

    Returns True when ``os.kill(pid, 0)`` succeeds.  Any ``OSError``
    (no such process, but also a permission error) yields False.
    """
    try:
        os.kill(pid, 0)
    except OSError:
        return False
    return True


def cleanup():
    """
    Perform one cleanup pass over SAGE_TMP_ROOT.

    Every entry of SAGE_TMP_ROOT other than the cleaner's own pid and
    log files is expected to be named after a PID.  Entries whose name
    is not an integer are deleted.  For each PID that is no longer
    running, the jobs listed in its ``spawned_processes`` file (if any)
    are killed, and the PID's directory is deleted once no spawned job
    is left running.

    Returns the number of PID-named entries found at the start of the
    pass, including those cleaned up during this pass.
    """
    tmp_dirs = os.listdir(SAGE_TMP_ROOT)

    # Drop the cleaner's own files from the listing.  Each name is
    # removed independently: with a shared try block, a missing
    # 'cleaner.pid' would leave 'cleaner.log' in the list and the log
    # would then be deleted below as a stray file.
    own_file_missing = False
    for own_file in ('cleaner.pid', 'cleaner.log'):
        try:
            tmp_dirs.remove(own_file)
        except ValueError:
            own_file_missing = True
    if own_file_missing:
        logger.error('No cleaner pid/log in SAGE_TMP_ROOT')

    # Convert strings to integers
    pid_list = []
    for dir_entry in tmp_dirs:
        try:
            pid_list.append(int(dir_entry))
        except ValueError:
            badfile = os.path.join(SAGE_TMP_ROOT, dir_entry)
            logger.warning('File %s must not be in SAGE_TMP_ROOT, deleting', badfile)
            rm_rf(badfile)

    logger.info("Checking PIDs %s", pid_list)
    for parent_pid in pid_list:
        # A failure for one PID must not stop the others from being
        # processed, so each PID gets its own exception boundary.
        try:
            if not is_running(parent_pid):
                logger.info("Process %s is no longer running, so we clean up", parent_pid)
                dir_name = os.path.join(SAGE_TMP_ROOT, str(parent_pid))
                spawned_processes = os.path.join(dir_name, 'spawned_processes')
                # Keep the directory if some spawned job survived, so
                # that the next pass tries to kill it again.
                if not os.path.isfile(spawned_processes) \
                   or kill_spawned_jobs(spawned_processes, parent_pid):
                    logger.info('Deleting %s', dir_name)
                    rm_rf(dir_name)
        except Exception:
            logger.error("Exception while cleaning up PID %s:", parent_pid, exc_info=True)

    return len(pid_list)

def cleanup_cruft():
    """
    Delete every directory found directly inside SAGE_TMP_ROOT.

    Intended to remove directories left over from an improper
    shutdown.  Non-directory entries are left alone.
    """
    for entry in os.listdir(SAGE_TMP_ROOT):
        candidate = os.path.join(SAGE_TMP_ROOT, entry)
        if not os.path.isdir(candidate):
            continue
        logger.warning('Removing old directory %s from SAGE_TMP_ROOT', candidate)
        rm_rf(candidate)

def kill_spawned_jobs(jobfile, parent_pid):
    """
    Kill the process group of every job listed in ``jobfile``.

    Each line of ``jobfile`` is parsed as ``PID COMMAND`` (split on the
    first space).  Lines that cannot be parsed are logged and skipped.
    ``parent_pid`` is only used in log messages.

    Returns True if none of the listed PIDs is running afterwards, and
    False if at least one survived, so the caller can try again later.
    """
    logger.info("Killing %s's spawned jobs", parent_pid)

    # Read the whole list up front so the file is closed again before
    # any process is killed.
    with open(jobfile) as f:
        jobs = f.readlines()

    killed_them_all = True
    for job in jobs:
        try:
            pid, cmd = job.strip().split(' ', 1)
            pid = int(pid)
        except ValueError:
            # Raised both for a line without a space and for a
            # non-integer PID field.
            logger.error("Exception while processing job %r from %s, ignoring", job, jobfile)
            continue
        logger.info("--> Killing %r with PID %s and parent PID %s", cmd, pid, parent_pid)
        try:
            pgrp = os.getpgid(pid)
            logger.info("--> Killing process group %s", pgrp)
            os.killpg(pgrp, signal.SIGKILL)
        except OSError:
            # Best effort: the outcome is decided by the is_running()
            # check below, not by whether the kill call succeeded.
            pass
        if is_running(pid):
            logger.error("--> Failed to kill %s", pid)
            # try again later
            killed_them_all = False
    return killed_them_all


def exit_handler():
    """
    Delete the cleaner's pid file and log file.
    """
    logger.info("Removing pidfile and logfile")
    for own_file in (pidfile, logfile):
        rm_rf(own_file)


def _read_pid(path):
    """
    Return the integer PID stored in the file ``path``, or None if the
    file cannot be read or its content is not an integer.
    """
    try:
        with open(path) as f:
            return int(f.read())
    except (OSError, ValueError):
        return None


def setup_daemon():
    """
    Prepare the cleaner for running.

    Creates SAGE_TMP_ROOT, attaches a stderr handler (DEBUG) and a
    logfile handler (INFO) to the module logger, and exits the process
    with status 0 if a pid file (old or new location) names a running
    process.  Otherwise writes the current PID to the pid file and
    registers ``exit_handler`` to run at interpreter exit.
    """
    mkdir_p(SAGE_TMP_ROOT)

    logger.setLevel(logging.DEBUG)
    stderr_handler = logging.StreamHandler()           # log to stderr
    stderr_handler.setLevel(logging.DEBUG)
    logger.addHandler(stderr_handler)
    file_handler = logging.FileHandler(filename=logfile)  # log to logfile
    file_handler.setLevel(logging.INFO)
    logger.addHandler(file_handler)

    logger.info("SAGE_TMP_ROOT = %s", SAGE_TMP_ROOT)

    # Check old pidfile (to allow old and new sage-cleaners to peacefully coexist)
    pid = _read_pid(old_pidfile)
    if pid is not None and is_running(pid):
        logger.info("old sage-cleaner is already running with PID %s, exiting", pid)
        sys.exit(0)

    pid = _read_pid(pidfile)
    if pid is not None and is_running(pid):
        logger.info("sage-cleaner is already running with PID %s, exiting", pid)
        sys.exit(0)

    with open(pidfile, 'w') as f:
        f.write(str(os.getpid()))  # initialize pid file
    atexit.register(exit_handler)


def fix_old_mistakes():
    """
    Delete temp directories and the pid file that earlier versions
    placed in locations which are no longer used.
    """
    import glob

    # Host name with '-', '/' and '\' replaced by underscores, as was
    # once done inconsistently (https://github.com/sagemath/sage/issues/14055)
    wrong_hostname = HOSTNAME.translate(str.maketrans('-/\\', '___'))
    wrong_sage_tmp_root = os.path.join(DOT_SAGE, 'temp', wrong_hostname)
    if wrong_sage_tmp_root != SAGE_TMP_ROOT:
        if os.path.exists(wrong_sage_tmp_root):
            logger.warning('Deleting invalid temp dir %s', wrong_sage_tmp_root)
            rm_rf(wrong_sage_tmp_root)

    # SAGE_TMP in DOT_SAGE/tmp instead of DOT_SAGE/temp
    prefixes = [HOSTNAME]
    if wrong_hostname != HOSTNAME:
        prefixes.append(wrong_hostname)
    # Collect every match before deleting anything.
    stale_dirs = []
    for prefix in prefixes:
        stale_dirs.extend(glob.glob(os.path.join(DOT_SAGE, 'tmp', prefix+'-*')))
    for old_tmp in stale_dirs:
        logger.warning('Deleting invalid temp dir %s', old_tmp)
        rm_rf(old_tmp)

    # PID file before it was moved into SAGE_TMP_ROOT
    rm_rf(old_pidfile)


if __name__ == '__main__':
    setup_daemon()
    fix_old_mistakes()
    logger.info("Starting sage-cleaner with PID %s", os.getpid())
    cleanup_cruft()

    # Base delay between passes: first command-line argument if given,
    # otherwise 10.
    wait = int(sys.argv[1]) if len(sys.argv) > 1 else 10

    # The first pass runs immediately and its duration is not measured.
    running_sages = cleanup()
    cleanup_time = 0.0
    count = 1

    while True:
        # Stop once at least 10 passes have been made and the last one
        # found no PID entries.  The first 10 passes always run because
        # Sage may not have started yet.
        if count >= 10 and running_sages <= 0:
            break
        # Sleep for "wait" plus 20 times the duration of the previous
        # pass, which keeps the load caused by sage-cleaner at or
        # below 5%.
        time.sleep(wait + 20*cleanup_time)
        count += 1
        started = time.time()
        running_sages = cleanup()
        cleanup_time = time.time() - started
        logger.debug("cleanup() #{:d} took {:.2f}s".format(count, cleanup_time))

    logger.info("sage-cleaner is finished")