summaryrefslogtreecommitdiffstats
path: root/extras/snap_scheduler/gcron.py
diff options
context:
space:
mode:
authorAvra Sengupta <asengupt@redhat.com>2015-02-09 18:03:20 +0530
committerVijay Bellur <vbellur@redhat.com>2015-03-18 18:31:54 -0700
commit7c4461329bba38b72536ee71a8172bc861ddf890 (patch)
treec9cb709f17892f20b3e3addafad5f62a41590b1e /extras/snap_scheduler/gcron.py
parent3e18f093974c85ac92a4c48f0cd13aa9ff9c5cac (diff)
snapshot/scheduling: A cron based scheduler for snapshot scheduling
GlusterFS volume snapshot provides a point-in-time copy of a GlusterFS volume. Currently, GlusterFS volume snapshots can be easily scheduled by setting up cron jobs on one of the nodes in the GlusterFS trusted storage pool. This has a single point of failure (SPOF), as scheduled jobs can be missed if the node running the cron jobs dies. This patch addresses that problem. The snap_scheduler.py helper script expects the user to install the argparse python module before using it. Further details are available at: http://www.gluster.org/community/documentation/index.php/Features/Scheduling_of_Snapshot Change-Id: I2c357af5b7d3e66f270d20eef50cdeecdcbe15c7 BUG: 1198027 Signed-off-by: Avra Sengupta <asengupt@redhat.com> Signed-off-by: Aravinda VK <avishwan@redhat.com> Reviewed-on: http://review.gluster.org/9788 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'extras/snap_scheduler/gcron.py')
-rwxr-xr-xextras/snap_scheduler/gcron.py146
1 files changed, 146 insertions, 0 deletions
diff --git a/extras/snap_scheduler/gcron.py b/extras/snap_scheduler/gcron.py
new file mode 100755
index 00000000000..763eb1460b8
--- /dev/null
+++ b/extras/snap_scheduler/gcron.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+from __future__ import print_function
+import subprocess
+import os
+import os.path
+import sys
+import time
+import logging
+import logging.handlers
+import fcntl
+
+
+GCRON_TASKS = "/var/run/gluster/snaps/shared_storage/glusterfs_snap_cron_tasks"  # task file; its mtime is compared in main()
+GCRON_CROND_TASK = "/etc/cron.d/glusterfs_snap_cron_tasks"  # cron.d entry that main() touches on --update
+LOCK_FILE_DIR = "/var/run/gluster/snaps/shared_storage/lock_files/"  # directory joined with the lock file name in main()
+log = logging.getLogger("gcron-logger")  # module-wide logger; handlers are attached by initLogger()
+start_time = 0.0  # set by main() before the job runs; doJob() compares lock-file mtime against it
+
+
+def initLogger(script_name):
+ log.setLevel(logging.DEBUG)
+ logFormat = "[%(asctime)s %(filename)s:%(lineno)s %(funcName)s] "\
+ "%(levelname)s %(message)s"
+ formatter = logging.Formatter(logFormat)
+
+ sh = logging.handlers.SysLogHandler()
+ sh.setLevel(logging.ERROR)
+ sh.setFormatter(formatter)
+
+ process = subprocess.Popen(["gluster", "--print-logdir"],
+ stdout=subprocess.PIPE)
+ out, err = process.communicate()
+ if process.returncode == 0:
+ logfile = os.path.join(out.strip(), script_name[:-3]+".log")
+
+ fh = logging.FileHandler(logfile)
+ fh.setLevel(logging.DEBUG)
+ fh.setFormatter(formatter)
+
+ log.addHandler(sh)
+ log.addHandler(fh)
+
+
+def takeSnap(volname=""):
+ success = True
+ if volname == "":
+ log.debug("No volname given")
+ return False
+
+ timeStr = time.strftime("%Y%m%d%H%M%S")
+ cli = ["gluster",
+ "snapshot",
+ "create",
+ "%s-snapshot-%s %s" % (volname, timeStr, volname)]
+ log.debug("Running command '%s'", " ".join(cli))
+
+ p = subprocess.Popen(cli, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ out, err = p.communicate()
+ rv = p.returncode
+
+ log.debug("Command '%s' returned '%d'", " ".join(cli), rv)
+
+ if rv:
+ log.error("Snapshot of %s failed", volname)
+ log.error("Command output:")
+ log.error(err)
+ success = False
+ else:
+ log.info("Snapshot of %s successful", volname)
+
+ return success
+
+
+def doJob(name, lockFile, jobFunc, volname):
+ success = True
+ try:
+ f = os.open(lockFile, os.O_RDWR | os.O_NONBLOCK)
+ try:
+ fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ mtime = os.path.getmtime(lockFile)
+ global start_time
+ log.debug("%s last modified at %s", lockFile, time.ctime(mtime))
+ if mtime < start_time:
+ log.debug("Processing job %s", name)
+ if jobFunc(volname):
+ log.info("Job %s succeeded", name)
+ else:
+ log.error("Job %s failed", name)
+ success = False
+ os.utime(lockFile, None)
+ else:
+ log.info("Job %s has been processed already", name)
+ fcntl.flock(f, fcntl.LOCK_UN)
+ except IOError as (errno, strerror):
+ log.info("Job %s is being processed by another agent", name)
+ os.close(f)
+ except IOError as (errno, strerror):
+ log.debug("Failed to open lock file %s : %s", lockFile, strerror)
+ log.error("Failed to process job %s", name)
+ success = False
+
+ return success
+
+
+def main():
+ script_name = os.path.basename(__file__)
+ initLogger(script_name)
+ global start_time
+ if sys.argv[1] == "--update":
+ if os.lstat(GCRON_TASKS).st_mtime > \
+ os.lstat(GCRON_CROND_TASK).st_mtime:
+ try:
+ process = subprocess.Popen(["touch", "-h", GCRON_CROND_TASK],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ out, err = process.communicate()
+ if process.returncode != 0:
+ log.error("Failed to touch %s. Error: %s.",
+ GCRON_CROND_TASK, err)
+ except IOError as (errno, strerror):
+ log.error("Failed to touch %s. Error: %s.",
+ GCRON_CROND_TASK, strerror)
+ return
+
+ volname = sys.argv[1]
+ locking_file = os.path.join(LOCK_FILE_DIR, sys.argv[2])
+ log.debug("locking_file = %s", locking_file)
+ log.debug("volname = %s", volname)
+
+ start_time = int(time.time())
+
+ doJob("Snapshot-" + volname, locking_file, takeSnap, volname)
+
+
+if __name__ == "__main__":  # run main() only when executed as a script, not on import
+    main()