fuse, glusterfsd: mount logic fixes

Commit 7d0397c2 introduced two issues: i) broke the libfuse derived mount logic (details below) ii) in case of a daemonized glusterfs client is ran as daemon, parent process can return earlier than the mount is in place, which breaks agents that programmatically do a gluster mount via a direct call to glusterfs (ie. not via mount(8)). This patch fixes these issues by a refactor that merges the approaches sported by commits 7d0397c2 fuse: allow requests during mount (needed for SELinux labels) c5d781e0 upon daemonizing, wait on mtab update to terminate in parent Original daemonized libfuse event flow is as follows: try: fd = open("/dev/fuse") mount("-oopts,fd=%s" % fd ...) mount(8) -f # manipulate mtab except: sp = socketpair() env _FUSE_COMMFD=sp fusermount -oopts fd = receive_fd(sp) where fusermount(1) does: fd = open("/dev/fuse") mount("-oopts,fd=%d" % fd ...) sp = atoi(getenv("_FUSE_COMMFD")) send_fd(sp, fd) daemonize( # in child fuse_loop(fd) ) # in parent exit() As of 013850c9 (instead of adopting FUSE's 47e61004¹), we went for async mtab manipulation, and as of c5d781e0, still wanted keep that in sync with termination of daemon parent, so we changed it to: try: fd = open("/dev/fuse") mount("-oopts,fd=%s" % fd ...) pid = fork( # in child mount(8) -f ) except: sp = socketpair() env _FUSE_COMMFD=sp fusermount -oopts fd = receive_fd(sp) daemonize( fuse_loop(fd) ) waitpid(pid) exit() (Note the new approch came only to direct [privileged] mount, so fusermount based mounting was already partially broken.) As of 7d0397c2, with the purpose of facilitating async mount, the event flow was practically reduced to: fd = open("/dev/fuse") fork( mount("-oopts,fd=%s" % fd ...) fork( mount(8) -n ) ) daemonize( fuse_loop(fd) ) exit() Thus fusermount based mounting become defunct; however, the dead code was still kept around. So, we should either drop it or fix it. Also, the mtab manipulator is forked into yet another child with no purpose, while syncing with it in daemon parent is broken. mount(2) is neither synced with parent. Now we are coming to the following scheme: fd = open("/dev/fuse") pid = fork( try: mount("-oopts,fd=%s" % fd ...) mount(8) -n except: env _FUSE_DEVFD=fd fusermount -oopts ) where fusermount(1) does: fd = getenv("_FUSE_DEVFD") mount("-oopts,fd=%s" % fd ...) daemonize( fuse_loop(fd) ) waitpid(pid) exit() Nb.: - We can't help losing compatibility with upstream fusermount, as it sends back the fd only when mount(2) is completed, thus defeating the async mount approach. The 'getenv("_FUSE_DEVFD")' mechanism is specfic to glusterfs' fusermount (at the moment -- sure we can talk about it with upstream) - fusermount opens /dev/fuse at same privilege level as of original process², so we can bravely go on with doing the open unconditionally in original process - Original mounting code actually tries to mount through fusermount _twice_: if first attempt fails, then, assuming subtype support is missing in kernel, it tries again subtype stripped. However, this is redundant, as fusermount internally also performs the subtype check³. Therefore we simplified the logic to have just a single fusermount call. - we revert the changes to mount.glusterfs as of 7d0397c2, as now there is no issue with glusterfs to work around in that scope ¹ http://fuse.git.sourceforge.net/git/gitweb.cgi?p=fuse/fuse;a=blobdiff;f=ChangeLog;h=47e61004;hb=4c3d9b19;hpb=e61b775a ² http://fuse.git.sourceforge.net/git/gitweb.cgi?p=fuse/fuse;a=blob;f=util/fusermount.c;h=b2e87d95#l1023 ³ http://fuse.git.sourceforge.net/git/gitweb.cgi?p=fuse/fuse;a=blob;f=util/fusermount.c;h=b2e87d95#l839 Change-Id: I0c4ab70e0c5ad7b27337228749b266bcd0ba941d BUG: 811217 Signed-off-by: Csaba Henk <csaba@redhat.com> Reviewed-on: http://review.gluster.com/3341 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Jeff Darcy <jdarcy@redhat.com> Reviewed-by: Anand Avati <avati@redhat.com>
author: Csaba Henk <csaba@redhat.com> 2012-05-14 17:07:28 +0530
committer: Anand Avati <avati@redhat.com> 2012-05-21 13:47:09 -0700
commit: 439d0426dd60ef6c1f4af13fcbbe73f1d206acc6 (patch)
tree: ab9b7637bd223bf7862e2861dd562faa541687eb /xlators/mount
parent: a2b7f11f279b9acc74c1a3b6d893fda7c160683d (diff)
2 files changed, 21 insertions, 62 deletions
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 401ff64955e..5131d6c05ba 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -3867,29 +3867,17 @@ int
 fuse_get_mount_status (xlator_t *this)
 {
         int             kid_status = -1;
-        pid_t           kid_pid = -1;
         fuse_private_t *priv = this->private;
-        int             our_status = -1;
 
         if (read(priv->status_pipe[0],&kid_status, sizeof(kid_status)) < 0) {
                 gf_log (this->name, GF_LOG_ERROR, "could not get mount status");
-                goto out;
+                kid_status = -1;
         }
         gf_log (this->name, GF_LOG_DEBUG, "mount status is %d", kid_status);
 
-        if (read(priv->status_pipe[0],&kid_pid, sizeof(kid_pid)) < 0) {
-                gf_log (this->name, GF_LOG_ERROR, "could not get mount PID");
-                goto out;
-        }
-        gf_log (this->name, GF_LOG_DEBUG, "mount PID is %d", kid_pid);
-
-        (void)waitpid(kid_pid,NULL,0);
-        our_status = kid_status;
-
-out:
         close(priv->status_pipe[0]);
         close(priv->status_pipe[1]);
-        return our_status;
+        return kid_status;
 }
 
 static void *
@@ -4372,7 +4360,7 @@ init (xlator_t *this_xl)
         int                xl_name_allocated = 0;
         int                fsname_allocated = 0;
         glusterfs_ctx_t   *ctx = NULL;
-        gf_boolean_t       sync_mtab = _gf_false;
+        gf_boolean_t       sync_to_mount = _gf_false;
         char              *mnt_args = NULL;
 
         if (this_xl == NULL)
@@ -4520,11 +4508,11 @@ init (xlator_t *this_xl)
                 priv->fuse_dump_fd = ret;
         }
 
-        sync_mtab = _gf_false;
-        ret = dict_get_str (options, "sync-mtab", &value_string);
+        sync_to_mount = _gf_false;
+        ret = dict_get_str (options, "sync-to-mount", &value_string);
         if (ret == 0) {
                 ret = gf_string2boolean (value_string,
-                                         &sync_mtab);
+                                         &sync_to_mount);
                 GF_ASSERT (ret == 0);
         }
 
@@ -4570,7 +4558,7 @@ init (xlator_t *this_xl)
         }
 
         priv->fd = gf_fuse_mount (priv->mount_point, fsname, mnt_args,
-                                  sync_mtab ? &ctx->mtab_pid : NULL,
+                                  sync_to_mount ? &ctx->mnt_pid : NULL,
                                   priv->status_pipe[1]);
         if (priv->fd == -1)
                 goto cleanup_exit;
@@ -4682,7 +4670,7 @@ struct volume_options options[] = {
         { .key  = {"uid-map-root"},
           .type = GF_OPTION_TYPE_INT
         },
-        { .key  = {"sync-mtab"},
+        { .key  = {"sync-to-mount"},
           .type = GF_OPTION_TYPE_BOOL
         },
         { .key = {"read-only"},
diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in
index 39f81ce592e..0181e4e174c 100755
--- a/xlators/mount/fuse/utils/mount.glusterfs.in
+++ b/xlators/mount/fuse/utils/mount.glusterfs.in
@@ -55,36 +55,6 @@ _init ()
     export LD_LIBRARY_PATH
 }
 
-# Mount happens asynchronously, so the command status alone will never be
-# sufficient.  Instead, we have to wait for multiple events representing
-# different possible outcomes.
-wait_for ()
-{
-    local daemon_pid=$1
-    local mount_point=$2
-
-    waited=0
-    while true; do
-        kill -s 0 $daemon_pid
-        if [ $? != 0 ]; then
-                echo "Gluster client daemon exited unexpectedly."
-                umount $mount_point &> /dev/null
-                exit 1
-        fi
-        inode=$( ${getinode} $mount_point 2>/dev/null);
-        if [ "$inode" = "1" ]; then
-            break
-        fi
-        if [ $waited -ge 10 ]; then
-            break
-        fi
-        sleep 1
-        waited=$((waited+1))
-    done
-
-    echo "$inode"
-}
-
 start_glusterfs ()
 {
     if [ -n "$log_level_str" ]; then
@@ -172,33 +142,34 @@ start_glusterfs ()
         cmd_line=$(echo "$cmd_line --volfile=$volfile_loc");
     fi
 
-    cmd_line=$(echo "$cmd_line $mount_point")
-    err=0
-    $cmd_line
-    daemon_pid=$$
+    cmd_line=$(echo "$cmd_line $mount_point");
+    err=0;
+    $cmd_line;
+
+
+    inode=$( ${getinode} $mount_point 2>/dev/null);
 
-    # Wait for the inode to change *or* for the daemon to exit (indicating a
-    # problem with the mount).
-    inode=$(wait_for $daemon_pid $mount_point)
     # this is required if the stat returns error
     if [ -z "$inode" ]; then
-        inode="0"
+        inode="0";
     fi
 
     # retry the failover
+    # if [ $? != "0" ]; then # <--- TODO: Once glusterfs returns proper error code, change it.
     if [ $inode -ne 1 ]; then
+        err=1;
         if [ -n "$cmd_line1" ]; then
             cmd_line1=$(echo "$cmd_line1 $mount_point");
-            $cmd_line1
-            daemon_pid=$$
+            $cmd_line1;
+            err=0;
 
-            inode=$(wait_for $daemon_pid $mount_point)
+            inode=$( ${getinode} $mount_point 2>/dev/null);
             # this is required if the stat returns error
             if [ -z "$inode" ]; then
                 inode="0";
             fi
             if [ $inode -ne 1 ]; then
-                err=1
+                err=1;
             fi
         fi
     fi
author	Csaba Henk <csaba@redhat.com>	2012-05-14 17:07:28 +0530
committer	Anand Avati <avati@redhat.com>	2012-05-21 13:47:09 -0700
commit	439d0426dd60ef6c1f4af13fcbbe73f1d206acc6 (patch)
tree	ab9b7637bd223bf7862e2861dd562faa541687eb /xlators/mount
parent	a2b7f11f279b9acc74c1a3b6d893fda7c160683d (diff)