diff options
| -rw-r--r-- | glusterfsd/src/glusterfsd-mgmt.c | 3 | ||||
| -rw-r--r-- | tests/basic/bug-1595320.t | 92 | ||||
| -rw-r--r-- | tests/basic/posix/shared-statfs.t | 2 | ||||
| -rw-r--r-- | tests/bitrot/bug-1373520.t | 1 | ||||
| -rw-r--r-- | tests/bugs/distribute/bug-1368012.t | 2 | ||||
| -rwxr-xr-x | tests/bugs/distribute/bug-853258.t | 1 | ||||
| -rw-r--r-- | tests/bugs/quota/bug-1293601.t | 3 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-snapshot.c | 2 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 260 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.h | 6 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 7 | 
11 files changed, 329 insertions, 50 deletions
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c index f1b2f9e7123..1e9015440ae 100644 --- a/glusterfsd/src/glusterfsd-mgmt.c +++ b/glusterfsd/src/glusterfsd-mgmt.c @@ -919,6 +919,9 @@ glusterfs_handle_attach (rpcsvc_request_t *req)                                  "got attach for %s but no active graph",                                  xlator_req.name);                  } +                if (ret) { +                        ret = -1; +                }                  glusterfs_translator_info_response_send (req, ret, NULL, NULL); diff --git a/tests/basic/bug-1595320.t b/tests/basic/bug-1595320.t new file mode 100644 index 00000000000..9d856eeadec --- /dev/null +++ b/tests/basic/bug-1595320.t @@ -0,0 +1,92 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc +. $(dirname $0)/../snapshot.rc + +cleanup + +function count_up_bricks { +        $CLI --xml volume status $V0 | grep '<status>1' | wc -l +} + +function count_brick_processes { +        pgrep glusterfsd | wc -l +} + +# Setup 3 LVMS +LVM_PREFIX="test" +TEST init_n_bricks 3 +TEST setup_lvm 3 + +# Start glusterd +TEST glusterd +TEST pidof glusterd + +# Create volume and enable brick multiplexing +TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3 +gluster v set all cluster.brick-multiplex on + +# Start the volume +TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks +EXPECT 1 count_brick_processes + +# Kill volume ungracefully +brick_pid=`pgrep glusterfsd` + +# Make sure every brick root should be consumed by a brick process +n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L1 | grep -v ".glusterfs" | wc -l` +TEST [ $n -eq 1 ] +n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L2 | grep -v ".glusterfs" | wc -l` +TEST [ $n -eq 1 ] +n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L3 | grep -v ".glusterfs" | wc -l` +TEST [ $n -eq 1 ] + +b1_pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/*d-backends-1*.pid) +b2_pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/*d-backends-2*.pid) +b3_pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/*d-backends-3*.pid) + +kill -9 $brick_pid +EXPECT 0 count_brick_processes + +# Unmount 3rd brick root from node +brick_root=$L3 +TEST umount -l $brick_root 2>/dev/null + +# Start the volume only 2 brick should be start +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks +EXPECT 1 count_brick_processes + +brick_pid=`pgrep glusterfsd` + +# Make sure only two brick root should be consumed by a brick process +n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L1 | grep -v ".glusterfs" | wc -l` +TEST [ $n -eq 1 ] +n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L2 | grep -v ".glusterfs" | wc -l` +TEST [ $n -eq 1 ] +n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L3 | grep -v ".glusterfs" | wc -l` +TEST [ $n -eq 0 ] + +# Mount the brick root +TEST mount -t xfs -o nouuid  /dev/test_vg_3/brick_lvm $brick_root + +# Replace brick_pid file to test brick_attach code +TEST cp $b1_pid_file $b3_pid_file + +# Start the volume all brick should be up +TEST $CLI volume start $V0 force + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks +EXPECT 1 count_brick_processes + +# Make sure every brick root should be consumed by a brick process +n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L1 | grep -v ".glusterfs" | wc -l` +TEST [ $n -eq 1 ] +n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L2 | grep -v ".glusterfs" | wc -l` +TEST [ $n -eq 1 ] +n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L3 | grep -v ".glusterfs" | wc -l` +TEST [ $n -eq 1 ] + +cleanup diff --git a/tests/basic/posix/shared-statfs.t b/tests/basic/posix/shared-statfs.t index 8caa9fa2110..33439562ec9 100644 --- a/tests/basic/posix/shared-statfs.t +++ b/tests/basic/posix/shared-statfs.t @@ -23,6 +23,7 @@ TEST MOUNT_LOOP $LO2 $B0/${V0}2  # Create a subdir in mountpoint and use that for volume.  TEST $CLI volume create $V0 $H0:$B0/${V0}1/1 $H0:$B0/${V0}2/1;  TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count  TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0  total_space=$(df -P $M0 | tail -1 | awk '{ print $2}')  # Keeping the size less than 200M mainly because XFS will use @@ -38,6 +39,7 @@ EXPECT 'Stopped' volinfo_field $V0 'Status';  TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1/2 $H0:$B0/${V0}2/2 $H0:$B0/${V0}1/3 $H0:$B0/${V0}2/3  TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count  TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0  total_space=$(df -P $M0 | tail -1 | awk '{ print $2}')  TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ] diff --git a/tests/bitrot/bug-1373520.t b/tests/bitrot/bug-1373520.t index 225d3b1a9bc..c09d424f73b 100644 --- a/tests/bitrot/bug-1373520.t +++ b/tests/bitrot/bug-1373520.t @@ -11,6 +11,7 @@ TEST pidof glusterd  #Create a disperse volume  TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}  TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count  EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'  #Disable md-cache diff --git a/tests/bugs/distribute/bug-1368012.t b/tests/bugs/distribute/bug-1368012.t index f89314b1f2e..b8615549ad9 100644 --- a/tests/bugs/distribute/bug-1368012.t +++ b/tests/bugs/distribute/bug-1368012.t @@ -22,6 +22,7 @@ EXPECT "$V0" volinfo_field $V0 'Volume Name';  EXPECT 'Created' volinfo_field $V0 'Status';  ## Start volume and verify  TEST $CLI volume start $V0; +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count  TEST $CLI volume set $V0 performance.stat-prefetch off  EXPECT 'Started' volinfo_field $V0 'Status';  TEST glusterfs -s $H0 --volfile-id=$V0 $M0 @@ -36,6 +37,7 @@ TEST permission_root=`stat -c "%A" $M0`  TEST echo $permission_root  #Add-brick  TEST $CLI volume add-brick $V0 $H0:/${V0}3 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" online_brick_count  #Allow one lookup to happen  TEST pushd $M0 diff --git a/tests/bugs/distribute/bug-853258.t b/tests/bugs/distribute/bug-853258.t index e39f507baf9..6817d9e2cd3 100755 --- a/tests/bugs/distribute/bug-853258.t +++ b/tests/bugs/distribute/bug-853258.t @@ -31,6 +31,7 @@ done  # Expand the volume and force assignment of new ranges.  TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" online_brick_count  # Force assignment of initial ranges.  TEST $CLI volume rebalance $V0 fix-layout start  EXPECT_WITHIN $REBALANCE_TIMEOUT "fix-layout completed" fix-layout_status_field $V0 diff --git a/tests/bugs/quota/bug-1293601.t b/tests/bugs/quota/bug-1293601.t index def4ef9eda3..741758b73f5 100644 --- a/tests/bugs/quota/bug-1293601.t +++ b/tests/bugs/quota/bug-1293601.t @@ -9,6 +9,7 @@ TEST glusterd  TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4}  TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" online_brick_count  TEST $CLI volume quota $V0 enable  TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0; @@ -27,6 +28,6 @@ EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "1.0MB" quotausage "/"  TEST $CLI volume quota $V0 disable  TEST $CLI volume quota $V0 enable -EXPECT_WITHIN 40 "1.0MB" quotausage "/" +EXPECT_WITHIN 60 "1.0MB" quotausage "/"  cleanup; diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c index ef3ca7014b1..a47b921b3c8 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c @@ -2800,7 +2800,7 @@ glusterd_do_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol,          GLUSTERD_GET_BRICK_PIDFILE (pidfile, snap_vol, brickinfo, priv);          if (gf_is_service_running (pidfile, &pid)) {                  (void) send_attach_req (this, brickinfo->rpc, -                                        brickinfo->path, NULL, +                                        brickinfo->path, NULL, NULL,                                          GLUSTERD_BRICK_TERMINATE);                  brickinfo->status = GF_BRICK_STOPPED;          } diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index d791dc3b977..1d4de35fad6 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -2241,7 +2241,7 @@ retry:                  goto out;          } -        ret = glusterd_brick_process_add_brick (brickinfo, volinfo); +        ret = glusterd_brick_process_add_brick (brickinfo);          if (ret) {                  gf_msg (this->name, GF_LOG_ERROR, 0,                          GD_MSG_BRICKPROC_ADD_BRICK_FAILED, "Adding brick %s:%s " @@ -2434,8 +2434,7 @@ out:  }  int -glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo, -                                  glusterd_volinfo_t *volinfo) +glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo)  {          int                      ret = -1;          xlator_t                *this = NULL; @@ -2563,7 +2562,7 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,                                        brickinfo->hostname, brickinfo->path);                          (void) send_attach_req (this, brickinfo->rpc, -                                                brickinfo->path, NULL, +                                                brickinfo->path, NULL, NULL,                                                  GLUSTERD_BRICK_TERMINATE);                  } else {                          gf_msg_debug (this->name, 0, "About to stop glusterfsd" @@ -5508,23 +5507,93 @@ static int32_t  attach_brick_callback (struct rpc_req *req, struct iovec *iov, int count,                         void *v_frame)  { -        call_frame_t    *frame  = v_frame; -        glusterd_conf_t *conf   = frame->this->private; -        glusterd_brickinfo_t *brickinfo = frame->local; +        call_frame_t                     *frame     = v_frame; +        glusterd_conf_t                  *conf      = frame->this->private; +        glusterd_brickinfo_t             *brickinfo = frame->local; +        glusterd_brickinfo_t             *other_brick = frame->cookie; +        glusterd_volinfo_t               *volinfo   =  NULL; +        xlator_t                         *this      = THIS; +        int                               ret       = -1; +        char                              pidfile1[PATH_MAX]      = {0}; +        char                              pidfile2[PATH_MAX]      = {0}; +        gf_getspec_rsp                    rsp   = {0,}; +        int                               last_brick = -1;          frame->local = NULL; -        brickinfo->port_registered = _gf_true; +        frame->cookie = NULL; + +        ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp); +        if (ret < 0) { +                gf_log (frame->this->name, GF_LOG_ERROR, "XDR decoding error"); +                ret   = -1; +                goto out; +        } + +        ret =  glusterd_get_volinfo_from_brick (other_brick->path, +                                                &volinfo); +        if (ret) { +                gf_msg (THIS->name, GF_LOG_ERROR, 0, +                        GD_MSG_VOLINFO_GET_FAIL, "Failed to get volinfo" +                        " from brick(%s) so  pidfile copying/unlink will fail", +                        other_brick->path); +                goto out; +        } +        GLUSTERD_GET_BRICK_PIDFILE (pidfile1, volinfo, other_brick, conf); +        volinfo = NULL; + +        ret =  glusterd_get_volinfo_from_brick (brickinfo->path, +                                                &volinfo); +        if (ret) { +                gf_msg (THIS->name, GF_LOG_ERROR, 0, +                        GD_MSG_VOLINFO_GET_FAIL, "Failed to get volinfo" +                        " from brick(%s) so  pidfile copying/unlink will fail", +                        brickinfo->path); +                goto out; +        } +        GLUSTERD_GET_BRICK_PIDFILE (pidfile2, volinfo, brickinfo, conf); + +        if (rsp.op_ret == 0) { +                brickinfo->port_registered = _gf_true; + +                /* PID file is copied once brick has attached +                   successfully +                */ +                glusterd_copy_file (pidfile1, pidfile2); +                brickinfo->status = GF_BRICK_STARTED; +                brickinfo->rpc = rpc_clnt_ref (other_brick->rpc); +                gf_log (THIS->name, GF_LOG_INFO, "brick %s is attached successfully", +                        brickinfo->path); +        } else { +                gf_log (THIS->name, GF_LOG_INFO, "attach_brick failed pidfile" +                        " is %s for brick_path %s", pidfile2, brickinfo->path); +                brickinfo->port = 0; +                brickinfo->status = GF_BRICK_STOPPED; +                ret = glusterd_brick_process_remove_brick (brickinfo, &last_brick); +                if (ret) +                        gf_msg_debug (this->name, 0, "Couldn't remove brick from" +                                      " brick process"); +                LOCK (&volinfo->lock); +                ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_NONE); +                UNLOCK (&volinfo->lock); +                if (ret) { +                       gf_msg (this->name, GF_LOG_ERROR, 0, +                               GD_MSG_VOLINFO_SET_FAIL, +                               "Failed to store volinfo of " +                               "%s volume", volinfo->volname); +                       goto out; +                } +        } +out:          synclock_lock (&conf->big_lock);          --(conf->blockers);          synclock_unlock (&conf->big_lock); -          STACK_DESTROY (frame->root);          return 0;  }  int  send_attach_req (xlator_t *this, struct rpc_clnt *rpc, char *path, -                 glusterd_brickinfo_t *brickinfo, int op) +                 glusterd_brickinfo_t *brickinfo, glusterd_brickinfo_t *other_brick, int op)  {          int                             ret      = -1;          struct iobuf                    *iobuf    = NULL; @@ -5598,6 +5667,7 @@ send_attach_req (xlator_t *this, struct rpc_clnt *rpc, char *path,          if (op == GLUSTERD_BRICK_ATTACH) {                  frame->local = brickinfo; +                frame->cookie = other_brick;                  cbkfn = attach_brick_callback;          }          /* Send the msg */ @@ -5671,27 +5741,19 @@ attach_brick (xlator_t *this,                  rpc = rpc_clnt_ref (other_brick->rpc);                  if (rpc) {                          ret = send_attach_req (this, rpc, path, brickinfo, +                                               other_brick,                                                 GLUSTERD_BRICK_ATTACH);                          rpc_clnt_unref (rpc);                          if (!ret) {                                  ret = pmap_registry_extend (this, other_brick->port, -                                                            brickinfo->path); +                                            brickinfo->path);                                  if (ret != 0) {                                          gf_log (this->name, GF_LOG_ERROR,                                                  "adding brick to process failed"); -                                        return ret; +                                        goto out;                                  } - -                                /* PID file is copied once brick has attached -                                  successfully -                                */ -                                glusterd_copy_file (pidfile1, pidfile2);                                  brickinfo->port = other_brick->port; -                                brickinfo->status = GF_BRICK_STARTED; -                                brickinfo->rpc = -                                        rpc_clnt_ref (other_brick->rpc); -                                ret = glusterd_brick_process_add_brick (brickinfo, -                                                                        volinfo); +                                ret = glusterd_brick_process_add_brick (brickinfo);                                  if (ret) {                                          gf_msg (this->name, GF_LOG_ERROR, 0,                                                  GD_MSG_BRICKPROC_ADD_BRICK_FAILED, @@ -5700,23 +5762,17 @@ attach_brick (xlator_t *this,                                                  brickinfo->path);                                          return ret;                                  } - -                                if (ret) { -                                        gf_msg_debug (this->name, 0, "Add brick" -                                                    " to brick process failed"); -                                        return ret; -                                } -                                  return 0;                          }                  }                  /* -                 * It might not actually be safe to manipulate the lock like -                 * this, but if we don't then the connection can never actually -                 * complete and retries are useless.  Unfortunately, all of the -                 * alternatives (e.g. doing all of this in a separate thread) -                 * are much more complicated and risky.  TBD: see if there's a -                 * better way +                 * It might not actually be safe to manipulate the lock +                 * like this, but if we don't then the connection can +                 * never actually complete and retries are useless. +                 * Unfortunately, all of the alternatives (e.g. doing +                 * all of this in a separate thread) are much more +                 * complicated and risky. +                 * TBD: see if there's a better way                   */                  synclock_unlock (&conf->big_lock);                  sleep (1); @@ -5945,6 +6001,7 @@ find_compatible_brick (glusterd_conf_t *conf,          return NULL;  } +  /* Below function is use to populate sockpath based on passed pid     value as a argument after check the value from proc and also     check if passed pid is match with running  glusterfs process @@ -6031,6 +6088,62 @@ glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)  } +char * +search_brick_path_from_proc (pid_t brick_pid, char *brickpath) +{ +        struct dirent *dp = NULL; +        DIR *dirp = NULL; +        size_t len = 0; +        int fd = -1; +        char path[PATH_MAX] = {0,}; +        char sym[PATH_MAX] = {0,}; +        struct dirent  scratch[2] = {{0,},}; +        char *brick_path = NULL; + +        if (!brickpath) +                goto out; + +        sprintf(path, "/proc/%d/fd/", brick_pid); +        dirp = sys_opendir (path); +        if (!dirp) +                goto out; + +        len = strlen (path); +        if (len >= (sizeof(path) - 2)) +                goto out; + +        fd = dirfd (dirp); +        if (fd  < 0) +                goto out; + +        memset(path, 0, sizeof(path)); +        memset(sym, 0, sizeof(sym)); + +        while ((dp = sys_readdir(dirp, scratch))) { +                if (!strcmp(dp->d_name, ".") || +                    !strcmp(dp->d_name, "..")) +                        continue; + +                /* check for non numerical descriptors */ +                if (!strtol(dp->d_name, (char **)NULL, 10)) +                        continue; + +                len = readlinkat (fd, dp->d_name, sym, sizeof(sym) - 1); +                if (len > 1) { +                        sym[len] = '\0'; +                        if (!strcmp (sym, brickpath)) { +                                brick_path = gf_strdup(sym); +                                break; +                        } +                        memset (sym, 0, sizeof (sym)); +                } +        } +out: +        sys_closedir(dirp); +        return brick_path; +} + +  int  glusterd_brick_start (glusterd_volinfo_t *volinfo,                        glusterd_brickinfo_t *brickinfo, @@ -6044,7 +6157,9 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,          int32_t                 pid                   = -1;          char                    pidfile[PATH_MAX]     = {0};          char                    socketpath[PATH_MAX]  = {0}; +        char                    *brickpath            = NULL;          glusterd_volinfo_t      *other_vol; +        struct statvfs           brickstat = {0,};          this = THIS;          GF_ASSERT (this); @@ -6090,6 +6205,28 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,                  brickinfo->start_triggered = _gf_true;          GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf); + +        ret = sys_statvfs (brickinfo->path, &brickstat); +        if (ret) { +                gf_msg (this->name, GF_LOG_ERROR, +                        errno, GD_MSG_BRICKINFO_CREATE_FAIL, +                        "failed to get statfs() call on brick %s", +                        brickinfo->path); +                goto out; +        } + +        /* Compare fsid is helpful to ensure the existence of a brick_root +           path before the start/attach a brick +        */ +        if (brickinfo->statfs_fsid && +            (brickinfo->statfs_fsid != brickstat.f_fsid)) { +                gf_log (this->name, GF_LOG_ERROR, +                        "fsid comparison is failed it means Brick root path" +                        " %s is not created by glusterd, start/attach will also fail", +                        brickinfo->path); +                goto out; +        } +          if (gf_is_service_running (pidfile, &pid)) {                  if (brickinfo->status != GF_BRICK_STARTING &&                      brickinfo->status != GF_BRICK_STARTED) { @@ -6109,12 +6246,29 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,                           * TBD: re-use RPC connection across bricks                           */                          if (is_brick_mx_enabled ()) { +                                brickpath = search_brick_path_from_proc (pid, brickinfo->path); +                                if (!brickpath) { +                                        gf_log (this->name, GF_LOG_INFO, +                                                "Either pid %d is not running or brick" +                                                " path %s is not consumed so cleanup pidfile", +                                                pid, brickinfo->path); +                                        /* search brick is failed so unlink pidfile */ +                                        if (sys_access (pidfile , R_OK) == 0) { +                                                sys_unlink (pidfile); +                                        } +                                        goto run; +                                } +                                GF_FREE (brickpath);                                  ret = glusterd_get_sock_from_brick_pid (pid, socketpath,                                                                          sizeof(socketpath));                                  if (ret) { -                                        gf_log (this->name, GF_LOG_DEBUG, +                                        gf_log (this->name, GF_LOG_INFO,                                                  "Either pid %d is not running or is not match"                                                  " with any running brick process ", pid); +                                        /* Fetch unix socket is failed so unlink pidfile */ +                                        if (sys_access (pidfile , R_OK) == 0) { +                                                sys_unlink (pidfile); +                                        }                                          goto run;                                  }                          } else { @@ -6129,7 +6283,7 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,                          (void) glusterd_brick_connect (volinfo, brickinfo,                                          socketpath); -                        ret = glusterd_brick_process_add_brick (brickinfo, volinfo); +                        ret = glusterd_brick_process_add_brick (brickinfo);                          if (ret) {                                  gf_msg (this->name, GF_LOG_ERROR, 0,                                          GD_MSG_BRICKPROC_ADD_BRICK_FAILED, @@ -6169,6 +6323,10 @@ run:                  if (ret == 0) {                          goto out;                  } +                /* Attach_brick is failed so unlink pidfile */ +                if (sys_access (pidfile , R_OK) == 0) { +                        sys_unlink (pidfile); +                }          }          /* @@ -7137,14 +7295,15 @@ glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo,                              dict_t  *dict, int32_t count)  { -        int             ret                   = -1; -        int32_t         pid                   = -1; -        char            key[1024 + 16]        = {0}; -        char            base_key[1024]        = {0}; -        char            pidfile[PATH_MAX]     = {0}; +        int              ret                  = -1; +        int32_t          pid                  = -1; +        char             key[1024]            = {0}; +        char             base_key[1024]       = {0}; +        char             pidfile[PATH_MAX]    = {0};          xlator_t        *this                 = NULL;          glusterd_conf_t *priv                 = NULL; -        gf_boolean_t    brick_online          = _gf_false; +        gf_boolean_t     brick_online         = _gf_false; +        char            *brickpath            = NULL;          GF_ASSERT (volinfo);          GF_ASSERT (brickinfo); @@ -7201,7 +7360,20 @@ glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo,          if (glusterd_is_brick_started (brickinfo)) {                  if (gf_is_service_running (pidfile, &pid) &&                      brickinfo->port_registered) { -                        brick_online = _gf_true; +                        if (!is_brick_mx_enabled ()) { +                                brick_online = _gf_true; +                        } else { +                                brickpath = search_brick_path_from_proc (pid, brickinfo->path); +                                if (!brickpath) { +                                        gf_log (this->name, GF_LOG_INFO, +                                                "brick path %s is not consumed", +                                                brickinfo->path); +                                        brick_online = _gf_false; +                                } else { +                                        brick_online = _gf_true; +                                        GF_FREE (brickpath); +                                } +                        }                  } else {                          pid = -1;                  } diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 51e7c43d781..eded8e231d4 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -178,8 +178,7 @@ int32_t  glusterd_resolve_brick (glusterd_brickinfo_t *brickinfo);  int -glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo, -                                  glusterd_volinfo_t *volinfo); +glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo);  int  glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo, @@ -200,7 +199,8 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,  int  send_attach_req (xlator_t *this, struct rpc_clnt *rpc, char *path, -                 glusterd_brickinfo_t *brick, int op); +                 glusterd_brickinfo_t *brick, +                 glusterd_brickinfo_t *other_brick, int op);  glusterd_volinfo_t *  glusterd_volinfo_ref (glusterd_volinfo_t *volinfo); diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index b6cef79df7f..4c1f0be3d06 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -2608,8 +2608,13 @@ glusterd_start_volume (glusterd_volinfo_t *volinfo, int flags,          }          glusterd_set_volume_status (volinfo, GLUSTERD_STATUS_STARTED); - +        /* Update volinfo on disk in critical section because +           attach_brick_callback can also call store_volinfo for same +           volume to update volinfo on disk +        */ +        LOCK (&volinfo->lock);          ret = glusterd_store_volinfo (volinfo, verincrement); +        UNLOCK (&volinfo->lock);          if (ret) {                  gf_msg (this->name, GF_LOG_ERROR, 0,                          GD_MSG_VOLINFO_SET_FAIL,  | 
