diff options
| -rw-r--r-- | libglusterfs/src/glusterfs/common-utils.h | 3 | ||||
| -rw-r--r-- | tests/bugs/replicate/bug-1655854-support-dist-to-rep3-arb-conversion.t | 95 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 83 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-inode-write.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-read-txn.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 13 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.h | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 4 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 2 | 
10 files changed, 176 insertions, 32 deletions
diff --git a/libglusterfs/src/glusterfs/common-utils.h b/libglusterfs/src/glusterfs/common-utils.h index 55d8f8cc931..76d1e5feee5 100644 --- a/libglusterfs/src/glusterfs/common-utils.h +++ b/libglusterfs/src/glusterfs/common-utils.h @@ -169,7 +169,8 @@ enum _gf_special_pid {      GF_CLIENT_PID_BITD = -8,      GF_CLIENT_PID_SCRUB = -9,      GF_CLIENT_PID_TIER_DEFRAG = -10, -    GF_SERVER_PID_TRASH = -11 +    GF_SERVER_PID_TRASH = -11, +    GF_CLIENT_PID_ADD_REPLICA_MOUNT = -12  };  enum _gf_xlator_ipc_targets { diff --git a/tests/bugs/replicate/bug-1655854-support-dist-to-rep3-arb-conversion.t b/tests/bugs/replicate/bug-1655854-support-dist-to-rep3-arb-conversion.t new file mode 100644 index 00000000000..783016dc3c0 --- /dev/null +++ b/tests/bugs/replicate/bug-1655854-support-dist-to-rep3-arb-conversion.t @@ -0,0 +1,95 @@ +#!/bin/bash +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +cleanup; + +TEST glusterd +TEST pidof glusterd + +# Conversion from 2x1 to 2x3 + +TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1} +EXPECT 'Created' volinfo_field $V0 'Status'; +TEST $CLI volume start $V0 +EXPECT 'Started' volinfo_field $V0 'Status'; + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 + +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; +TEST mkdir $M0/dir +TEST dd if=/dev/urandom of=$M0/dir/file bs=100K count=5 +file_md5sum=$(md5sum $M0/dir/file | awk '{print $1}') + +TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}{2..5} + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}3 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}4 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5 + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 4 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 5 + +# Trigger heal and wait for it to complete +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + +# Check whether the directory & file are healed to the newly added bricks +TEST ls $B0/${V0}2/dir +TEST ls $B0/${V0}3/dir +TEST ls $B0/${V0}4/dir +TEST ls $B0/${V0}5/dir + +TEST [ $file_md5sum == $(md5sum $B0/${V0}4/dir/file | awk '{print $1}') ] +TEST [ $file_md5sum == $(md5sum $B0/${V0}5/dir/file | awk '{print $1}') ] + + +# Conversion from 2x1 to 2x(2+1) + +TEST $CLI volume create $V1 $H0:$B0/${V1}{0,1} +EXPECT 'Created' volinfo_field $V1 'Status'; +TEST $CLI volume start $V1 +EXPECT 'Started' volinfo_field $V1 'Status'; + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}1 + +TEST $GFS --volfile-id=$V1 --volfile-server=$H0 $M1; +TEST mkdir $M1/dir +TEST dd if=/dev/urandom of=$M1/dir/file bs=100K count=5 +file_md5sum=$(md5sum $M1/dir/file | awk '{print $1}') + +TEST $CLI volume add-brick $V1 replica 3 arbiter 1 $H0:$B0/${V1}{2..5} +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}2 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}3 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}4 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}5 + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 
2 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 3 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 4 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 5 + +# Trigger heal and wait for it to complete +TEST $CLI volume heal $V1 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V1 + +# Check whether the directory & file are healed to the newly added bricks +TEST ls $B0/${V1}2/dir +TEST ls $B0/${V1}3/dir +TEST ls $B0/${V1}4/dir +TEST ls $B0/${V1}5/dir + +EXPECT "0" stat -c %s $B0/${V1}5/dir/file +TEST [ $file_md5sum == $(md5sum $B0/${V1}4/dir/file | awk '{print $1}') ] + +cleanup; diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 40740814103..32fa634def1 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1268,7 +1268,7 @@ afr_inode_refresh_done(call_frame_t *frame, xlator_t *this, int error)      success_replies = alloca0(priv->child_count);      afr_fill_success_replies(local, priv, success_replies); -    if (!afr_has_quorum(success_replies, this)) { +    if (!afr_has_quorum(success_replies, this, frame)) {          error = afr_final_errno(frame->local, this->private);          if (!error)              error = afr_quorum_errno(priv); @@ -2253,7 +2253,8 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this,          *read_subvol = spb_choice;      } else if (!priv->quorum_count) {          *read_subvol = afr_first_up_child(frame, this); -    } else if (priv->quorum_count && afr_has_quorum(data_readable, this)) { +    } else if (priv->quorum_count && +               afr_has_quorum(data_readable, this, NULL)) {          /* read_subvol is guaranteed to be valid if we hit this path. 
*/          *read_subvol = afr_first_up_child(frame, this);      } else { @@ -2396,7 +2397,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)      read_subvol = -1;      memset(readable, 0, sizeof(*readable) * priv->child_count);      if (can_interpret) { -        if (!afr_has_quorum(success_replies, this)) +        if (!afr_has_quorum(success_replies, this, NULL))              goto cant_interpret;          /* It is safe to call afr_replies_interpret() because we have             a response from all the UP subvolumes and all of them resolved @@ -2873,7 +2874,7 @@ afr_lookup_entry_heal(call_frame_t *frame, xlator_t *this)      if (name_state_mismatch) {          if (!priv->quorum_count)              goto name_heal; -        if (!afr_has_quorum(success, this)) +        if (!afr_has_quorum(success, this, NULL))              goto name_heal;          if (op_errno)              goto name_heal; @@ -2961,7 +2962,6 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)  {      afr_private_t *priv = NULL;      afr_local_t *local = NULL; -    int op_errno = 0;      int read_subvol = -1;      unsigned char *data_readable = NULL;      unsigned char *success_replies = NULL; @@ -2975,14 +2975,13 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)      if (AFR_COUNT(success_replies, priv->child_count) > 0)          local->op_ret = 0; -    op_errno = afr_final_errno(frame->local, this->private); -      if (local->op_ret < 0) { -        AFR_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); -        return; +        local->op_ret = -1; +        local->op_errno = afr_final_errno(frame->local, this->private); +        goto error;      } -    if (!afr_has_quorum(success_replies, this)) +    if (!afr_has_quorum(success_replies, this, frame))          goto unwind;      afr_replies_interpret(frame, this, local->inode, NULL); @@ -2993,11 +2992,8 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)  unwind:      afr_attempt_readsubvol_set(frame, 
this, success_replies, data_readable,                                 &read_subvol); -    if (read_subvol == -1) { -        AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, NULL, -                         NULL, NULL, NULL); -        return; -    } +    if (read_subvol == -1) +        goto error;      if (AFR_IS_ARBITER_BRICK(priv, read_subvol) && local->op_ret == 0) {          local->op_ret = -1; @@ -3012,6 +3008,11 @@ unwind:                       local->inode, &local->replies[read_subvol].poststat,                       local->replies[read_subvol].xdata,                       &local->replies[read_subvol].postparent); +    return; + +error: +    AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, NULL, NULL, +                     NULL, NULL);  }  static int @@ -4005,7 +4006,7 @@ afr_fop_lock_done(call_frame_t *frame, xlator_t *this)      if (afr_is_conflicting_lock_present(local->op_ret, local->op_errno)) {          afr_unlock_locks_and_proceed(frame, this, lock_count); -    } else if (priv->quorum_count && !afr_has_quorum(success, this)) { +    } else if (priv->quorum_count && !afr_has_quorum(success, this, NULL)) {          local->fop_lock_state = AFR_FOP_LOCK_QUORUM_FAILED;          local->op_ret = -1;          local->op_errno = afr_final_errno(local, priv); @@ -4496,7 +4497,7 @@ afr_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,                            local->cont.lk.cmd, &local->cont.lk.user_flock,                            local->xdata_req);      } else if (priv->quorum_count && -               !afr_has_quorum(local->cont.lk.locked_nodes, this)) { +               !afr_has_quorum(local->cont.lk.locked_nodes, this, NULL)) {          local->op_ret = -1;          local->op_errno = afr_final_errno(local, priv); @@ -4651,7 +4652,7 @@ afr_lease_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,                            priv->children[child_index]->fops->lease, &local->loc,               
             &local->cont.lease.user_lease, xdata);      } else if (priv->quorum_count && -               !afr_has_quorum(local->cont.lease.locked_nodes, this)) { +               !afr_has_quorum(local->cont.lease.locked_nodes, this, NULL)) {          local->op_ret = -1;          local->op_errno = afr_final_errno(local, priv); @@ -4889,7 +4890,7 @@ afr_priv_dump(xlator_t *this)          gf_proc_dump_write("quorum-type", "fixed");          gf_proc_dump_write("quorum-count", "%d", priv->quorum_count);      } -    gf_proc_dump_write("up", "%u", afr_has_quorum(priv->child_up, this)); +    gf_proc_dump_write("up", "%u", afr_has_quorum(priv->child_up, this, NULL));      if (priv->thin_arbiter_count) {          gf_proc_dump_write("ta_child_up", "%d", priv->ta_child_up);          gf_proc_dump_write("ta_bad_child_index", "%d", @@ -5467,7 +5468,8 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2)          goto out;      } -    had_quorum = priv->quorum_count && afr_has_quorum(priv->child_up, this); +    had_quorum = priv->quorum_count && +                 afr_has_quorum(priv->child_up, this, NULL);      if (priv->halo_enabled) {          halo_max_latency_msec = afr_get_halo_latency(this); @@ -5582,7 +5584,7 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2)      UNLOCK(&priv->lock);      if (priv->quorum_count) { -        has_quorum = afr_has_quorum(priv->child_up, this); +        has_quorum = afr_has_quorum(priv->child_up, this, NULL);          if (!had_quorum && has_quorum) {              gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_QUORUM_MET,                     "Client-quorum is met"); @@ -6987,3 +6989,42 @@ afr_ta_has_quorum(afr_private_t *priv, afr_local_t *local)      return _gf_false;  } + +gf_boolean_t +afr_is_add_replica_mount_lookup_on_root(call_frame_t *frame) +{ +    afr_local_t *local = NULL; + +    local = frame->local; + +    if (frame->root->pid != GF_CLIENT_PID_ADD_REPLICA_MOUNT) +        return _gf_false; + +    if 
(local->op != GF_FOP_LOOKUP) +        /* TODO: If the replica count is being increased on a plain distribute +         * volume that was never mounted, we need to allow setxattr on '/' with +         * GF_CLIENT_PID_NO_ROOT_SQUASH to accommodate for DHT layout setting */ +        return _gf_false; + +    if (local->inode == NULL) +        return _gf_false; + +    if (!__is_root_gfid(local->inode->gfid)) +        return _gf_false; + +    return _gf_true; +} + +gf_boolean_t +afr_lookup_has_quorum(call_frame_t *frame, xlator_t *this, +                      unsigned char *subvols) +{ +    afr_private_t *priv = this->private; + +    if (frame && afr_is_add_replica_mount_lookup_on_root(frame)) { +        if (AFR_COUNT(subvols, priv->child_count) > 0) +            return _gf_true; +    } + +    return _gf_false; +} diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 4f6e6a906dc..184244d6cd8 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -1489,7 +1489,7 @@ afr_handle_empty_brick(xlator_t *this, call_frame_t *frame, loc_t *loc,      if (ret && ab_ret)          goto out; -    if (frame->root->pid != GF_CLIENT_PID_SELF_HEALD) { +    if (frame->root->pid != GF_CLIENT_PID_ADD_REPLICA_MOUNT) {          gf_msg(this->name, GF_LOG_ERROR, EPERM, afr_get_msg_id(op_type),                 "'%s' is an internal extended attribute.", op_type);          ret = 1; diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c index 1cd5c2eee3b..9a91f2e56fc 100644 --- a/xlators/cluster/afr/src/afr-read-txn.c +++ b/xlators/cluster/afr/src/afr-read-txn.c @@ -431,7 +431,7 @@ afr_read_txn(call_frame_t *frame, xlator_t *this, inode_t *inode,      local->is_read_txn = _gf_true;      local->transaction.type = type; -    if (priv->quorum_count && !afr_has_quorum(local->child_up, this)) { +    if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) 
{          local->op_ret = -1;          local->op_errno = afr_quorum_errno(priv);          goto read; diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index ab3a121efe3..8d97ff53253 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -242,7 +242,7 @@ afr_changelog_has_quorum(afr_local_t *local, xlator_t *this)          }      } -    if (afr_has_quorum(success_children, this)) { +    if (afr_has_quorum(success_children, this, NULL)) {          return _gf_true;      } @@ -869,7 +869,7 @@ afr_handle_symmetric_errors(call_frame_t *frame, xlator_t *this)  }  gf_boolean_t -afr_has_quorum(unsigned char *subvols, xlator_t *this) +afr_has_quorum(unsigned char *subvols, xlator_t *this, call_frame_t *frame)  {      unsigned int quorum_count = 0;      afr_private_t *priv = NULL; @@ -878,6 +878,9 @@ afr_has_quorum(unsigned char *subvols, xlator_t *this)      priv = this->private;      up_children_count = AFR_COUNT(subvols, priv->child_count); +    if (afr_lookup_has_quorum(frame, this, subvols)) +        return _gf_true; +      if (priv->quorum_count == AFR_QUORUM_AUTO) {          /*           * Special case for auto-quorum with an even number of nodes. 
@@ -932,7 +935,7 @@ afr_has_fop_quorum(call_frame_t *frame)      locked_nodes = afr_locked_nodes_get(local->transaction.type,                                          &local->internal_lock); -    return afr_has_quorum(locked_nodes, this); +    return afr_has_quorum(locked_nodes, this, NULL);  }  static gf_boolean_t @@ -950,7 +953,7 @@ afr_has_fop_cbk_quorum(call_frame_t *frame)                  success[i] = 1;      } -    return afr_has_quorum(success, this); +    return afr_has_quorum(success, this, NULL);  }  gf_boolean_t @@ -2882,7 +2885,7 @@ afr_transaction(call_frame_t *frame, xlator_t *this, afr_transaction_type type)      local->transaction.type = type; -    if (priv->quorum_count && !afr_has_quorum(local->child_up, this)) { +    if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {          ret = -afr_quorum_errno(priv);          goto out;      } diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h index 35a922544bc..beefa26f4a6 100644 --- a/xlators/cluster/afr/src/afr-transaction.h +++ b/xlators/cluster/afr/src/afr-transaction.h @@ -48,7 +48,7 @@ afr_pending_read_decrement(afr_private_t *priv, int child_index);  call_frame_t *  afr_transaction_detach_fop_frame(call_frame_t *frame);  gf_boolean_t -afr_has_quorum(unsigned char *subvols, xlator_t *this); +afr_has_quorum(unsigned char *subvols, xlator_t *this, call_frame_t *frame);  gf_boolean_t  afr_needs_changelog_update(afr_local_t *local);  void diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 78eff154a80..ce04d5efbfe 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -242,7 +242,7 @@ reconfigure(xlator_t *this, dict_t *options)      GF_OPTION_RECONF("quorum-type", qtype, options, str, out);      GF_OPTION_RECONF("quorum-count", priv->quorum_count, options, uint32, out);      fix_quorum_options(this, priv, qtype, options); -    if (priv->quorum_count && 
!afr_has_quorum(priv->child_up, this)) +    if (priv->quorum_count && !afr_has_quorum(priv->child_up, this, NULL))          gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL,                 "Client-quorum is not met"); diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 489fdc738ab..9c12d2d9ac0 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -1327,4 +1327,8 @@ afr_ta_has_quorum(afr_private_t *priv, afr_local_t *local);  void  afr_ta_lock_release_synctask(xlator_t *this); + +gf_boolean_t +afr_lookup_has_quorum(call_frame_t *frame, xlator_t *this, +                      unsigned char *subvols);  #endif /* __AFR_H__ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index e9b30504a0c..ae6d4cedae1 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -13837,7 +13837,7 @@ glusterd_handle_replicate_brick_ops(glusterd_volinfo_t *volinfo,          goto out;      } -    ret = gf_asprintf(&pid, "%d", GF_CLIENT_PID_SELF_HEALD); +    ret = gf_asprintf(&pid, "%d", GF_CLIENT_PID_ADD_REPLICA_MOUNT);      if (ret < 0)          goto out;  | 
