diff options
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 109 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-messages.h | 3 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 118 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.h | 5 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.c | 25 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 4 | 
6 files changed, 256 insertions, 8 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 01a5db54bdd..8752e98c8df 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -2901,10 +2901,8 @@ afr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
 	return 0;
 }
 
-
-
 static void
-afr_discover_done (call_frame_t *frame, xlator_t *this)
+afr_discover_unwind (call_frame_t *frame, xlator_t *this)
 {
         afr_private_t       *priv  = NULL;
         afr_local_t         *local = NULL;
@@ -2966,6 +2964,100 @@ unwind:
 			  &local->replies[read_subvol].postparent);
 }
 
+/* Synctask body (opaque is the afr xlator): look up the thin-arbiter id
+ * file on the thin-arbiter brick, create it if the lookup returns -ENOENT,
+ * and on success cache its gfid in priv->ta_gfid. Failures are logged
+ * only; the function always returns 0. */
+static int
+afr_ta_id_file_check (void *opaque)
+{
+        afr_private_t *priv = NULL;
+        xlator_t *this = NULL;
+        loc_t loc = {0, };
+        struct iatt  stbuf = {0,};
+        dict_t *dict = NULL;
+        uuid_t  gfid = {0,};
+        fd_t *fd = NULL;
+        int ret = 0;
+
+        this = opaque;
+        priv = this->private;
+
+        ret = afr_fill_ta_loc (this, &loc);
+        if (ret)
+                goto out;
+
+        ret = syncop_lookup (priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
+                             &stbuf, 0, 0, 0);
+        if (ret == 0) {
+                goto out;
+        } else if (ret == -ENOENT) {
+                fd = fd_create (loc.inode, getpid());
+                if (!fd) {
+                        /* Do not report the lookup's ENOENT for this. */
+                        ret = -ENOMEM;
+                        goto out;
+                }
+                dict = dict_new ();
+                if (!dict) {
+                        ret = -ENOMEM;
+                        goto out;
+                }
+                gf_uuid_generate (gfid);
+                ret = dict_set_gfuuid (dict, "gfid-req", gfid, true);
+                if (ret)
+                        goto out;
+                ret = syncop_create (priv->children[THIN_ARBITER_BRICK_INDEX],
+                                     &loc, O_RDWR, 0664, fd, &stbuf, dict,
+                                     NULL);
+        }
+
+out:
+        if (ret == 0) {
+                gf_uuid_copy (priv->ta_gfid, stbuf.ia_gfid);
+        } else {
+                gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+                        "Failed to lookup/create thin-arbiter id file.");
+        }
+        if (dict)
+                dict_unref (dict);
+        if (fd)
+                fd_unref (fd);
+        loc_wipe (&loc);
+
+        return 0;
+}
+
+/* Completion callback for the id-file synctask; nothing to do. */
+static int
+afr_ta_id_file_check_cbk (int ret, call_frame_t *ta_frame, void *opaque)
+{
+        return 0;
+}
+
+/* Unwind the discover fop. For thin-arbiter volumes whose id-file gfid is
+ * not cached yet, first start a synctask (via synctask_new) that looks
+ * up/creates that file; a failure to start it is only logged. */
+static void
+afr_discover_done (call_frame_t *frame, xlator_t *this)
+{
+        int ret = 0;
+        afr_private_t *priv = NULL;
+
+        priv = this->private;
+        if (!priv->thin_arbiter_count)
+                goto unwind;
+        if (!gf_uuid_is_null(priv->ta_gfid))
+                goto unwind;
+
+        ret = synctask_new (this->ctx->env, afr_ta_id_file_check,
+                            afr_ta_id_file_check_cbk, NULL, this);
+        if (ret)
+                gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_THIN_ARB,
+                        "Failed to launch thin-arbiter id file check.");
+unwind:
+        afr_discover_unwind (frame, this);
+}
 
 int
 afr_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
@@ -5514,15 +5606,22 @@ afr_set_low_priority (call_frame_t *frame)
 void
 afr_priv_destroy (afr_private_t *priv)
 {
-        int            i           = 0;
+        int i = 0;
+        int child_count = -1;
 
         if (!priv)
                 goto out;
         GF_FREE (priv->last_event);
+
+        child_count = priv->child_count;
+        if (priv->thin_arbiter_count) {
+                child_count++;
+        }
         if (priv->pending_key) {
-                for (i = 0; i < priv->child_count; i++)
+                for (i = 0; i < child_count; i++)
                         GF_FREE (priv->pending_key[i]);
         }
+
         GF_FREE (priv->pending_reads);
         GF_FREE (priv->local);
         GF_FREE (priv->pending_key);
diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h
index 743bd945821..8aa94730158 100644
--- a/xlators/cluster/afr/src/afr-messages.h
+++ b/xlators/cluster/afr/src/afr-messages.h
@@ -66,7 +66,8 @@ GLFS_MSGID(AFR,
         AFR_MSG_NO_CHANGELOG,
         AFR_MSG_TIMER_CREATE_FAIL,
         AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
-        AFR_MSG_INODE_CTX_GET_FAILED
+        AFR_MSG_INODE_CTX_GET_FAILED,
+        AFR_MSG_THIN_ARB
 );
 
 #endif /* !_AFR_MESSAGES_H_ */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 8bb096775c2..b4d3062fc2b 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -635,6 +635,14 @@ afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this)
         local = frame->local;
 	priv = this->private;
 
+        if (priv->thin_arbiter_count) {
+                /* We need to perform post-op even if 1 data brick was down
+                 * before the txn started.*/
+                if (AFR_COUNT (local->transaction.failed_subvols,
+                               priv->child_count))
+                        return _gf_false;
+        }
+
         for (i = 0; i < priv->child_count; i++) {
                 if (local->transaction.pre_op[i] &&
                     local->transaction.failed_subvols[i])
@@ -825,6 +833,108 @@ afr_handle_quorum (call_frame_t *frame)
 }
 
+/* Fill loc for the thin-arbiter id file: parent is priv->root_inode, name
+ * is priv->pending_key[THIN_ARBITER_BRICK_INDEX], gfid is priv->ta_gfid and
+ * inode is a newly created inode. Returns 0, or -ENOMEM if inode_new fails;
+ * loc may hold references either way, so the caller must loc_wipe() it. */
+int
+afr_fill_ta_loc (xlator_t *this, loc_t *loc)
+{
+        afr_private_t *priv = NULL;
+
+        priv = this->private;
+        loc->parent = inode_ref (priv->root_inode);
+        gf_uuid_copy (loc->pargfid, loc->parent->gfid);
+        loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX];
+        gf_uuid_copy (loc->gfid, priv->ta_gfid);
+        loc->inode = inode_new (loc->parent->table);
+        if (!loc->inode)
+                return -ENOMEM;
+        return 0;
+}
+
+/* For thin-arbiter volumes, when a data brick is marked in
+ * local->transaction.failed_subvols, add the local->pending changelog to
+ * the thin-arbiter id file via xattrop, under a write inodelk in
+ * THIN_ARBITER_DOM1. Returns 0 when there is nothing to do or on success,
+ * and a non-zero error value otherwise. */
+int
+afr_changelog_thin_arbiter_post_op (xlator_t *this, afr_local_t *local)
+{
+        int ret = 0;
+        afr_private_t *priv = NULL;
+        dict_t *xattr = NULL;
+        int failed_count = 0;
+        struct gf_flock flock = {0, };
+        loc_t loc = {0,};
+        int i = 0;
+
+        priv = this->private;
+        if (!priv->thin_arbiter_count)
+                return 0;
+
+
+        failed_count = AFR_COUNT (local->transaction.failed_subvols,
+                                  priv->child_count);
+        if (!failed_count)
+                return 0;
+
+        GF_ASSERT (failed_count == 1);
+        ret = afr_fill_ta_loc (this, &loc);
+        if (ret)
+                goto out;
+
+        xattr = dict_new ();
+        if (!xattr) {
+                ret = -ENOMEM;
+                goto out;
+        }
+        for (i = 0; i < priv->child_count; i++) {
+                ret = dict_set_static_bin (xattr, priv->pending_key[i],
+                                           local->pending[i],
+                                           AFR_NUM_CHANGE_LOGS * sizeof (int));
+                if (ret)
+                        goto out;
+        }
+
+        flock.l_type = F_WRLCK;
+        flock.l_start = 0;
+        flock.l_len = 0;
+
+        /*TODO: Convert to two domain locking. */
+        ret = syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX],
+                              THIN_ARBITER_DOM1, &loc, F_SETLKW, &flock,
+                              NULL, NULL);
+        if (ret)
+                goto out;
+
+        ret = syncop_xattrop (priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
+                              GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL);
+
+        if (ret == -EINVAL) {
+                gf_msg (this->name, GF_LOG_INFO, -ret, AFR_MSG_THIN_ARB,
+                        "Thin-arbiter has denied post-op on %s for gfid %s.",
+                        priv->pending_key[THIN_ARBITER_BRICK_INDEX],
+                        uuid_utoa (local->inode->gfid));
+
+        } else if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+                        "Post-op on thin-arbiter id file %s failed for gfid %s.",
+                        priv->pending_key[THIN_ARBITER_BRICK_INDEX],
+                        uuid_utoa (local->inode->gfid));
+        }
+        flock.l_type = F_UNLCK;
+        syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX],
+                        THIN_ARBITER_DOM1, &loc, F_SETLKW, &flock, NULL, NULL);
+out:
+        if (xattr)
+                dict_unref (xattr);
+        /* Drop the inode references taken by afr_fill_ta_loc(). */
+        loc_wipe (&loc);
+
+        return ret;
+}
+
 int
 afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
 {
         afr_private_t           *priv           = this->private;
@@ -885,6 +995,14 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
 		goto out;
 	}
 
+        ret = afr_changelog_thin_arbiter_post_op (this, local);
+        if (ret < 0) {
+                local->op_ret = -1;
+                local->op_errno = -ret;
+                afr_changelog_post_op_done (frame, this);
+                goto out;
+        }
+
         if (need_undirty)
 		local->dirty[idx] = hton32(-1);
 	else
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index cb62c185938..629f6dd557c 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -54,8 +54,13 @@ afr_pick_error_xdata (afr_local_t *local, afr_private_t *priv,
                       inode_t *inode2, unsigned char *readable2);
 int
 afr_transaction_resume (call_frame_t *frame, xlator_t *this);
+
 int
 afr_lock (call_frame_t *frame, xlator_t *this);
+
 void
 afr_delayed_changelog_wake_up_cbk (void *data);
+
+int
+afr_fill_ta_loc (xlator_t *this, loc_t *loc);
 #endif /* __TRANSACTION_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index cb4b1537984..27cee590b4b 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -303,12 +303,20 @@ afr_pending_xattrs_init (afr_private_t *priv, xlator_t *this)
         char *ptr1 = NULL;
         char *xattrs_list = NULL;
         xlator_list_t *trav = NULL;
+        int child_count = -1;
 
         trav = this->children;
+        child_count = priv->child_count;
+        if (priv->thin_arbiter_count) {
+                /* priv->pending_key[THIN_ARBITER_BRICK_INDEX] is used as the
+                 * name of the thin arbiter file for persistence across add/
+                 * removal of DHT subvols.*/
+                child_count++;
+        }
 
         GF_OPTION_INIT ("afr-pending-xattr", xattrs_list, str, out);
         priv->pending_key = GF_CALLOC (sizeof (*priv->pending_key),
-                                       priv->child_count, gf_afr_mt_char);
+                                       child_count, gf_afr_mt_char);
         if (!priv->pending_key) {
                 ret = -ENOMEM;
                 goto out;
@@ -318,7 +326,7 @@ afr_pending_xattrs_init (afr_private_t *priv, xlator_t *this)
                         "Unable to fetch afr-pending-xattr option from volfile."
                         " Falling back to using client translator names. ");
 
-                while (i < priv->child_count) {
+                while (i < child_count) {
                         ret = gf_asprintf (&priv->pending_key[i], "%s.%s",
                                            AFR_XATTR_PREFIX,
                                            trav->xlator->name);
@@ -368,6 +376,7 @@ init (xlator_t *this)
         int            read_subvol_index = -1;
         char          *qtype       = NULL;
         char          *fav_child_policy = NULL;
+        char          *thin_arbiter = NULL;
 
         if (!this->children) {
                 gf_msg (this->name, GF_LOG_ERROR, 0,
@@ -397,6 +406,11 @@ init (xlator_t *this)
         priv->read_child = -1;
 
         GF_OPTION_INIT ("arbiter-count", priv->arbiter_count, uint32, out);
+        GF_OPTION_INIT ("thin-arbiter", thin_arbiter, str, out);
+        if (thin_arbiter && strlen(thin_arbiter) > 0) {
+                priv->thin_arbiter_count = 1;
+                priv->child_count--;
+        }
         INIT_LIST_HEAD (&priv->healing);
         INIT_LIST_HEAD (&priv->heal_waiting);
 
@@ -1103,6 +1117,13 @@ struct volume_options options[] = {
           .type = GF_OPTION_TYPE_INT,
           .description = "subset of child_count. Has to be 0 or 1."
         },
+        { .key = {"thin-arbiter"},
+          .type = GF_OPTION_TYPE_STR,
+          .op_version = {GD_OP_VERSION_4_1_0},
+          .flags = OPT_FLAG_SETTABLE,
+          .tags = {"replicate"},
+          .description = "contains host:path of thin arbiter brick",
+        },
         { .key   = {"shd-max-threads"},
           .type  = GF_OPTION_TYPE_INT,
           .min   = 1,
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index b96be62a910..fd75de45341 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -35,6 +35,8 @@
 #define AFR_DEFAULT_SPB_CHOICE_TIMEOUT 300 /*in seconds*/
 
 #define ARBITER_BRICK_INDEX 2
+#define THIN_ARBITER_BRICK_INDEX 2
+#define THIN_ARBITER_DOM1 "afr.ta.domain-1"
 #define AFR_HALO_MAX_LATENCY 99999
 
 typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this);
@@ -81,10 +83,12 @@ typedef struct _afr_private {
         unsigned int child_count;     /* total number of children   */
         unsigned int arbiter_count;   /*subset of child_count.
                                         Has to be 0 or 1.*/
+        unsigned int thin_arbiter_count; /* 0 or 1 at the moment.*/
 
         xlator_t **children;
 
         inode_t *root_inode;
+        uuid_t ta_gfid; /*For thin arbiter.*/
 
         unsigned char *child_up;
         int64_t *child_latency;
