diff options
Diffstat (limited to 'xlators')
49 files changed, 2735 insertions, 398 deletions
diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c index 9027e2a33..a8e0c21ab 100644 --- a/xlators/cluster/afr/src/pump.c +++ b/xlators/cluster/afr/src/pump.c @@ -1678,6 +1678,7 @@ pump_setxattr (call_frame_t *frame, xlator_t *this, ret = afr_local_init (local, priv, &op_errno); if (ret < 0) { afr_local_cleanup (local, this); + mem_put (local); goto out; } @@ -2389,7 +2390,7 @@ int32_t init (xlator_t *this) { afr_private_t * priv = NULL; - pump_private_t *pump_priv = NULL; + pump_private_t *pump_priv = NULL; int child_count = 0; xlator_list_t * trav = NULL; int i = 0; @@ -2410,12 +2411,10 @@ init (xlator_t *this) "Volume is dangling."); } - this->private = GF_CALLOC (1, sizeof (afr_private_t), - gf_afr_mt_afr_private_t); - if (!this->private) + priv = GF_CALLOC (1, sizeof (afr_private_t), gf_afr_mt_afr_private_t); + if (!priv) goto out; - priv = this->private; LOCK_INIT (&priv->lock); LOCK_INIT (&priv->read_child_lock); //lock recovery is not done in afr @@ -2455,7 +2454,6 @@ init (xlator_t *this) */ priv->strict_readdir = _gf_false; - priv->wait_count = 1; priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count, gf_afr_mt_char); @@ -2490,7 +2488,8 @@ init (xlator_t *this) while (i < child_count) { priv->children[i] = trav->xlator; - ret = gf_asprintf (&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX, + ret = gf_asprintf (&priv->pending_key[i], "%s.%s", + AFR_XATTR_PREFIX, trav->xlator->name); if (-1 == ret) { gf_log (this->name, GF_LOG_ERROR, @@ -2534,8 +2533,7 @@ init (xlator_t *this) pump_priv->resume_path = GF_CALLOC (1, PATH_MAX, gf_afr_mt_char); if (!pump_priv->resume_path) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); + gf_log (this->name, GF_LOG_ERROR, "Out of memory"); ret = -1; goto out; } @@ -2558,11 +2556,33 @@ init (xlator_t *this) } priv->pump_private = pump_priv; + pump_priv = NULL; + + this->private = priv; + priv = NULL; pump_change_state (this, PUMP_STATE_ABORT); ret = 0; out: + + if (pump_priv) { + GF_FREE (pump_priv->resume_path); + LOCK_DESTROY (&pump_priv->resume_path_lock); + LOCK_DESTROY (&pump_priv->pump_state_lock); + GF_FREE (pump_priv); + } + + if (priv) { + GF_FREE (priv->child_up); + GF_FREE (priv->children); + GF_FREE (priv->pending_key); + GF_FREE (priv->last_event); + LOCK_DESTROY (&priv->lock); + LOCK_DESTROY (&priv->read_child_lock); + GF_FREE (priv); + } + return ret; } diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index cf4ec258d..3055f4615 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -1843,10 +1843,13 @@ dht_vgetxattr_fill_and_set (dht_local_t *local, dict_t **dict, xlator_t *this, } else { gf_log (this->name, GF_LOG_WARNING, "Unknown local->xsel (%s)", local->xsel); + GF_FREE (xattr_buf); goto out; } ret = dict_set_dynstr (*dict, local->xsel, xattr_buf); + if (ret) + GF_FREE (xattr_buf); GF_FREE (local->xattr_val); out: @@ -3714,12 +3717,12 @@ dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, } hashed_subvol = dht_subvol_get_hashed (this, loc); + /* Dont fail unlink if hashed_subvol is NULL which can be the result + * of layout anomaly */ if (!hashed_subvol) { gf_log (this->name, GF_LOG_DEBUG, "no subvolume in layout for path=%s", loc->path); - op_errno = EINVAL; - goto err; } cached_subvol = local->cached_subvol; @@ -3731,7 +3734,7 @@ dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, } local->flags = xflag; - if (hashed_subvol != cached_subvol) { + if (hashed_subvol && hashed_subvol != cached_subvol) { STACK_WIND (frame, dht_unlink_linkfile_cbk, hashed_subvol, hashed_subvol->fops->unlink, loc, xflag, xdata); @@ -4611,17 +4614,85 @@ err: int +dht_rmdir_cached_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *stbuf, dict_t *xattr, + struct iatt *parent) +{ + dht_local_t *local = NULL; + xlator_t *src = NULL; + call_frame_t *main_frame = NULL; + dht_local_t *main_local = NULL; + int this_call_cnt = 0; + dht_conf_t *conf = this->private; + dict_t *xattrs = NULL; + int ret = 0; + + local = frame->local; + src = local->hashed_subvol; + + main_frame = local->main_frame; + main_local = main_frame->local; + + if (op_ret == 0) { + main_local->op_ret = -1; + main_local->op_errno = ENOTEMPTY; + + gf_log (this->name, GF_LOG_WARNING, + "%s found on cached subvol %s", + local->loc.path, src->name); + goto err; + } else if (op_errno != ENOENT) { + main_local->op_ret = -1; + main_local->op_errno = op_errno; + goto err; + } + + xattrs = dict_new (); + if (!xattrs) { + gf_log (this->name, GF_LOG_ERROR, "dict_new failed"); + goto err; + } + + ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to set linkto key" + " in dict"); + if (xattrs) + dict_unref (xattrs); + goto err; + } + + STACK_WIND (frame, dht_rmdir_lookup_cbk, + src, src->fops->lookup, &local->loc, xattrs); + if (xattrs) + dict_unref (xattrs); + + return 0; +err: + + this_call_cnt = dht_frame_return (main_frame); + if (is_last_call (this_call_cnt)) + dht_rmdir_do (main_frame, this); + + DHT_STACK_DESTROY (frame); + return 0; +} + + +int dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this, gf_dirent_t *entries, xlator_t *src) { - int ret = 0; - int build_ret = 0; - gf_dirent_t *trav = NULL; + int ret = 0; + int build_ret = 0; + gf_dirent_t *trav = NULL; call_frame_t *lookup_frame = NULL; dht_local_t *lookup_local = NULL; - dht_local_t *local = NULL; - dict_t *xattrs = NULL; - dht_conf_t *conf = this->private; + dht_local_t *local = NULL; + dict_t *xattrs = NULL; + dht_conf_t *conf = this->private; + xlator_t *subvol = NULL; local = frame->local; @@ -4681,6 +4752,7 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this, lookup_frame->local = lookup_local; lookup_local->main_frame = frame; + lookup_local->hashed_subvol = src; build_ret = dht_build_child_loc (this, &lookup_local->loc, &local->loc, trav->d_name); @@ -4699,9 +4771,20 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this, } UNLOCK (&frame->lock); - STACK_WIND (lookup_frame, dht_rmdir_lookup_cbk, - src, src->fops->lookup, - &lookup_local->loc, xattrs); + subvol = dht_linkfile_subvol (this, NULL, &trav->d_stat, + trav->dict); + if (!subvol) { + gf_log (this->name, GF_LOG_INFO, + "linkfile not having link subvolume. path=%s", + lookup_local->loc.path); + STACK_WIND (lookup_frame, dht_rmdir_lookup_cbk, + src, src->fops->lookup, + &lookup_local->loc, xattrs); + } else { + STACK_WIND (lookup_frame, dht_rmdir_cached_lookup_cbk, + subvol, subvol->fops->lookup, + &lookup_local->loc, xattrs); + } ret++; } @@ -4767,16 +4850,18 @@ int dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, fd_t *fd, dict_t *xdata) { - dht_local_t *local = NULL; + dht_local_t *local = NULL; int this_call_cnt = -1; - call_frame_t *prev = NULL; - dict_t *dict = NULL; - int ret = 0; - dht_conf_t *conf = this->private; + call_frame_t *prev = NULL; + dict_t *dict = NULL; + int ret = 0; + dht_conf_t *conf = this->private; + int i = 0; local = frame->local; prev = cookie; + this_call_cnt = dht_frame_return (frame); if (op_ret == -1) { gf_log (this->name, GF_LOG_DEBUG, "opendir on %s for %s failed (%s)", @@ -4789,6 +4874,12 @@ dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto err; } + if (!is_last_call (this_call_cnt)) + return 0; + + if (local->op_ret == -1) + goto err; + dict = dict_new (); if (!dict) { local->op_ret = -1; @@ -4802,9 +4893,13 @@ dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, "%s: failed to set '%s' key", local->loc.path, conf->link_xattr_name); - STACK_WIND (frame, dht_rmdir_readdirp_cbk, - prev->this, prev->this->fops->readdirp, - local->fd, 4096, 0, dict); + local->call_cnt = conf->subvolume_cnt; + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND (frame, dht_rmdir_readdirp_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->readdirp, + local->fd, 4096, 0, dict); + } if (dict) dict_unref (dict); @@ -4812,8 +4907,6 @@ dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, return 0; err: - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { dht_rmdir_do (frame, this); } diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index 381643b22..18a501f04 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -884,8 +884,12 @@ dht_migration_complete_check_task (void *data) if (fd_is_anonymous (iter_fd)) continue; + /* flags for open are stripped down to allow following the + * new location of the file, otherwise we can get EEXIST or + * truncate the file again as rebalance is moving the data */ ret = syncop_open (dst_node, &tmp_loc, - iter_fd->flags, iter_fd); + (iter_fd->flags & + ~(O_CREAT | O_EXCL | O_TRUNC)), iter_fd); if (ret < 0) { gf_log (this->name, GF_LOG_ERROR, "failed to open " "the fd (%p, flags=0%o) on file %s @ %s", @@ -1034,8 +1038,12 @@ dht_rebalance_inprogress_task (void *data) if (fd_is_anonymous (iter_fd)) continue; + /* flags for open are stripped down to allow following the + * new location of the file, otherwise we can get EEXIST or + * truncate the file again as rebalance is moving the data */ ret = syncop_open (dst_node, &tmp_loc, - iter_fd->flags, iter_fd); + (iter_fd->flags & + ~(O_CREAT | O_EXCL | O_TRUNC)), iter_fd); if (ret < 0) { gf_log (this->name, GF_LOG_ERROR, "failed to send open " "the fd (%p, flags=0%o) on file %s @ %s", diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c index 1e38d6be1..31d85a506 100644 --- a/xlators/cluster/dht/src/dht-layout.c +++ b/xlators/cluster/dht/src/dht-layout.c @@ -465,11 +465,8 @@ dht_layout_entry_cmp (dht_layout_t *layout, int i, int j) - (int64_t) layout->list[j].stop; goto out; } - if (layout->list[i].err || layout->list[j].err) - diff = layout->list[i].err - layout->list[j].err; - else - diff = (int64_t) layout->list[i].start - - (int64_t) layout->list[j].start; + diff = (int64_t) layout->list[i].start + - (int64_t) layout->list[j].start; out: return diff; @@ -536,8 +533,20 @@ dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, char is_virgin = 1; uint32_t no_space = 0; - /* TODO: explain what is happening */ - + /* This funtion scans through the layout spread of a directory to + check if there are any anomalies. Prior to calling this function + the layout entries should be sorted in the ascending order. + + If the layout entry has err != 0 + then increment the corresponding anomaly. + else + if (start of the current layout entry > stop + 1 of previous + non erroneous layout entry) + then it indicates a hole in the layout + if (start of the current layout entry < stop + 1 of previous + non erroneous layout entry) + then it indicates an overlap in the layout + */ last_stop = layout->list[0].start - 1; prev_stop = last_stop; diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c index 324b30626..36c073973 100644 --- a/xlators/cluster/dht/src/dht-shared.c +++ b/xlators/cluster/dht/src/dht-shared.c @@ -103,20 +103,20 @@ dht_priv_dump (xlator_t *this) this->name); gf_proc_dump_write("subvol_cnt","%d", conf->subvolume_cnt); for (i = 0; i < conf->subvolume_cnt; i++) { - sprintf (key, "subvolumes[%d]", i); + snprintf (key, sizeof (key), "subvolumes[%d]", i); gf_proc_dump_write(key, "%s.%s", conf->subvolumes[i]->type, conf->subvolumes[i]->name); if (conf->file_layouts && conf->file_layouts[i]){ - sprintf (key, "file_layouts[%d]", i); + snprintf (key, sizeof (key), "file_layouts[%d]", i); dht_layout_dump(conf->file_layouts[i], key); } if (conf->dir_layouts && conf->dir_layouts[i]) { - sprintf (key, "dir_layouts[%d]", i); + snprintf (key, sizeof (key), "dir_layouts[%d]", i); dht_layout_dump(conf->dir_layouts[i], key); } if (conf->subvolume_status) { - sprintf (key, "subvolume_status[%d]", i); + snprintf (key, sizeof (key), "subvolume_status[%d]", i); gf_proc_dump_write(key, "%d", (int)conf->subvolume_status[i]); } @@ -130,14 +130,35 @@ dht_priv_dump (xlator_t *this) gf_proc_dump_write("disk_unit", "%c", conf->disk_unit); gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval); gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit); - if (conf ->du_stats) { - gf_proc_dump_write("du_stats.avail_percent", "%lf", - conf->du_stats->avail_percent); - gf_proc_dump_write("du_stats.avail_space", "%lu", - conf->du_stats->avail_space); - gf_proc_dump_write("du_stats.avail_inodes", "%lf", - conf->du_stats->avail_inodes); - gf_proc_dump_write("du_stats.log", "%lu", conf->du_stats->log); + + if (conf->du_stats) { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (!conf->subvolume_status[i]) + continue; + + snprintf (key, sizeof (key), "subvolumes[%d]", i); + gf_proc_dump_write (key, "%s", + conf->subvolumes[i]->name); + + snprintf (key, sizeof (key), + "du_stats[%d].avail_percent", i); + gf_proc_dump_write (key, "%lf", + conf->du_stats[i].avail_percent); + + snprintf (key, sizeof (key), "du_stats[%d].avail_space", + i); + gf_proc_dump_write (key, "%lu", + conf->du_stats[i].avail_space); + + snprintf (key, sizeof (key), + "du_stats[%d].avail_inodes", i); + gf_proc_dump_write (key, "%lf", + conf->du_stats[i].avail_inodes); + + snprintf (key, sizeof (key), "du_stats[%d].log", i); + gf_proc_dump_write (key, "%lu", + conf->du_stats[i].log); + } } if (conf->last_stat_fetch.tv_sec) diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.h b/xlators/features/changelog/lib/src/gf-changelog-helpers.h index f35220ccb..e4cf506c4 100644 --- a/xlators/features/changelog/lib/src/gf-changelog-helpers.h +++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.h @@ -51,7 +51,7 @@ typedef struct gf_changelog { /* connection retries */ int gfc_connretries; - char gfc_sockpath[PATH_MAX]; + char gfc_sockpath[UNIX_PATH_MAX]; char gfc_brickpath[PATH_MAX]; diff --git a/xlators/features/changelog/lib/src/gf-changelog.c b/xlators/features/changelog/lib/src/gf-changelog.c index ca8e373e7..4b2b25ad5 100644 --- a/xlators/features/changelog/lib/src/gf-changelog.c +++ b/xlators/features/changelog/lib/src/gf-changelog.c @@ -173,7 +173,7 @@ gf_changelog_notification_init (xlator_t *this, gf_changelog_t *gfc) } CHANGELOG_MAKE_SOCKET_PATH (gfc->gfc_brickpath, - gfc->gfc_sockpath, PATH_MAX); + gfc->gfc_sockpath, UNIX_PATH_MAX); gf_log (this->name, GF_LOG_INFO, "connecting to changelog socket: %s (brick: %s)", gfc->gfc_sockpath, gfc->gfc_brickpath); @@ -202,6 +202,7 @@ gf_changelog_notification_init (xlator_t *this, gf_changelog_t *gfc) gf_log (this->name, GF_LOG_ERROR, "could not connect to changelog socket!" " bailing out..."); + close (sockfd); ret = -1; } else gf_log (this->name, GF_LOG_INFO, diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h index 2412d810e..4c3d5405b 100644 --- a/xlators/features/changelog/src/changelog-helpers.h +++ b/xlators/features/changelog/src/changelog-helpers.h @@ -246,7 +246,7 @@ typedef struct changelog_notify { pthread_t notify_th; /* unique socket path */ - char sockpath[PATH_MAX]; + char sockpath[UNIX_PATH_MAX]; int socket_fd; diff --git a/xlators/features/changelog/src/changelog-notifier.c b/xlators/features/changelog/src/changelog-notifier.c index 1f8b31253..5f3d063a8 100644 --- a/xlators/features/changelog/src/changelog-notifier.c +++ b/xlators/features/changelog/src/changelog-notifier.c @@ -178,7 +178,7 @@ changelog_notifier (void *data) } CHANGELOG_MAKE_SOCKET_PATH (priv->changelog_brick, - cn->sockpath, PATH_MAX); + cn->sockpath, UNIX_PATH_MAX); if (unlink (cn->sockpath) < 0) { if (errno != ENOENT) { gf_log (this->name, GF_LOG_ERROR, diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c index db592719b..5edfeda8f 100644 --- a/xlators/features/index/src/index.c +++ b/xlators/features/index/src/index.c @@ -371,6 +371,7 @@ sync_base_indices (void *index_priv) char xattrop_directory[PATH_MAX] = {0}; char base_index_path[PATH_MAX] = {0}; char xattrop_index_path[PATH_MAX] = {0}; + int32_t op_errno = 0; int ret = 0; priv = index_priv; @@ -381,11 +382,14 @@ sync_base_indices (void *index_priv) XATTROP_SUBDIR); if ((dir_base_holder = opendir(base_indices_holder)) == NULL) { + op_errno = errno; ret = -1; goto out; } if ((xattrop_dir = opendir (xattrop_directory)) == NULL) { + op_errno = errno; ret = -1; + (void) closedir (dir_base_holder); goto out; } @@ -410,20 +414,30 @@ sync_base_indices (void *index_priv) ret = sys_link (xattrop_index_path, base_index_path); - if (ret && errno != EEXIST) + if (ret && errno != EEXIST) { + op_errno = errno; + (void) closedir (dir_base_holder); + (void) closedir (xattrop_dir); goto out; + } } } ret = closedir (xattrop_dir); - if (ret) + if (ret) { + op_errno = errno; + (void) closedir (dir_base_holder); goto out; + } ret = closedir (dir_base_holder); - if (ret) + if (ret) { + op_errno = errno; goto out; + } ret = 0; out: + errno = op_errno; return ret; } @@ -490,8 +504,10 @@ sync_base_indices_from_xattrop (xlator_t *this) continue; } ret = unlink (entry->d_name); - if (ret) + if (ret) { + closedir (dir); goto out; + } } closedir (dir); } @@ -655,22 +671,16 @@ _check_key_is_zero_filled (dict_t *d, char *k, data_t *v, return 0; } - void -_xattrop_index_action (xlator_t *this, inode_t *inode, dict_t *xattr) +_index_action (xlator_t *this, inode_t *inode, gf_boolean_t zero_xattr) { - gf_boolean_t zero_xattr = _gf_true; + int ret = 0; index_inode_ctx_t *ctx = NULL; - int ret = 0; - - ret = dict_foreach (xattr, _check_key_is_zero_filled, NULL); - if (ret == -1) - zero_xattr = _gf_false; ret = index_inode_ctx_get (inode, this, &ctx); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Not able to %s %s -> index", - zero_xattr?"add":"del", uuid_utoa (inode->gfid)); + zero_xattr?"del":"add", uuid_utoa (inode->gfid)); goto out; } if (zero_xattr) { @@ -691,6 +701,19 @@ out: } void +_xattrop_index_action (xlator_t *this, inode_t *inode, dict_t *xattr) +{ + gf_boolean_t zero_xattr = _gf_true; + int ret = 0; + + ret = dict_foreach (xattr, _check_key_is_zero_filled, NULL); + if (ret == -1) + zero_xattr = _gf_false; + _index_action (this, inode, zero_xattr); + return; +} + +void fop_xattrop_index_action (xlator_t *this, inode_t *inode, dict_t *xattr) { _xattrop_index_action (this, inode, xattr); @@ -868,6 +891,11 @@ int index_xattrop_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { + //In wind phase bring the gfid into index. This way if the brick crashes + //just after posix performs xattrop before _cbk reaches index xlator + //we will still have the gfid in index. + _index_action (this, frame->local, _gf_false); + STACK_WIND (frame, index_xattrop_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->xattrop, loc, optype, xattr, xdata); @@ -878,6 +906,10 @@ int index_fxattrop_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { + //In wind phase bring the gfid into index. This way if the brick crashes + //just after posix performs xattrop before _cbk reaches index xlator + //we will still have the gfid in index. + _index_action (this, frame->local, _gf_false); STACK_WIND (frame, index_fxattrop_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->fxattrop, fd, optype, xattr, xdata); diff --git a/xlators/features/marker/src/marker.c b/xlators/features/marker/src/marker.c index d6b6dd358..45058652b 100644 --- a/xlators/features/marker/src/marker.c +++ b/xlators/features/marker/src/marker.c @@ -1021,7 +1021,7 @@ marker_rename_done (call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret < 0) { if (local->err == 0) { - local->err = op_errno; + local->err = op_errno ? op_errno : EINVAL; } gf_log (this->name, GF_LOG_WARNING, @@ -1037,6 +1037,11 @@ marker_rename_done (call_frame_t *frame, void *cookie, xlator_t *this, } else if (local->err != 0) { STACK_UNWIND_STRICT (rename, frame, -1, local->err, NULL, NULL, NULL, NULL, NULL, NULL); + } else { + gf_log (this->name, GF_LOG_CRITICAL, + "continuation stub to unwind the call is absent, hence " + "call will be hung (call-stack id = %"PRIu64")", + frame->root->unique); } mq_reduce_parent_size (this, &oplocal->loc, oplocal->contribution); @@ -1082,7 +1087,7 @@ marker_rename_release_newp_lock (call_frame_t *frame, void *cookie, if (op_ret < 0) { if (local->err == 0) { - local->err = op_errno; + local->err = op_errno ? op_errno : EINVAL; } gf_log (this->name, GF_LOG_WARNING, @@ -1271,7 +1276,7 @@ marker_do_rename (call_frame_t *frame, void *cookie, xlator_t *this, MARKER_RESET_UID_GID (frame, frame->root, local); if ((op_ret < 0) && (op_errno != ENOATTR)) { - local->err = op_errno; + local->err = op_errno ? op_errno : EINVAL; gf_log (this->name, GF_LOG_WARNING, "fetching contribution values from %s (gfid:%s) " "failed (%s)", local->loc.path, @@ -1283,7 +1288,7 @@ marker_do_rename (call_frame_t *frame, void *cookie, xlator_t *this, if (local->loc.inode != NULL) { GET_CONTRI_KEY (contri_key, local->loc.parent->gfid, ret); if (ret < 0) { - local->err = errno; + local->err = errno ? errno : ENOMEM; goto err; } @@ -1323,7 +1328,7 @@ marker_get_newpath_contribution (call_frame_t *frame, void *cookie, MARKER_RESET_UID_GID (frame, frame->root, local); if ((op_ret < 0) && (op_errno != ENOATTR)) { - local->err = op_errno; + local->err = op_errno ? op_errno : EINVAL; gf_log (this->name, GF_LOG_WARNING, "fetching contribution values from %s (gfid:%s) " "failed (%s)", oplocal->loc.path, @@ -1334,7 +1339,7 @@ marker_get_newpath_contribution (call_frame_t *frame, void *cookie, GET_CONTRI_KEY (contri_key, oplocal->loc.parent->gfid, ret); if (ret < 0) { - local->err = errno; + local->err = errno ? errno : ENOMEM; goto err; } @@ -1344,7 +1349,7 @@ marker_get_newpath_contribution (call_frame_t *frame, void *cookie, if (local->loc.inode != NULL) { GET_CONTRI_KEY (contri_key, local->loc.parent->gfid, ret); if (ret < 0) { - local->err = errno; + local->err = errno ? errno : ENOMEM; goto err; } @@ -1385,7 +1390,7 @@ marker_get_oldpath_contribution (call_frame_t *frame, void *cookie, oplocal = local->oplocal; if (op_ret < 0) { - local->err = op_errno; + local->err = op_errno ? op_errno : EINVAL; gf_log (this->name, GF_LOG_WARNING, "cannot hold inodelk on %s (gfid:%s) (%s)", local->next_lock_on->path, @@ -1396,7 +1401,7 @@ marker_get_oldpath_contribution (call_frame_t *frame, void *cookie, GET_CONTRI_KEY (contri_key, oplocal->loc.parent->gfid, ret); if (ret < 0) { - local->err = errno; + local->err = errno ? errno : ENOMEM; goto quota_err; } @@ -1452,7 +1457,7 @@ marker_rename_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, loc = &local->parent_loc; } - local->err = op_errno; + local->err = op_errno ? op_errno : EINVAL; gf_log (this->name, GF_LOG_WARNING, "cannot hold inodelk on %s (gfid:%s) (%s)", loc->path, uuid_utoa (loc->inode->gfid), diff --git a/xlators/features/qemu-block/src/bdrv-xlator.c b/xlators/features/qemu-block/src/bdrv-xlator.c index aaf028cfe..1e55b5fb7 100644 --- a/xlators/features/qemu-block/src/bdrv-xlator.c +++ b/xlators/features/qemu-block/src/bdrv-xlator.c @@ -235,7 +235,6 @@ qemu_gluster_co_writev (BlockDriverState *bs, int64_t sector_num, int nb_sectors iobref = iobref_new (); if (!iobref) { - iobuf_unref (iobuf); goto out; } diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c index 2812a2b13..bb5dc7aba 100644 --- a/xlators/features/quota/src/quota.c +++ b/xlators/features/quota/src/quota.c @@ -721,7 +721,7 @@ quota_check_limit_continuation (struct list_head *parents, inode_t *inode, this = THIS; if ((op_ret < 0) || list_empty (parents)) { - if (list_empty (parents)) { + if (op_ret >= 0) { gf_log (this->name, GF_LOG_WARNING, "Couldn't build ancestry for inode (gfid:%s). " "Without knowing ancestors till root, quota " @@ -3387,6 +3387,10 @@ quota_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, if (hard_lim > 0) { local = quota_local_new (); + if (local == NULL) { + op_errno = ENOMEM; + goto err; + } frame->local = local; local->loc.inode = inode_ref (fd->inode); @@ -3561,11 +3565,9 @@ quota_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, buf->f_blocks = blocks; avail = buf->f_blocks - usage; - avail = (avail >= 0) ? avail : 0; + avail = max (avail, 0); - if (buf->f_bfree > avail) { - buf->f_bfree = avail; - } + buf->f_bfree = avail; /* * We have to assume that the total assigned quota * won't cause us to dip into the reserved space, @@ -4029,6 +4031,7 @@ quota_log_usage (xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, char *path = NULL; int64_t cur_size = 0; quota_priv_t *priv = NULL; + gf_boolean_t dyn_mem = _gf_true; priv = this->private; if ((ctx->soft_lim <= 0) || (timerisset (&ctx->prev_log) && @@ -4043,6 +4046,7 @@ quota_log_usage (xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, if (!usage_str) { snprintf (size_str, sizeof (size_str), "%"PRId64, cur_size); usage_str = (char*) size_str; + dyn_mem = _gf_false; } inode_path (inode, NULL, &path); if (!path) @@ -4063,7 +4067,8 @@ quota_log_usage (xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, ctx->prev_log = cur_time; } - GF_FREE (usage_str); + if (dyn_mem) + GF_FREE (usage_str); } int32_t diff --git a/xlators/lib/src/libxlator.c b/xlators/lib/src/libxlator.c index 63e9bcf9f..4e680c510 100644 --- a/xlators/lib/src/libxlator.c +++ b/xlators/lib/src/libxlator.c @@ -233,6 +233,7 @@ out: frame->local = local->xl_local; local->xl_specf_unwind (frame, op_ret, op_errno, dict, xdata); + GF_FREE (local); } else if (need_unwind) { STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata); @@ -344,6 +345,7 @@ unlock: frame->local = local->xl_local; local->xl_specf_unwind (frame, op_ret, op_errno, dict, xdata); + GF_FREE (local); return 0; } else if (need_unwind){ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am index cbb6353f8..25f2ed051 100644 --- a/xlators/mgmt/glusterd/src/Makefile.am +++ b/xlators/mgmt/glusterd/src/Makefile.am @@ -12,7 +12,7 @@ glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \ glusterd-geo-rep.c glusterd-replace-brick.c glusterd-log-ops.c \ glusterd-volume-ops.c glusterd-brick-ops.c glusterd-mountbroker.c \ glusterd-syncop.c glusterd-hooks.c glusterd-volume-set.c \ - glusterd-etcd.c + glusterd-locks.c glusterd-etcd.c glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ $(top_builddir)/rpc/xdr/src/libgfxdr.la \ @@ -22,7 +22,7 @@ glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \ glusterd-sm.h glusterd-store.h glusterd-mem-types.h \ glusterd-pmap.h glusterd-volgen.h glusterd-mountbroker.h \ - glusterd-syncop.h glusterd-hooks.h glusterd-etcd.h + glusterd-syncop.h glusterd-hooks.h glusterd-locks.h glusterd-etcd.h AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ -I$(rpclibdir) -I$(CONTRIBDIR)/rbtree \ diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index 26d608a2f..ced916ea1 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -1467,7 +1467,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) goto out; } - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { ret = glusterd_generate_and_set_task_id (dict, GF_REMOVE_BRICK_TID_KEY); if (ret) { @@ -1733,7 +1733,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) /* Set task-id, if available, in ctx dict for operations other than * start */ - if (is_origin_glusterd () && (cmd != GF_OP_CMD_START)) { + if (is_origin_glusterd (dict) && (cmd != GF_OP_CMD_START)) { if (!uuid_is_null (volinfo->rebal.rebalance_id)) { ret = glusterd_copy_uuid_to_dict (volinfo->rebal.rebalance_id, dict, diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c index 3969db17c..a0bc9f737 100644 --- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c @@ -987,6 +987,8 @@ _get_status_mst_slv (dict_t *this, char *key, data_t *value, void *data) param->node); out: + GF_FREE (errmsg); + if (slave_buf) GF_FREE(slave_buf); @@ -1444,6 +1446,7 @@ _get_slave_status (dict_t *dict, char *key, data_t *value, void *data) ret = is_geo_rep_active (param->volinfo,slave, conf_path, ¶m->is_active); out: + GF_FREE(errmsg); return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index e296509d8..5a140bb22 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -33,6 +33,7 @@ #include "glusterd-op-sm.h" #include "glusterd-utils.h" #include "glusterd-store.h" +#include "glusterd-locks.h" #include "glusterd1-xdr.h" #include "cli1-xdr.h" @@ -56,6 +57,7 @@ #endif extern glusterd_op_info_t opinfo; +extern uuid_t global_txn_id; int glusterd_big_locked_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, @@ -591,10 +593,16 @@ int32_t glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx, char *err_str, size_t err_len) { - int32_t ret = -1; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - int32_t locked = 0; + int32_t ret = -1; + dict_t *dict = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int32_t locked = 0; + char *tmp = NULL; + char *volname = NULL; + uuid_t *txn_id = NULL; + glusterd_op_info_t txn_op_info = {{0},}; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; GF_ASSERT (req); GF_ASSERT ((op > GD_OP_NONE) && (op < GD_OP_MAX)); @@ -605,33 +613,122 @@ glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx, priv = this->private; GF_ASSERT (priv); - ret = glusterd_lock (MY_UUID); + dict = ctx; + + /* Generate a transaction-id for this operation and + * save it in the dict. This transaction id distinguishes + * each transaction, and helps separate opinfos in the + * op state machine. */ + ret = glusterd_generate_txn_id (dict, &txn_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to generate transaction id"); + goto out; + + } + + /* Save the MY_UUID as the originator_uuid. This originator_uuid + * will be used by is_origin_glusterd() to determine if a node + * is the originator node for a command. */ + ret = glusterd_set_originator_uuid (dict); if (ret) { gf_log (this->name, GF_LOG_ERROR, - "Unable to acquire lock on localhost, ret: %d", ret); - snprintf (err_str, err_len, "Another transaction is in progress. " - "Please try again after sometime."); + "Failed to set originator_uuid."); goto out; } + /* Based on the op_version, acquire a cluster or volume lock */ + if (priv->op_version < GD_OP_VERSION_4) { + ret = glusterd_lock (MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock on localhost, ret: %d", + ret); + snprintf (err_str, err_len, + "Another transaction is in progress. " + "Please try again after sometime."); + goto out; + } + } else { + /* If no volname is given as a part of the command, locks will + * not be held */ + ret = dict_get_str (dict, "volname", &tmp); + if (ret) { + gf_log ("", GF_LOG_INFO, + "No Volume name present. " + "Locks not being held."); + goto local_locking_done; + } else { + /* Use a copy of volname, as cli response will be + * sent before the unlock, and the volname in the + * dict, might be removed */ + volname = gf_strdup (tmp); + if (!volname) + goto out; + } + + ret = glusterd_volume_lock (volname, MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock for %s", volname); + snprintf (err_str, err_len, + "Another transaction is in progress for %s. " + "Please try again after sometime.", volname); + goto out; + } + } + locked = 1; gf_log (this->name, GF_LOG_DEBUG, "Acquired lock on localhost"); - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_START_LOCK, NULL); +local_locking_done: + + /* If no volname is given as a part of the command, locks will + * not be held, hence sending stage event. */ + if (volname) + event_type = GD_OP_EVENT_START_LOCK; + else { + txn_op_info.state.state = GD_OP_STATE_LOCK_SENT; + event_type = GD_OP_EVENT_ALL_ACC; + } + + /* Save opinfo for this transaction with the transaction id */ + glusterd_txn_opinfo_init (&txn_op_info, NULL, &op, ctx, req); + + ret = glusterd_set_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + if (ctx) + dict_unref (ctx); + goto out; + } + + ret = glusterd_op_sm_inject_event (event_type, txn_id, ctx); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to acquire cluster" " lock."); goto out; } - glusterd_op_set_op (op); - glusterd_op_set_ctx (ctx); - glusterd_op_set_req (req); - - out: - if (locked && ret) - glusterd_unlock (MY_UUID); + if (locked && ret) { + /* Based on the op-version, we release the + * cluster or volume lock */ + if (priv->op_version < GD_OP_VERSION_4) + glusterd_unlock (MY_UUID); + else { + ret = glusterd_volume_unlock (volname, MY_UUID); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to release lock for %s", + volname); + ret = -1; + } + } + + if (volname) + GF_FREE (volname); gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; @@ -640,11 +737,15 @@ out: int __glusterd_handle_cluster_lock (rpcsvc_request_t *req) { - gd1_mgmt_cluster_lock_req lock_req = {{0},}; - int32_t ret = -1; - glusterd_op_lock_ctx_t *ctx = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - xlator_t *this = NULL; + dict_t *op_ctx = NULL; + int32_t ret = -1; + gd1_mgmt_cluster_lock_req lock_req = {{0},}; + glusterd_op_lock_ctx_t *ctx = NULL; + glusterd_op_t op = GD_OP_EVENT_LOCK; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_op_info_t txn_op_info = {{0},}; + uuid_t *txn_id = &global_txn_id; + xlator_t *this = NULL; this = THIS; GF_ASSERT (this); @@ -679,8 +780,29 @@ __glusterd_handle_cluster_lock (rpcsvc_request_t *req) uuid_copy (ctx->uuid, lock_req.uuid); ctx->req = req; + ctx->dict = NULL; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK, ctx); + op_ctx = dict_new (); + if (!op_ctx) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set new dict"); + goto out; + } + + glusterd_txn_opinfo_init (&txn_op_info, NULL, &op, op_ctx, req); + + ret = glusterd_set_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + dict_unref (txn_op_info.op_ctx); + goto out; + } + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK, txn_id, ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_LOCK"); out: gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -698,6 +820,186 @@ glusterd_handle_cluster_lock (rpcsvc_request_t *req) __glusterd_handle_cluster_lock); } +static int +glusterd_handle_volume_lock_fn (rpcsvc_request_t *req) +{ + gd1_mgmt_volume_lock_req lock_req = {{0},}; + int32_t ret = -1; + glusterd_op_lock_ctx_t *ctx = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + glusterd_op_info_t txn_op_info = {{0},}; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &lock_req, + (xdrproc_t)xdr_gd1_mgmt_volume_lock_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode lock " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "Received volume lock req " + "from uuid: %s txn_id: %s", uuid_utoa (lock_req.uuid), + uuid_utoa (lock_req.txn_id)); + + if (glusterd_friend_find_by_uuid (lock_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa (lock_req.uuid)); + ret = -1; + goto out; + } + + ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t); + if (!ctx) { + ret = -1; + goto out; + } + + uuid_copy (ctx->uuid, lock_req.uuid); + ctx->req = req; + + ctx->dict = dict_new (); + if (!ctx->dict) { + ret = -1; + goto out; + } + + ret = dict_unserialize (lock_req.dict.dict_val, + lock_req.dict.dict_len, &ctx->dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + glusterd_txn_opinfo_init (&txn_op_info, NULL, &lock_req.op, + ctx->dict, req); + + ret = glusterd_set_txn_opinfo (&lock_req.txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + goto out; + } + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK, + &lock_req.txn_id, ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_LOCK"); + +out: + if (ret) { + if (ctx->dict) + dict_destroy (ctx->dict); + if (ctx) + GF_FREE (ctx); + } + + glusterd_friend_sm (); + glusterd_op_sm (); + + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_handle_volume_lock (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_volume_lock_fn); +} + +static int +glusterd_handle_volume_unlock_fn (rpcsvc_request_t *req) +{ + gd1_mgmt_volume_unlock_req lock_req = {{0},}; + int32_t ret = -1; + glusterd_op_lock_ctx_t *ctx = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &lock_req, + (xdrproc_t)xdr_gd1_mgmt_volume_unlock_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode unlock " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "Received volume unlock req " + "from uuid: %s", uuid_utoa (lock_req.uuid)); + + if (glusterd_friend_find_by_uuid (lock_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa (lock_req.uuid)); + ret = -1; + goto out; + } + + ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t); + if (!ctx) { + ret = -1; + goto out; + } + + uuid_copy (ctx->uuid, lock_req.uuid); + ctx->req = req; + + ctx->dict = dict_new (); + if (!ctx->dict) { + ret = -1; + goto out; + } + + ret = dict_unserialize (lock_req.dict.dict_val, + lock_req.dict.dict_len, &ctx->dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK, + &lock_req.txn_id, ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_UNLOCK"); + +out: + if (ret) { + if (ctx->dict) + dict_destroy (ctx->dict); + if (ctx) + GF_FREE (ctx); + } + + glusterd_friend_sm (); + glusterd_op_sm (); + + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_handle_volume_unlock (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_volume_unlock_fn); +} + int glusterd_req_ctx_create (rpcsvc_request_t *rpc_req, glusterd_op_t op, uuid_t uuid, @@ -756,6 +1058,9 @@ __glusterd_handle_stage_op (rpcsvc_request_t *req) gd1_mgmt_stage_op_req op_req = {{0},}; glusterd_peerinfo_t *peerinfo = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; + glusterd_op_info_t txn_op_info = {{0},}; + glusterd_op_sm_state_info_t state = {0,}; this = THIS; GF_ASSERT (this); @@ -784,7 +1089,36 @@ __glusterd_handle_stage_op (rpcsvc_request_t *req) if (ret) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_OP, req_ctx); + ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + + /* In cases where there is no volname, the receivers won't have a + * transaction opinfo created, as for those operations, the locking + * phase where the transaction opinfos are created, won't be called. */ + ret = glusterd_get_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "No transaction's opinfo set"); + + state.state = GD_OP_STATE_LOCKED; + glusterd_txn_opinfo_init (&txn_op_info, &state, + &op_req.op, req_ctx->dict, req); + + ret = glusterd_set_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + dict_unref (req_ctx->dict); + goto out; + } + } + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_OP, + txn_id, req_ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_STAGE_OP"); out: free (op_req.buf.buf_val);//malloced by xdr @@ -808,6 +1142,7 @@ __glusterd_handle_commit_op (rpcsvc_request_t *req) gd1_mgmt_commit_op_req op_req = {{0},}; glusterd_peerinfo_t *peerinfo = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; this = THIS; GF_ASSERT (this); @@ -838,11 +1173,12 @@ __glusterd_handle_commit_op (rpcsvc_request_t *req) if (ret) goto out; - ret = glusterd_op_init_ctx (op_req.op); - if (ret) - goto out; + ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id); - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_OP, req_ctx); + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_OP, + txn_id, req_ctx); out: free (op_req.buf.buf_val);//malloced by xdr @@ -1893,6 +2229,56 @@ glusterd_op_unlock_send_resp (rpcsvc_request_t *req, int32_t status) } int +glusterd_op_volume_lock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id, + int32_t status) +{ + + gd1_mgmt_volume_lock_rsp rsp = {{0},}; + int ret = -1; + + GF_ASSERT (req); + GF_ASSERT (txn_id); + glusterd_get_uuid (&rsp.uuid); + rsp.op_ret = status; + if (rsp.op_ret) + rsp.op_errno = errno; + uuid_copy (rsp.txn_id, *txn_id); + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_volume_lock_rsp); + + gf_log (THIS->name, GF_LOG_DEBUG, "Responded to volume lock, ret: %d", + ret); + + return ret; +} + +int +glusterd_op_volume_unlock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id, + int32_t status) +{ + + gd1_mgmt_volume_unlock_rsp rsp = {{0},}; + int ret = -1; + + GF_ASSERT (req); + GF_ASSERT (txn_id); + rsp.op_ret = status; + if (rsp.op_ret) + rsp.op_errno = errno; + glusterd_get_uuid (&rsp.uuid); + uuid_copy (rsp.txn_id, *txn_id); + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_volume_unlock_rsp); + + gf_log (THIS->name, GF_LOG_DEBUG, "Responded to volume unlock, ret: %d", + ret); + + return ret; +} + +int __glusterd_handle_cluster_unlock (rpcsvc_request_t *req) { gd1_mgmt_cluster_unlock_req unlock_req = {{0}, }; @@ -1900,6 +2286,7 @@ __glusterd_handle_cluster_unlock (rpcsvc_request_t *req) glusterd_op_lock_ctx_t *ctx = NULL; glusterd_peerinfo_t *peerinfo = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; this = THIS; GF_ASSERT (this); @@ -1934,8 +2321,9 @@ __glusterd_handle_cluster_unlock (rpcsvc_request_t *req) } uuid_copy (ctx->uuid, unlock_req.uuid); ctx->req = req; + ctx->dict = NULL; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK, ctx); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK, txn_id, ctx); out: glusterd_friend_sm (); @@ -3666,11 +4054,11 @@ get_brickinfo_from_brickid (char *brickid, glusterd_brickinfo_t **brickinfo) volid_str = brickid_dup; brick = strchr (brickid_dup, ':'); - *brick = '\0'; - brick++; if (!volid_str || !brick) goto out; + *brick = '\0'; + brick++; uuid_parse (volid_str, volid); ret = glusterd_volinfo_find_by_volume_id (volid, &volinfo); if (ret) @@ -3850,6 +4238,7 @@ __glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, glusterd_peerinfo_t *peerinfo = NULL; glusterd_peerctx_t *peerctx = NULL; gf_boolean_t quorum_action = _gf_false; + glusterd_volinfo_t *volinfo = NULL; uuid_t uuid; peerctx = mydata; @@ -3878,6 +4267,30 @@ __glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT %d", peerinfo->state.state); + if (peerinfo->connected) { + if (conf->op_version < GD_OP_VERSION_4) { + glusterd_get_lock_owner (&uuid); + if (!uuid_is_null (uuid) && + !uuid_compare (peerinfo->uuid, uuid)) + glusterd_unlock (peerinfo->uuid); + } else { + list_for_each_entry (volinfo, &conf->volumes, + vol_list) { + ret = glusterd_volume_unlock + (volinfo->volname, + peerinfo->uuid); + if (ret) + gf_log (this->name, + GF_LOG_TRACE, + "Lock not released " + "for %s", + volinfo->volname); + } + } + + ret = 0; + } + if ((peerinfo->quorum_contrib != QUORUM_DOWN) && (peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED)) { peerinfo->quorum_contrib = QUORUM_DOWN; @@ -3892,13 +4305,6 @@ __glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, glusterd_friend_remove_notify (peerctx); goto out; } - glusterd_get_lock_owner (&uuid); - if (!uuid_is_null (uuid) && - !uuid_compare (peerinfo->uuid, uuid)) { - glusterd_unlock (peerinfo->uuid); - if (opinfo.state.state != GD_OP_STATE_DEFAULT) - opinfo.state.state = GD_OP_STATE_DEFAULT; - } peerinfo->connected = 0; break; @@ -4033,3 +4439,18 @@ struct rpcsvc_program gd_svc_cli_prog_ro = { .actors = gd_svc_cli_actors_ro, .synctask = _gf_true, }; + +rpcsvc_actor_t gd_svc_mgmt_v3_actors[] = { + [GLUSTERD_MGMT_V3_NULL] = { "NULL", GLUSTERD_MGMT_V3_NULL, glusterd_null, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_VOLUME_LOCK] = { "VOL_LOCK", GLUSTERD_MGMT_V3_VOLUME_LOCK, glusterd_handle_volume_lock, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_VOLUME_UNLOCK] = { "VOL_UNLOCK", GLUSTERD_MGMT_V3_VOLUME_UNLOCK, glusterd_handle_volume_unlock, NULL, 0, DRC_NA}, +}; + +struct rpcsvc_program gd_svc_mgmt_v3_prog = { + .progname = "GlusterD svc mgmt v3", + .prognum = GD_MGMT_PROGRAM, + .progver = GD_MGMT_V3_VERSION, + .numactors = GLUSTERD_MGMT_V3_MAXVALUE, + .actors = gd_svc_mgmt_v3_actors, + .synctask = _gf_true, +}; diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c index 1420eb692..e0508faf6 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handshake.c +++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c @@ -30,6 +30,7 @@ extern struct rpc_clnt_program gd_peer_prog; extern struct rpc_clnt_program gd_mgmt_prog; +extern struct rpc_clnt_program gd_mgmt_v3_prog; #define TRUSTED_PREFIX "trusted-" @@ -812,6 +813,7 @@ __glusterd_mgmt_hndsk_version_ack_cbk (struct rpc_req *req, struct iovec *iov, */ peerinfo->mgmt = &gd_mgmt_prog; peerinfo->peer = &gd_peer_prog; + peerinfo->mgmt_v3 = &gd_mgmt_v3_prog; ret = default_notify (this, GF_EVENT_CHILD_UP, NULL); @@ -1038,6 +1040,15 @@ glusterd_set_clnt_mgmt_program (glusterd_peerinfo_t *peerinfo, peerinfo->peer->progname, peerinfo->peer->prognum, peerinfo->peer->progver); } + + if (peerinfo->mgmt_v3) { + gf_log ("", GF_LOG_INFO, + "Using Program %s, Num (%d), Version (%d)", + peerinfo->mgmt_v3->progname, + peerinfo->mgmt_v3->prognum, + peerinfo->mgmt_v3->progver); + } + ret = 0; out: return ret; diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.c b/xlators/mgmt/glusterd/src/glusterd-locks.c new file mode 100644 index 000000000..68c6d7426 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-locks.c @@ -0,0 +1,177 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "common-utils.h" +#include "cli1-xdr.h" +#include "xdr-generic.h" +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-store.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "glusterd-locks.h" +#include "run.h" +#include "syscall.h" + +#include <signal.h> + +static dict_t *vol_lock; + +/* Initialize the global vol-lock list(dict) when + * glusterd is spawned */ +int32_t +glusterd_vol_lock_init () +{ + int32_t ret = -1; + + vol_lock = dict_new (); + if (!vol_lock) + goto out; + + ret = 0; +out: + return ret; +} + +/* Destroy the global vol-lock list(dict) when + * glusterd cleanup is performed */ +void +glusterd_vol_lock_fini () +{ + if (vol_lock) + dict_unref (vol_lock); +} + +int32_t +glusterd_get_vol_lock_owner (char *volname, uuid_t *uuid) +{ + int32_t ret = -1; + vol_lock_obj *lock_obj = NULL; + uuid_t no_owner = {0,}; + + if (!volname || !uuid) { + gf_log ("", GF_LOG_ERROR, "volname or uuid is null."); + ret = -1; + goto out; + } + + ret = dict_get_bin (vol_lock, volname, (void **) &lock_obj); + if (!ret) + uuid_copy (*uuid, lock_obj->lock_owner); + else + uuid_copy (*uuid, no_owner); + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_volume_lock (char *volname, uuid_t uuid) +{ + int32_t ret = -1; + vol_lock_obj *lock_obj = NULL; + uuid_t owner = {0}; + + if (!volname) { + gf_log ("", GF_LOG_ERROR, "volname is null."); + ret = -1; + goto out; + } + + ret = glusterd_get_vol_lock_owner (volname, &owner); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Unable to get volume lock owner"); + goto out; + } + + /* If the lock has already been held for the given volume + * we fail */ + if (!uuid_is_null (owner)) { + gf_log ("", GF_LOG_ERROR, "Unable to acquire lock. " + "Lock for %s held by %s", volname, + uuid_utoa (owner)); + ret = -1; + goto out; + } + + lock_obj = GF_CALLOC (1, sizeof(vol_lock_obj), + gf_common_mt_vol_lock_obj_t); + if (!lock_obj) { + ret = -1; + goto out; + } + + uuid_copy (lock_obj->lock_owner, uuid); + + ret = dict_set_bin (vol_lock, volname, lock_obj, sizeof(vol_lock_obj)); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to set lock owner " + "in volume lock"); + if (lock_obj) + GF_FREE (lock_obj); + goto out; + } + + gf_log ("", GF_LOG_DEBUG, "Lock for %s successfully held by %s", + volname, uuid_utoa (uuid)); + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_volume_unlock (char *volname, uuid_t uuid) +{ + int32_t ret = -1; + uuid_t owner = {0}; + + if (!volname) { + gf_log ("", GF_LOG_ERROR, "volname is null."); + ret = -1; + goto out; + } + + ret = glusterd_get_vol_lock_owner (volname, &owner); + if (ret) + goto out; + + if (uuid_is_null (owner)) { + gf_log ("", GF_LOG_ERROR, "Lock for %s not held", volname); + ret = -1; + goto out; + } + + ret = uuid_compare (uuid, owner); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Lock owner mismatch. " + "Lock for %s held by %s", + volname, uuid_utoa (owner)); + goto out; + } + + /* Removing the volume lock from the global list */ + dict_del (vol_lock, volname); + + gf_log ("", GF_LOG_DEBUG, "Lock for %s successfully released", + volname); + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.h b/xlators/mgmt/glusterd/src/glusterd-locks.h new file mode 100644 index 000000000..2a8cc20ed --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-locks.h @@ -0,0 +1,38 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _GLUSTERD_LOCKS_H_ +#define _GLUSTERD_LOCKS_H_ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +struct volume_lock_object_ { + uuid_t lock_owner; +}; +typedef struct volume_lock_object_ vol_lock_obj; + +int32_t +glusterd_vol_lock_init (); + +void +glusterd_vol_lock_fini (); + +int32_t +glusterd_get_vol_lock_owner (char *volname, uuid_t *uuid); + +int32_t +glusterd_volume_lock (char *volname, uuid_t uuid); + +int32_t +glusterd_volume_unlock (char *volname, uuid_t uuid); + +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 963aa0762..83c91a52d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -37,6 +37,7 @@ #include "glusterd-store.h" #include "glusterd-hooks.h" #include "glusterd-volgen.h" +#include "glusterd-locks.h" #include "syscall.h" #include "cli1-xdr.h" #include "common-utils.h" @@ -67,6 +68,195 @@ static struct list_head gd_op_sm_queue; pthread_mutex_t gd_op_sm_lock; glusterd_op_info_t opinfo = {{0},}; +uuid_t global_txn_id = {0}; /* To be used in + * heterogeneous + * cluster with no + * transaction ids */ + +static dict_t *txn_opinfo; + +struct glusterd_txn_opinfo_object_ { + glusterd_op_info_t opinfo; +}; +typedef struct glusterd_txn_opinfo_object_ glusterd_txn_opinfo_obj; + +int32_t +glusterd_txn_opinfo_dict_init () +{ + int32_t ret = -1; + + txn_opinfo = dict_new (); + if (!txn_opinfo) { + ret = -1; + goto out; + } + + ret = 0; +out: + return ret; +} + +void +glusterd_txn_opinfo_dict_fini () +{ + if (txn_opinfo) + dict_destroy (txn_opinfo); +} + +void +glusterd_txn_opinfo_init (glusterd_op_info_t *opinfo, + glusterd_op_sm_state_info_t *state, + glusterd_op_t *op, + dict_t *op_ctx, + rpcsvc_request_t *req) +{ + GF_ASSERT (opinfo); + + if (state) + opinfo->state = *state; + + if (op) + opinfo->op = *op; + + opinfo->op_ctx = dict_ref(op_ctx); + + if (req) + opinfo->req = req; + + return; +} + +int32_t +glusterd_generate_txn_id (dict_t *dict, uuid_t **txn_id) +{ + int32_t ret = -1; + + GF_ASSERT (dict); + + *txn_id = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); + if (!*txn_id) + goto out; + + uuid_generate (**txn_id); + + ret = dict_set_bin (dict, "transaction_id", + *txn_id, sizeof (uuid_t)); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Failed to set transaction id."); + goto out; + } + + gf_log ("", GF_LOG_DEBUG, + "Transaction_id = %s", uuid_utoa (**txn_id)); +out: + if (ret && *txn_id) + GF_FREE (*txn_id); + + return ret; +} + +int32_t +glusterd_get_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo) +{ + int32_t ret = -1; + glusterd_txn_opinfo_obj *opinfo_obj = NULL; + + if (!txn_id || !opinfo) { + gf_log ("", GF_LOG_ERROR, + "Empty transaction id or opinfo received."); + ret = -1; + goto out; + } + + ret = dict_get_bin(txn_opinfo, uuid_utoa (*txn_id), + (void **) &opinfo_obj); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get transaction opinfo"); + goto out; + } + + (*opinfo) = opinfo_obj->opinfo; + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_set_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo) +{ + int32_t ret = -1; + glusterd_txn_opinfo_obj *opinfo_obj = NULL; + + if (!txn_id) { + gf_log ("", GF_LOG_ERROR, "Empty transaction id received."); + ret = -1; + goto out; + } + + ret = dict_get_bin(txn_opinfo, uuid_utoa (*txn_id), + (void **) &opinfo_obj); + if (ret) { + opinfo_obj = GF_CALLOC (1, sizeof(glusterd_txn_opinfo_obj), + gf_common_mt_txn_opinfo_obj_t); + if (!opinfo_obj) { + ret = -1; + goto out; + } + + ret = dict_set_bin(txn_opinfo, uuid_utoa (*txn_id), opinfo_obj, + sizeof(glusterd_txn_opinfo_obj)); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set opinfo for transaction ID : %s", + uuid_utoa (*txn_id)); + goto out; + } + } + + opinfo_obj->opinfo = (*opinfo); + + ret = 0; +out: + if (ret) + if (opinfo_obj) + GF_FREE (opinfo_obj); + + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_clear_txn_opinfo (uuid_t *txn_id) +{ + int32_t ret = -1; + glusterd_op_info_t txn_op_info = {{0},}; + + if (!txn_id) { + gf_log ("", GF_LOG_ERROR, "Empty transaction id received."); + ret = -1; + goto out; + } + + ret = glusterd_get_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Transaction opinfo not found"); + goto out; + } + + dict_unref (txn_op_info.op_ctx); + + dict_del(txn_opinfo, uuid_utoa (*txn_id)); + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + static int glusterfs_port = GLUSTERD_DEFAULT_PORT; static char *glusterd_op_sm_state_names[] = { "Default", @@ -147,10 +337,10 @@ glusterd_is_volume_started (glusterd_volinfo_t *volinfo) } static int -glusterd_op_sm_inject_all_acc () +glusterd_op_sm_inject_all_acc (uuid_t *txn_id) { int32_t ret = -1; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, txn_id, NULL); gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } @@ -354,6 +544,10 @@ glusterd_validate_quorum_options (xlator_t *this, char *fullkey, char *value, if (!glusterd_is_quorum_option (fullkey)) goto out; key = strchr (fullkey, '.'); + if (key == NULL) { + ret = -1; + goto out; + } key++; opt = xlator_volume_option_get (this, key); ret = xlator_option_validate (this, key, value, opt, op_errstr); @@ -441,7 +635,7 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) * This check is not done on the originator glusterd. The originator * glusterd sets this value. */ - origin_glusterd = is_origin_glusterd (); + origin_glusterd = is_origin_glusterd (dict); if (!origin_glusterd) { /* Check for v3.3.x origin glusterd */ @@ -2211,7 +2405,7 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, GF_ASSERT (dict); - origin_glusterd = is_origin_glusterd (); + origin_glusterd = is_origin_glusterd (dict); ret = dict_get_uint32 (dict, "cmd", &cmd); if (ret) @@ -2416,6 +2610,7 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) xlator_t *this = NULL; glusterd_peerinfo_t *peerinfo = NULL; uint32_t pending_count = 0; + dict_t *dict = NULL; this = THIS; priv = this->private; @@ -2430,27 +2625,61 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) continue; - proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_LOCK]; - if (proc->fn) { - ret = proc->fn (NULL, this, peerinfo); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, "Failed to " - "send lock request for operation " - "'Volume %s' to peer %s", - gd_op_list[opinfo.op], - peerinfo->hostname); - continue; + /* Based on the op_version, acquire a cluster or volume lock */ + if (priv->op_version < GD_OP_VERSION_4) { + proc = &peerinfo->mgmt->proctable + [GLUSTERD_MGMT_CLUSTER_LOCK]; + if (proc->fn) { + ret = proc->fn (NULL, this, peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to send lock request " + "for operation 'Volume %s' to " + "peer %s", + gd_op_list[opinfo.op], + peerinfo->hostname); + continue; + } + pending_count++; + } + } else { + dict = glusterd_op_get_ctx (); + dict_ref (dict); + + proc = &peerinfo->mgmt_v3->proctable + [GLUSTERD_MGMT_V3_VOLUME_LOCK]; + if (proc->fn) { + ret = dict_set_static_ptr (dict, "peerinfo", + peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set peerinfo"); + dict_unref (dict); + goto out; + } + + ret = proc->fn (NULL, this, dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to send volume lock " + "request for operation " + "'Volume %s' to peer %s", + gd_op_list[opinfo.op], + peerinfo->hostname); + dict_unref (dict); + continue; + } + pending_count++; } - pending_count++; } } opinfo.pending_count = pending_count; if (!opinfo.pending_count) - ret = glusterd_op_sm_inject_all_acc (); + ret = glusterd_op_sm_inject_all_acc (&event->txn_id); +out: gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); - return ret; } @@ -2463,17 +2692,12 @@ glusterd_op_ac_send_unlock (glusterd_op_sm_event_t *event, void *ctx) xlator_t *this = NULL; glusterd_peerinfo_t *peerinfo = NULL; uint32_t pending_count = 0; + dict_t *dict = NULL; this = THIS; priv = this->private; GF_ASSERT (priv); - /*ret = glusterd_unlock (MY_UUID); - - if (ret) - goto out; - */ - list_for_each_entry (peerinfo, &priv->peers, uuid_list) { GF_ASSERT (peerinfo); @@ -2483,29 +2707,63 @@ glusterd_op_ac_send_unlock (glusterd_op_sm_event_t *event, void *ctx) (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) continue; - proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_UNLOCK]; - if (proc->fn) { - ret = proc->fn (NULL, this, peerinfo); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, "Failed to " - "send unlock request for operation " - "'Volume %s' to peer %s", - gd_op_list[opinfo.op], - peerinfo->hostname); - continue; + /* Based on the op_version, + * release the cluster or volume lock */ + if (priv->op_version < GD_OP_VERSION_4) { + proc = &peerinfo->mgmt->proctable + [GLUSTERD_MGMT_CLUSTER_UNLOCK]; + if (proc->fn) { + ret = proc->fn (NULL, this, peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to send unlock request " + "for operation 'Volume %s' to " + "peer %s", + gd_op_list[opinfo.op], + peerinfo->hostname); + continue; + } + pending_count++; + } + } else { + dict = glusterd_op_get_ctx (); + dict_ref (dict); + + proc = &peerinfo->mgmt_v3->proctable + [GLUSTERD_MGMT_V3_VOLUME_UNLOCK]; + if (proc->fn) { + ret = dict_set_static_ptr (dict, "peerinfo", + peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set peerinfo"); + dict_unref (dict); + goto out; + } + + ret = proc->fn (NULL, this, dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to send volume unlock " + "request for operation " + "'Volume %s' to peer %s", + gd_op_list[opinfo.op], + peerinfo->hostname); + dict_unref (dict); + continue; + } + pending_count++; } - pending_count++; } } opinfo.pending_count = pending_count; if (!opinfo.pending_count) - ret = glusterd_op_sm_inject_all_acc (); + ret = glusterd_op_sm_inject_all_acc (&event->txn_id); +out: gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); - return ret; - } static int @@ -2517,7 +2775,8 @@ glusterd_op_ac_ack_drain (glusterd_op_sm_event_t *event, void *ctx) opinfo.pending_count--; if (!opinfo.pending_count) - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, NULL); gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret); @@ -2533,43 +2792,93 @@ glusterd_op_ac_send_unlock_drain (glusterd_op_sm_event_t *event, void *ctx) static int glusterd_op_ac_lock (glusterd_op_sm_event_t *event, void *ctx) { - glusterd_op_lock_ctx_t *lock_ctx = NULL; - int32_t ret = 0; + int32_t ret = 0; + char *volname = NULL; + glusterd_op_lock_ctx_t *lock_ctx = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; GF_ASSERT (event); GF_ASSERT (ctx); + this = THIS; + priv = this->private; + lock_ctx = (glusterd_op_lock_ctx_t *)ctx; - ret = glusterd_lock (lock_ctx->uuid); + /* If the req came from a node running on older op_version + * the dict won't be present. Based on it acquiring a cluster + * or volume lock */ + if (lock_ctx->dict == NULL) { + ret = glusterd_lock (lock_ctx->uuid); + glusterd_op_lock_send_resp (lock_ctx->req, ret); + } else { + ret = dict_get_str (lock_ctx->dict, "volname", &volname); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire volname"); + else { + ret = glusterd_volume_lock (volname, lock_ctx->uuid); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock for %s", + volname); + } - gf_log (THIS->name, GF_LOG_DEBUG, "Lock Returned %d", ret); + glusterd_op_volume_lock_send_resp (lock_ctx->req, + &event->txn_id, ret); - glusterd_op_lock_send_resp (lock_ctx->req, ret); + dict_unref (lock_ctx->dict); + } + gf_log (THIS->name, GF_LOG_DEBUG, "Lock Returned %d", ret); return ret; } static int glusterd_op_ac_unlock (glusterd_op_sm_event_t *event, void *ctx) { - int ret = 0; - glusterd_op_lock_ctx_t *lock_ctx = NULL; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; + int32_t ret = 0; + char *volname = NULL; + glusterd_op_lock_ctx_t *lock_ctx = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + GF_ASSERT (event); GF_ASSERT (ctx); this = THIS; priv = this->private; + lock_ctx = (glusterd_op_lock_ctx_t *)ctx; - ret = glusterd_unlock (lock_ctx->uuid); + /* If the req came from a node running on older op_version + * the dict won't be present. Based on it releasing the cluster + * or volume lock */ + if (lock_ctx->dict == NULL) { + ret = glusterd_unlock (lock_ctx->uuid); + glusterd_op_unlock_send_resp (lock_ctx->req, ret); + } else { + ret = dict_get_str (lock_ctx->dict, "volname", &volname); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire volname"); + else { + ret = glusterd_volume_unlock (volname, lock_ctx->uuid); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to release lock for %s", + volname); + } - gf_log (this->name, GF_LOG_DEBUG, "Unlock Returned %d", ret); + glusterd_op_volume_unlock_send_resp (lock_ctx->req, + &event->txn_id, ret); + + dict_unref (lock_ctx->dict); + } - glusterd_op_unlock_send_resp (lock_ctx->req, ret); + gf_log (this->name, GF_LOG_DEBUG, "Unlock Returned %d", ret); if (priv->pending_quorum_action) glusterd_do_quorum_action (); @@ -2607,7 +2916,8 @@ glusterd_op_ac_rcvd_lock_acc (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, + &event->txn_id, NULL); gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -3039,7 +3349,8 @@ out: if (dict) dict_unref (dict); if (ret) { - glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL); + glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + &event->txn_id, NULL); opinfo.op_ret = ret; } @@ -3048,7 +3359,7 @@ out: opinfo.pending_count); if (!opinfo.pending_count) - ret = glusterd_op_sm_inject_all_acc (); + ret = glusterd_op_sm_inject_all_acc (&event->txn_id); gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); @@ -3057,7 +3368,7 @@ out: } static int32_t -glusterd_op_start_rb_timer (dict_t *dict) +glusterd_op_start_rb_timer (dict_t *dict, uuid_t *txn_id) { int32_t op = 0; struct timespec timeout = {0, }; @@ -3076,7 +3387,7 @@ glusterd_op_start_rb_timer (dict_t *dict) } if (op != GF_REPLACE_OP_START) { - ret = glusterd_op_sm_inject_all_acc (); + ret = glusterd_op_sm_inject_all_acc (txn_id); goto out; } @@ -3091,6 +3402,17 @@ glusterd_op_start_rb_timer (dict_t *dict) ret = -1; goto out; } + + ret = dict_set_bin (rb_ctx, "transaction_id", + txn_id, sizeof (uuid_t)); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Failed to set transaction id."); + goto out; + } else + gf_log ("", GF_LOG_DEBUG, + "transaction_id = %s", uuid_utoa (*txn_id)); + priv->timer = gf_timer_call_after (THIS->ctx, timeout, glusterd_do_replace_brick, (void *) rb_ctx); @@ -3580,17 +3902,19 @@ out: if (dict) dict_unref (dict); if (ret) { - glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL); + glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + &event->txn_id, NULL); opinfo.op_ret = ret; } if (!opinfo.pending_count) { if (op == GD_OP_REPLACE_BRICK) { - ret = glusterd_op_start_rb_timer (op_dict); + ret = glusterd_op_start_rb_timer (op_dict, + &event->txn_id); } else { glusterd_op_modify_op_ctx (op, NULL); - ret = glusterd_op_sm_inject_all_acc (); + ret = glusterd_op_sm_inject_all_acc (&event->txn_id); } goto err; } @@ -3615,7 +3939,8 @@ glusterd_op_ac_rcvd_stage_op_acc (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_ACC, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_ACC, + &event->txn_id, NULL); out: gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -3636,7 +3961,8 @@ glusterd_op_ac_stage_op_failed (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, NULL); out: gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -3657,7 +3983,8 @@ glusterd_op_ac_commit_op_failed (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, NULL); out: gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -3700,7 +4027,8 @@ glusterd_op_ac_brick_op_failed (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.brick_pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, ev_ctx->commit_ctx); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, ev_ctx->commit_ctx); out: if (ev_ctx->rsp_dict) @@ -3742,7 +4070,7 @@ glusterd_op_ac_rcvd_commit_op_acc (glusterd_op_sm_event_t *event, void *ctx) goto out; } - ret = glusterd_op_start_rb_timer (op_ctx); + ret = glusterd_op_start_rb_timer (op_ctx, &event->txn_id); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Couldn't start " "replace-brick operation."); @@ -3757,10 +4085,14 @@ glusterd_op_ac_rcvd_commit_op_acc (glusterd_op_sm_event_t *event, void *ctx) out: if (commit_ack_inject) { if (ret) - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + &event->txn_id, + NULL); else if (!opinfo.pending_count) { glusterd_op_modify_op_ctx (op, NULL); - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC, NULL); + ret = glusterd_op_sm_inject_event + (GD_OP_EVENT_COMMIT_ACC, + &event->txn_id, NULL); } /*else do nothing*/ } @@ -3781,7 +4113,8 @@ glusterd_op_ac_rcvd_unlock_acc (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, + &event->txn_id, NULL); gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -3815,7 +4148,7 @@ glusterd_op_reset_ctx () } int32_t -glusterd_op_txn_complete () +glusterd_op_txn_complete (uuid_t *txn_id) { int32_t ret = -1; glusterd_conf_t *priv = NULL; @@ -3825,6 +4158,7 @@ glusterd_op_txn_complete () rpcsvc_request_t *req = NULL; void *ctx = NULL; char *op_errstr = NULL; + char *volname = NULL; xlator_t *this = NULL; this = THIS; @@ -3847,14 +4181,29 @@ glusterd_op_txn_complete () glusterd_op_reset_ctx (); glusterd_op_clear_errstr (); - ret = glusterd_unlock (MY_UUID); - - /* unlock cant/shouldnt fail here!! */ - if (ret) { - gf_log (this->name, GF_LOG_CRITICAL, - "Unable to clear local lock, ret: %d", ret); + /* Based on the op-version, we release the cluster or volume lock */ + if (priv->op_version < GD_OP_VERSION_4) { + ret = glusterd_unlock (MY_UUID); + /* unlock cant/shouldnt fail here!! */ + if (ret) + gf_log (this->name, GF_LOG_CRITICAL, + "Unable to clear local lock, ret: %d", ret); + else + gf_log (this->name, GF_LOG_DEBUG, "Cleared local lock"); } else { - gf_log (this->name, GF_LOG_DEBUG, "Cleared local lock"); + ret = dict_get_str (ctx, "volname", &volname); + if (ret) + gf_log ("", GF_LOG_INFO, + "No Volume name present. " + "Locks have not been held."); + + if (volname) { + ret = glusterd_volume_unlock (volname, MY_UUID); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to release lock for %s", + volname); + } } ret = glusterd_op_send_cli_response (op, op_ret, @@ -3873,6 +4222,13 @@ glusterd_op_txn_complete () if (priv->pending_quorum_action) glusterd_do_quorum_action (); + + /* Clearing the transaction opinfo */ + ret = glusterd_clear_txn_opinfo (txn_id); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to clear transaction's opinfo"); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } @@ -3884,7 +4240,7 @@ glusterd_op_ac_unlocked_all (glusterd_op_sm_event_t *event, void *ctx) GF_ASSERT (event); - ret = glusterd_op_txn_complete (); + ret = glusterd_op_txn_complete (&event->txn_id); gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -3901,6 +4257,7 @@ glusterd_op_ac_stage_op (glusterd_op_sm_event_t *event, void *ctx) char *op_errstr = NULL; dict_t *dict = NULL; xlator_t *this = NULL; + uuid_t *txn_id = NULL; this = THIS; GF_ASSERT (this); @@ -3926,9 +4283,27 @@ glusterd_op_ac_stage_op (glusterd_op_sm_event_t *event, void *ctx) status); } + txn_id = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); + + if (txn_id) + uuid_copy (*txn_id, event->txn_id); + else { + ret = -1; + goto out; + } + + ret = dict_set_bin (rsp_dict, "transaction_id", + txn_id, sizeof(uuid_t *)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set transaction id."); + goto out; + } + ret = glusterd_op_stage_send_resp (req_ctx->req, req_ctx->op, status, op_errstr, rsp_dict); +out: if (op_errstr && (strcmp (op_errstr, ""))) GF_FREE (op_errstr); @@ -3990,6 +4365,7 @@ glusterd_op_ac_commit_op (glusterd_op_sm_event_t *event, void *ctx) dict_t *dict = NULL; dict_t *rsp_dict = NULL; xlator_t *this = NULL; + uuid_t *txn_id = NULL; this = THIS; GF_ASSERT (this); @@ -4019,10 +4395,27 @@ glusterd_op_ac_commit_op (glusterd_op_sm_event_t *event, void *ctx) "'Volume %s' failed: %d", gd_op_list[req_ctx->op], status); + txn_id = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); + + if (txn_id) + uuid_copy (*txn_id, event->txn_id); + else { + ret = -1; + goto out; + } + + ret = dict_set_bin (rsp_dict, "transaction_id", + txn_id, sizeof(uuid_t)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set transaction id."); + goto out; + } + ret = glusterd_op_commit_send_resp (req_ctx->req, req_ctx->op, status, op_errstr, rsp_dict); - glusterd_op_fini_ctx (); +out: if (op_errstr && (strcmp (op_errstr, ""))) GF_FREE (op_errstr); @@ -4051,7 +4444,6 @@ glusterd_op_ac_send_commit_failed (glusterd_op_sm_event_t *event, void *ctx) opinfo.op_ret, opinfo.op_errstr, op_ctx); - glusterd_op_fini_ctx (); if (opinfo.op_errstr && (strcmp (opinfo.op_errstr, ""))) { GF_FREE (opinfo.op_errstr); opinfo.op_errstr = NULL; @@ -5233,7 +5625,8 @@ glusterd_op_ac_send_brick_op (glusterd_op_sm_event_t *event, void *ctx) if (!opinfo.pending_count && !opinfo.brick_pending_count) { glusterd_clear_pending_nodes (&opinfo.pending_bricks); - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, req_ctx); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, req_ctx); } out: @@ -5287,7 +5680,8 @@ glusterd_op_ac_rcvd_brick_op_acc (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.brick_pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, ev_ctx->commit_ctx); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, &event->txn_id, + ev_ctx->commit_ctx); out: if (ev_ctx->rsp_dict) @@ -5661,7 +6055,7 @@ glusterd_op_sm_new_event (glusterd_op_sm_event_type_t event_type, int glusterd_op_sm_inject_event (glusterd_op_sm_event_type_t event_type, - void *ctx) + uuid_t *txn_id, void *ctx) { int32_t ret = -1; glusterd_op_sm_event_t *event = NULL; @@ -5676,6 +6070,9 @@ glusterd_op_sm_inject_event (glusterd_op_sm_event_type_t event_type, event->ctx = ctx; + if (txn_id) + uuid_copy (event->txn_id, *txn_id); + gf_log (THIS->name, GF_LOG_DEBUG, "Enqueue event: '%s'", glusterd_op_sm_event_name_get (event->event)); list_add_tail (&event->list, &gd_op_sm_queue); @@ -5736,6 +6133,7 @@ glusterd_op_sm () glusterd_op_sm_t *state = NULL; glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; xlator_t *this = NULL; + glusterd_op_info_t txn_op_info; this = THIS; GF_ASSERT (this); @@ -5756,6 +6154,20 @@ glusterd_op_sm () "type: '%s'", glusterd_op_sm_event_name_get(event_type)); + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", + uuid_utoa (event->txn_id)); + + ret = glusterd_get_txn_opinfo (&event->txn_id, + &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get transaction's opinfo"); + glusterd_destroy_op_event_ctx (event); + GF_FREE (event); + continue; + } else + opinfo = txn_op_info; + state = glusterd_op_state_table[opinfo.state.state]; GF_ASSERT (state); @@ -5786,8 +6198,27 @@ glusterd_op_sm () return ret; } + if ((state[event_type].next_state == + GD_OP_STATE_DEFAULT) && + (event_type == GD_OP_EVENT_UNLOCK)) { + /* Clearing the transaction opinfo */ + ret = glusterd_clear_txn_opinfo(&event->txn_id); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to clear " + "transaction's opinfo"); + } else { + ret = glusterd_set_txn_opinfo (&event->txn_id, + &opinfo); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to set " + "transaction's opinfo"); + } + glusterd_destroy_op_event_ctx (event); GF_FREE (event); + } } @@ -5841,52 +6272,6 @@ glusterd_op_clear_op (glusterd_op_t op) } int32_t -glusterd_op_init_ctx (glusterd_op_t op) -{ - int ret = 0; - dict_t *dict = NULL; - xlator_t *this = NULL; - - this = THIS; - GF_ASSERT (this); - GF_ASSERT (GD_OP_NONE < op && op < GD_OP_MAX); - - if (_gf_false == glusterd_need_brick_op (op)) { - gf_log (this->name, GF_LOG_DEBUG, "Received op: %s, returning", - gd_op_list[op]); - goto out; - } - dict = dict_new (); - if (dict == NULL) { - ret = -1; - goto out; - } - ret = glusterd_op_set_ctx (dict); - if (ret) - goto out; -out: - gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} - - - -int32_t -glusterd_op_fini_ctx () -{ - dict_t *dict = NULL; - - dict = glusterd_op_get_ctx (); - if (dict) - dict_unref (dict); - - glusterd_op_reset_ctx (); - return 0; -} - - - -int32_t glusterd_op_free_ctx (glusterd_op_t op, void *ctx) { diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h index 1125368ce..53d4e2ff4 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h @@ -77,6 +77,7 @@ struct glusterd_op_sm_event_ { struct list_head list; void *ctx; glusterd_op_sm_event_type_t event; + uuid_t txn_id; }; typedef struct glusterd_op_sm_event_ glusterd_op_sm_event_t; @@ -119,6 +120,7 @@ typedef struct glusterd_op_log_filename_ctx_ glusterd_op_log_filename_ctx_t; struct glusterd_op_lock_ctx_ { uuid_t uuid; + dict_t *dict; rpcsvc_request_t *req; }; @@ -180,7 +182,7 @@ glusterd_op_sm_new_event (glusterd_op_sm_event_type_t event_type, glusterd_op_sm_event_t **new_event); int glusterd_op_sm_inject_event (glusterd_op_sm_event_type_t event_type, - void *ctx); + uuid_t *txn_id, void *ctx); int glusterd_op_sm_init (); @@ -264,10 +266,7 @@ glusterd_op_init_commit_rsp_dict (glusterd_op_t op); void glusterd_op_modify_op_ctx (glusterd_op_t op, void *op_ctx); -int32_t -glusterd_op_init_ctx (glusterd_op_t op); -int32_t -glusterd_op_fini_ctx (); + int32_t glusterd_volume_stats_read_perf (char *brick_path, int32_t blk_size, int32_t blk_count, double *throughput, double *time); @@ -296,4 +295,16 @@ int glusterd_is_valid_vg (glusterd_brickinfo_t *brick, int check_tag, char *msg); #endif +int32_t +glusterd_get_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo); + +int32_t +glusterd_set_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo); + +int32_t +glusterd_clear_txn_opinfo (uuid_t *txn_id); + +int32_t +glusterd_generate_txn_id (dict_t *dict, uuid_t **txn_id); + #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-quota.c b/xlators/mgmt/glusterd/src/glusterd-quota.c index d13533aa6..cf23b6404 100644 --- a/xlators/mgmt/glusterd/src/glusterd-quota.c +++ b/xlators/mgmt/glusterd/src/glusterd-quota.c @@ -824,7 +824,7 @@ glusterd_quota_limit_usage (glusterd_volinfo_t *volinfo, dict_t *dict, } } - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { ret = glusterd_set_quota_limit (volinfo->volname, path, hard_limit, soft_limit, op_errstr); @@ -919,7 +919,7 @@ glusterd_quota_remove_limits (glusterd_volinfo_t *volinfo, dict_t *dict, if (ret) goto out; - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { ret = glusterd_remove_quota_limit (volinfo->volname, path, op_errstr); if (ret) @@ -1385,7 +1385,7 @@ glusterd_op_stage_quota (dict_t *dict, char **op_errstr, dict_t *rsp_dict) case GF_QUOTA_OPTION_TYPE_ENABLE: case GF_QUOTA_OPTION_TYPE_LIST: /* Fuse mount req. only for enable & list-usage options*/ - if (is_origin_glusterd () && + if (is_origin_glusterd (dict) && !glusterd_is_fuse_available ()) { *op_errstr = gf_strdup ("Fuse unavailable"); ret = -1; diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index b28056135..b274e3367 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -545,7 +545,7 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) case GF_DEFRAG_CMD_START: case GF_DEFRAG_CMD_START_LAYOUT_FIX: case GF_DEFRAG_CMD_START_FORCE: - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { op_ctx = glusterd_op_get_ctx (); if (!op_ctx) { ret = -1; diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c index 5c3fc2d82..f26108f89 100644 --- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c @@ -31,6 +31,7 @@ DEFAULT_VAR_RUN_DIRECTORY"/%s-"RB_CLIENT_MOUNTPOINT, \ volinfo->volname); +extern uuid_t global_txn_id; int glusterd_get_replace_op_str (gf1_cli_replace_op op, char *op_str) @@ -325,7 +326,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, ret = -1; goto out; } - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { if (!ctx) { ret = -1; gf_log (this->name, GF_LOG_ERROR, @@ -1631,7 +1632,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) /* Set task-id, if available, in op_ctx dict for operations * other than start */ - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { ctx = glusterd_op_get_ctx(); if (!ctx) { gf_log (this->name, GF_LOG_ERROR, "Failed to " @@ -1894,6 +1895,7 @@ glusterd_do_replace_brick (void *data) glusterd_brickinfo_t *src_brickinfo = NULL; glusterd_brickinfo_t *dst_brickinfo = NULL; glusterd_conf_t *priv = NULL; + uuid_t *txn_id = &global_txn_id; int ret = 0; @@ -1913,6 +1915,10 @@ glusterd_do_replace_brick (void *data) gf_log ("", GF_LOG_DEBUG, "Replace brick operation detected"); + ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + ret = dict_get_int32 (dict, "operation", &op); if (ret) { gf_log ("", GF_LOG_DEBUG, @@ -2008,9 +2014,11 @@ glusterd_do_replace_brick (void *data) out: if (ret) - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + txn_id, NULL); else - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC, + txn_id, NULL); synclock_lock (&priv->big_lock); { diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c index cd81383e9..9af26cfab 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c @@ -33,6 +33,7 @@ extern glusterd_op_info_t opinfo; +extern uuid_t global_txn_id; int32_t glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret, @@ -574,6 +575,7 @@ __glusterd_cluster_lock_cbk (struct rpc_req *req, struct iovec *iov, glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; glusterd_peerinfo_t *peerinfo = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; this = THIS; GF_ASSERT (this); @@ -618,7 +620,7 @@ out: event_type = GD_OP_EVENT_RCVD_ACC; } - ret = glusterd_op_sm_inject_event (event_type, NULL); + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); if (!ret) { glusterd_friend_sm (); @@ -637,6 +639,168 @@ glusterd_cluster_lock_cbk (struct rpc_req *req, struct iovec *iov, __glusterd_cluster_lock_cbk); } +static int32_t +glusterd_vol_lock_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + gd1_mgmt_volume_lock_rsp rsp = {{0},}; + int ret = -1; + int32_t op_ret = -1; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_volume_lock_rsp); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to decode volume lock " + "response received from peer"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + op_ret = rsp.op_ret; + + txn_id = &rsp.txn_id; + + gf_log (this->name, (op_ret) ? GF_LOG_ERROR : GF_LOG_DEBUG, + "Received volume lock %s from uuid: %s", + (op_ret) ? "RJT" : "ACC", uuid_utoa (rsp.uuid)); + + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "Volume lock response received " + "from unknown peer: %s. Ignoring response", + uuid_utoa (rsp.uuid)); + goto out; + } + + if (op_ret) { + event_type = GD_OP_EVENT_RCVD_RJT; + opinfo.op_ret = op_ret; + opinfo.op_errstr = gf_strdup ("Another transaction could be in " + "progress. Please try again after" + " sometime."); + } else { + event_type = GD_OP_EVENT_RCVD_ACC; + } + + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); + + if (!ret) { + glusterd_friend_sm (); + glusterd_op_sm (); + } + +out: + GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); + return ret; +} + +int32_t +glusterd_vol_lock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + glusterd_vol_lock_cbk_fn); +} + +static int32_t +glusterd_vol_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + gd1_mgmt_volume_unlock_rsp rsp = {{0},}; + int ret = -1; + int32_t op_ret = -1; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_volume_unlock_rsp); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to decode volume unlock " + "response received from peer"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + op_ret = rsp.op_ret; + + txn_id = &rsp.txn_id; + + gf_log (this->name, (op_ret) ? GF_LOG_ERROR : GF_LOG_DEBUG, + "Received volume unlock %s from uuid: %s", + (op_ret) ? "RJT" : "ACC", + uuid_utoa (rsp.uuid)); + + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); + + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "Volume unlock response received " + "from unknown peer: %s. Ignoring response", + uuid_utoa (rsp.uuid)); + goto out; + } + + if (op_ret) { + event_type = GD_OP_EVENT_RCVD_RJT; + opinfo.op_ret = op_ret; + opinfo.op_errstr = gf_strdup ("Another transaction could be in " + "progress. Please try again after" + " sometime."); + } else { + event_type = GD_OP_EVENT_RCVD_ACC; + } + + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); + + if (!ret) { + glusterd_friend_sm (); + glusterd_op_sm (); + } + +out: + GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); + return ret; +} + +int32_t +glusterd_vol_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + glusterd_vol_unlock_cbk_fn); +} + int32_t __glusterd_cluster_unlock_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) @@ -647,6 +811,7 @@ __glusterd_cluster_unlock_cbk (struct rpc_req *req, struct iovec *iov, glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; glusterd_peerinfo_t *peerinfo = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; this = THIS; GF_ASSERT (this); @@ -688,7 +853,7 @@ out: event_type = GD_OP_EVENT_RCVD_ACC; } - ret = glusterd_op_sm_inject_event (event_type, NULL); + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); if (!ret) { glusterd_friend_sm (); @@ -720,6 +885,7 @@ __glusterd_stage_op_cbk (struct rpc_req *req, struct iovec *iov, char err_str[2048] = {0}; char *peer_str = NULL; xlator_t *this = NULL; + uuid_t *txn_id = NULL; this = THIS; GF_ASSERT (this); @@ -772,11 +938,17 @@ out: "Received stage %s from uuid: %s", (op_ret) ? "RJT" : "ACC", uuid_utoa (rsp.uuid)); + ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); if (ret) { gf_log (this->name, GF_LOG_CRITICAL, "Stage response received " - "from unknown peer: %s", uuid_utoa (rsp.uuid)); + "from unknown peer: %s. Ignoring response.", + uuid_utoa (rsp.uuid)); + goto out; } if (op_ret) { @@ -807,7 +979,7 @@ out: break; } - ret = glusterd_op_sm_inject_event (event_type, NULL); + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); if (!ret) { glusterd_friend_sm (); @@ -847,6 +1019,8 @@ __glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov, char err_str[2048] = {0}; char *peer_str = NULL; xlator_t *this = NULL; + uuid_t *txn_id = NULL; + this = THIS; GF_ASSERT (this); @@ -900,6 +1074,10 @@ __glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov, "Received commit %s from uuid: %s", (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid)); + ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); if (ret) { @@ -979,7 +1157,7 @@ __glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov, } out: - ret = glusterd_op_sm_inject_event (event_type, NULL); + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); if (!ret) { glusterd_friend_sm (); @@ -1216,6 +1394,107 @@ out: } int32_t +glusterd_vol_lock (call_frame_t *frame, xlator_t *this, + void *data) +{ + gd1_mgmt_volume_lock_req req = {{0},}; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + call_frame_t *dummy_frame = NULL; + dict_t *dict = NULL; + + if (!this) + goto out; + + dict = data; + + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo)); + if (ret) + goto out; + + //peerinfo should not be in payload + dict_del (dict, "peerinfo"); + + glusterd_get_uuid (&req.uuid); + + ret = dict_allocate_and_serialize (dict, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to serialize dict " + "to request buffer"); + goto out; + } + + dummy_frame = create_frame (this, this->ctx->pool); + if (!dummy_frame) + goto out; + + ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, + peerinfo->mgmt_v3, + GLUSTERD_MGMT_V3_VOLUME_LOCK, NULL, + this, glusterd_vol_lock_cbk, + (xdrproc_t)xdr_gd1_mgmt_volume_lock_req); +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_vol_unlock (call_frame_t *frame, xlator_t *this, + void *data) +{ + gd1_mgmt_volume_unlock_req req = {{0},}; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + call_frame_t *dummy_frame = NULL; + dict_t *dict = NULL; + + if (!this) + goto out; + + dict = data; + + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo)); + if (ret) + goto out; + + //peerinfo should not be in payload + dict_del (dict, "peerinfo"); + + glusterd_get_uuid (&req.uuid); + + ret = dict_allocate_and_serialize (dict, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to serialize dict " + "to request buffer"); + goto out; + } + + dummy_frame = create_frame (this, this->ctx->pool); + if (!dummy_frame) + goto out; + + ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, + peerinfo->mgmt_v3, + GLUSTERD_MGMT_V3_VOLUME_UNLOCK, NULL, + this, glusterd_vol_unlock_cbk, + (xdrproc_t) + xdr_gd1_mgmt_volume_unlock_req); +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t glusterd_cluster_unlock (call_frame_t *frame, xlator_t *this, void *data) { @@ -1378,6 +1657,7 @@ __glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov, glusterd_req_ctx_t *req_ctx = NULL; glusterd_pending_node_t *node = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; this = THIS; GF_ASSERT (this); @@ -1440,6 +1720,11 @@ __glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov, } } out: + + ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + ev_ctx = GF_CALLOC (1, sizeof (*ev_ctx), gf_gld_mt_brick_rsp_ctx_t); GF_ASSERT (ev_ctx); if (op_ret) { @@ -1452,7 +1737,7 @@ out: ev_ctx->pending_node = frame->cookie; ev_ctx->rsp_dict = dict; ev_ctx->commit_ctx = frame->local; - ret = glusterd_op_sm_inject_event (event_type, ev_ctx); + ret = glusterd_op_sm_inject_event (event_type, txn_id, ev_ctx); if (!ret) { glusterd_friend_sm (); glusterd_op_sm (); @@ -1477,6 +1762,7 @@ int32_t glusterd_brick_op (call_frame_t *frame, xlator_t *this, void *data) { + gd1_mgmt_brick_op_req *req = NULL; int ret = 0; glusterd_conf_t *priv = NULL; @@ -1487,6 +1773,7 @@ glusterd_brick_op (call_frame_t *frame, xlator_t *this, glusterd_req_ctx_t *req_ctx = NULL; struct rpc_clnt *rpc = NULL; dict_t *op_ctx = NULL; + uuid_t *txn_id = &global_txn_id; if (!this) { ret = -1; @@ -1509,6 +1796,11 @@ glusterd_brick_op (call_frame_t *frame, xlator_t *this, goto out; } + + ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + list_for_each_entry (pending_node, &opinfo.pending_bricks, list) { dummy_frame = create_frame (this, this->ctx->pool); if (!dummy_frame) @@ -1587,7 +1879,8 @@ glusterd_brick_op (call_frame_t *frame, xlator_t *this, out: if (ret) { - glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, data); + glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + txn_id, data); opinfo.op_ret = ret; } gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -1615,6 +1908,12 @@ struct rpc_clnt_procedure gd_mgmt_actors[GLUSTERD_MGMT_MAXVALUE] = { [GLUSTERD_MGMT_COMMIT_OP] = {"COMMIT_OP", glusterd_commit_op}, }; +struct rpc_clnt_procedure gd_mgmt_v3_actors[GLUSTERD_MGMT_V3_MAXVALUE] = { + [GLUSTERD_MGMT_V3_NULL] = {"NULL", NULL }, + [GLUSTERD_MGMT_V3_VOLUME_LOCK] = {"VOLUME_LOCK", glusterd_vol_lock}, + [GLUSTERD_MGMT_V3_VOLUME_UNLOCK] = {"VOLUME_UNLOCK", glusterd_vol_unlock}, +}; + struct rpc_clnt_program gd_mgmt_prog = { .progname = "glusterd mgmt", .prognum = GD_MGMT_PROGRAM, @@ -1639,4 +1938,10 @@ struct rpc_clnt_program gd_peer_prog = { .numproc = GLUSTERD_FRIEND_MAXVALUE, }; - +struct rpc_clnt_program gd_mgmt_v3_prog = { + .progname = "glusterd mgmt v3", + .prognum = GD_MGMT_PROGRAM, + .progver = GD_MGMT_V3_VERSION, + .proctable = gd_mgmt_v3_actors, + .numproc = GLUSTERD_MGMT_V3_MAXVALUE, +}; diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.h b/xlators/mgmt/glusterd/src/glusterd-sm.h index e29bb7277..b9bedbe69 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-sm.h @@ -97,6 +97,7 @@ struct glusterd_peerinfo_ { struct rpc_clnt *rpc; rpc_clnt_prog_t *mgmt; rpc_clnt_prog_t *peer; + rpc_clnt_prog_t *mgmt_v3; int connected; gf_store_handle_t *shandle; glusterd_sm_tr_log_t sm_log; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 2c2fc6fb4..0ee430969 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -2085,7 +2085,8 @@ glusterd_store_retrieve_volume (char *volname) goto out; - list_add_tail (&volinfo->vol_list, &priv->volumes); + list_add_order (&volinfo->vol_list, &priv->volumes, + glusterd_compare_volume_name); out: gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c index c975d01f1..5eb5e9f38 100644 --- a/xlators/mgmt/glusterd/src/glusterd-syncop.c +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c @@ -17,6 +17,9 @@ #include "glusterd.h" #include "glusterd-op-sm.h" #include "glusterd-utils.h" +#include "glusterd-locks.h" + +extern glusterd_op_info_t opinfo; static inline void gd_synctask_barrier_wait (struct syncargs *args, int count) @@ -31,6 +34,62 @@ gd_synctask_barrier_wait (struct syncargs *args, int count) } static void +gd_mgmt_v3_collate_errors (struct syncargs *args, int op_ret, int op_errno, + char *op_errstr, int op_code, + glusterd_peerinfo_t *peerinfo, u_char *uuid) +{ + char err_str[PATH_MAX] = "Please check log file for details."; + char op_err[PATH_MAX] = ""; + char *peer_str = NULL; + + if (op_ret) { + args->op_ret = op_ret; + args->op_errno = op_errno; + + if (peerinfo) + peer_str = peerinfo->hostname; + else + peer_str = uuid_utoa (uuid); + + if (op_errstr && strcmp (op_errstr, "")) + snprintf (err_str, sizeof(err_str) - 1, + "Error: %s", op_errstr); + + switch (op_code) { + case GLUSTERD_MGMT_V3_VOLUME_LOCK: + { + snprintf (op_err, sizeof(op_err) - 1, + "Locking volume failed " + "on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_VOLUME_UNLOCK: + { + snprintf (op_err, sizeof(op_err) - 1, + "Unlocking volume failed " + "on %s. %s", peer_str, err_str); + break; + } + } + + if (args->errstr) { + snprintf (err_str, sizeof(err_str) - 1, + "%s\n%s", args->errstr, + op_err); + GF_FREE (args->errstr); + args->errstr = NULL; + } else + snprintf (err_str, sizeof(err_str) - 1, + "%s", op_err); + + gf_log ("", GF_LOG_ERROR, "%s", op_err); + args->errstr = gf_strdup (err_str); + } + + return; +} + +static void gd_collate_errors (struct syncargs *args, int op_ret, int op_errno, char *op_errstr, int op_code, glusterd_peerinfo_t *peerinfo, u_char *uuid) @@ -207,6 +266,7 @@ out: /* Defined in glusterd-rpc-ops.c */ extern struct rpc_clnt_program gd_mgmt_prog; extern struct rpc_clnt_program gd_brick_prog; +extern struct rpc_clnt_program gd_mgmt_v3_prog; static int glusterd_syncop_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp) @@ -287,6 +347,185 @@ out: } int32_t +_gd_syncop_mgmt_volume_lock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_volume_lock_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int op_ret = -1; + int op_errno = -1; + + GF_ASSERT(req); + GF_ASSERT(iov); + GF_ASSERT(myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_volume_lock_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_VOLUME_LOCK, + peerinfo, rsp.uuid); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_syncop_mgmt_volume_lock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + _gd_syncop_mgmt_volume_lock_cbk); +} + +int +gd_syncop_mgmt_volume_lock (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid, uuid_t txn_id) +{ + int ret = -1; + gd1_mgmt_volume_lock_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + + GF_ASSERT(op_ctx); + GF_ASSERT(peerinfo); + GF_ASSERT(args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + uuid_copy (req.txn_id, txn_id); + req.op = op; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_VOLUME_LOCK, + gd_syncop_mgmt_volume_lock_cbk, + (xdrproc_t) + xdr_gd1_mgmt_volume_lock_req); + synclock_lock (&conf->big_lock); +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +_gd_syncop_mgmt_volume_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_volume_unlock_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int op_ret = -1; + int op_errno = -1; + + GF_ASSERT(req); + GF_ASSERT(iov); + GF_ASSERT(myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_volume_unlock_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + /* Set peer as locked, so we unlock only the locked peers */ + if (rsp.op_ret == 0) + peerinfo->locked = _gf_true; + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_VOLUME_UNLOCK, + peerinfo, rsp.uuid); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_syncop_mgmt_volume_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + _gd_syncop_mgmt_volume_unlock_cbk); +} + +int +gd_syncop_mgmt_volume_unlock (dict_t *op_ctx, glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid, uuid_t txn_id) +{ + int ret = -1; + gd1_mgmt_volume_unlock_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + + GF_ASSERT(op_ctx); + GF_ASSERT(peerinfo); + GF_ASSERT(args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + uuid_copy (req.txn_id, txn_id); + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_VOLUME_UNLOCK, + gd_syncop_mgmt_volume_unlock_cbk, + (xdrproc_t) + xdr_gd1_mgmt_volume_unlock_req); + synclock_lock (&conf->big_lock); +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t _gd_syncop_mgmt_lock_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { @@ -830,8 +1069,8 @@ gd_build_peers_list (struct list_head *peers, struct list_head *xact_peers, } int -gd_lock_op_phase (struct list_head *peers, glusterd_op_t op, dict_t *op_ctx, - char **op_errstr, int npeers) +gd_lock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, dict_t *op_ctx, + char **op_errstr, int npeers, uuid_t txn_id) { int ret = -1; int peer_cnt = 0; @@ -839,6 +1078,9 @@ gd_lock_op_phase (struct list_head *peers, glusterd_op_t op, dict_t *op_ctx, xlator_t *this = NULL; glusterd_peerinfo_t *peerinfo = NULL; struct syncargs args = {0}; + struct list_head *peers = NULL; + + peers = &conf->xaction_peers; if (!npeers) { ret = 0; @@ -849,22 +1091,38 @@ gd_lock_op_phase (struct list_head *peers, glusterd_op_t op, dict_t *op_ctx, synctask_barrier_init((&args)); peer_cnt = 0; list_for_each_entry (peerinfo, peers, op_peers_list) { - /* Reset lock status */ - peerinfo->locked = _gf_false; - gd_syncop_mgmt_lock (peerinfo, &args, MY_UUID, peer_uuid); + if (conf->op_version < GD_OP_VERSION_4) { + /* Reset lock status */ + peerinfo->locked = _gf_false; + gd_syncop_mgmt_lock (peerinfo, &args, + MY_UUID, peer_uuid); + } else + gd_syncop_mgmt_volume_lock (op, op_ctx, peerinfo, &args, + MY_UUID, peer_uuid, txn_id); peer_cnt++; } gd_synctask_barrier_wait((&args), peer_cnt); - ret = args.op_ret; - if (ret) { - gf_asprintf (op_errstr, "Another transaction could be " - "in progress. Please try again after " - "sometime."); - gf_log (this->name, GF_LOG_ERROR, "Failed to acquire lock"); - goto out; + + if (args.op_ret) { + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + else { + ret = gf_asprintf (op_errstr, "Another transaction " + "could be in progress. Please try " + "again after sometime."); + if (ret == -1) + *op_errstr = NULL; + + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire lock"); + + } } - ret = 0; + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent lock op req for 'Volume %s' " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); out: return ret; } @@ -1055,9 +1313,10 @@ out: } int -gd_unlock_op_phase (struct list_head *peers, glusterd_op_t op, int op_ret, +gd_unlock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, int op_ret, rpcsvc_request_t *req, dict_t *op_ctx, char *op_errstr, - int npeers, gf_boolean_t is_locked) + int npeers, char *volname, gf_boolean_t is_acquired, + uuid_t txn_id) { glusterd_peerinfo_t *peerinfo = NULL; glusterd_peerinfo_t *tmp = NULL; @@ -1066,6 +1325,9 @@ gd_unlock_op_phase (struct list_head *peers, glusterd_op_t op, int op_ret, int ret = -1; xlator_t *this = NULL; struct syncargs args = {0}; + struct list_head *peers = NULL; + + peers = &conf->xaction_peers; if (!npeers) { ret = 0; @@ -1074,24 +1336,40 @@ gd_unlock_op_phase (struct list_head *peers, glusterd_op_t op, int op_ret, /* If the lock has not been held during this * transaction, do not send unlock requests */ - if (!is_locked) + if (!is_acquired) goto out; this = THIS; synctask_barrier_init((&args)); peer_cnt = 0; - list_for_each_entry_safe (peerinfo, tmp, peers, op_peers_list) { - /* Only unlock peers that were locked */ - if (peerinfo->locked) { - gd_syncop_mgmt_unlock (peerinfo, &args, MY_UUID, - tmp_uuid); - peer_cnt++; + if (conf->op_version < GD_OP_VERSION_4) { + list_for_each_entry_safe (peerinfo, tmp, peers, op_peers_list) { + /* Only unlock peers that were locked */ + if (peerinfo->locked) { + gd_syncop_mgmt_unlock (peerinfo, &args, + MY_UUID, tmp_uuid); + peer_cnt++; + list_del_init (&peerinfo->op_peers_list); + } + } + } else { + if (volname) { + list_for_each_entry_safe (peerinfo, tmp, + peers, op_peers_list) { + gd_syncop_mgmt_volume_unlock (op_ctx, peerinfo, + &args, MY_UUID, + tmp_uuid, txn_id); + peer_cnt++; + list_del_init (&peerinfo->op_peers_list); + } } - - list_del_init (&peerinfo->op_peers_list); } gd_synctask_barrier_wait((&args), peer_cnt); + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent unlock op req for 'Volume %s' " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to unlock " "on some peer(s)"); @@ -1099,9 +1377,25 @@ gd_unlock_op_phase (struct list_head *peers, glusterd_op_t op, int op_ret, out: glusterd_op_send_cli_response (op, op_ret, 0, req, op_ctx, op_errstr); - glusterd_op_clear_op (op); - if (is_locked) - glusterd_unlock (MY_UUID); + + if (is_acquired) { + /* Based on the op-version, + * we release the cluster or volume lock + * and clear the op */ + + glusterd_op_clear_op (op); + if (conf->op_version < GD_OP_VERSION_4) + glusterd_unlock (MY_UUID); + else { + if (volname) { + ret = glusterd_volume_unlock (volname, MY_UUID); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to release lock for %s", + volname); + } + } + } return 0; } @@ -1118,7 +1412,8 @@ gd_get_brick_count (struct list_head *bricks) } int -gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, char **op_errstr) +gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char **op_errstr) { glusterd_pending_node_t *pending_node = NULL; struct list_head selected = {0,}; @@ -1190,21 +1485,43 @@ out: void gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req) { - int ret = -1; - int npeers = 0; - dict_t *req_dict = NULL; - glusterd_conf_t *conf = NULL; - glusterd_op_t op = 0; - int32_t tmp_op = 0; - char *op_errstr = NULL; - xlator_t *this = NULL; - gf_boolean_t is_locked = _gf_false; + int ret = -1; + int npeers = 0; + dict_t *req_dict = NULL; + glusterd_conf_t *conf = NULL; + glusterd_op_t op = 0; + int32_t tmp_op = 0; + char *op_errstr = NULL; + char *tmp = NULL; + char *volname = NULL; + xlator_t *this = NULL; + gf_boolean_t is_acquired = _gf_false; + uuid_t *txn_id = NULL; + glusterd_op_info_t txn_opinfo; this = THIS; GF_ASSERT (this); conf = this->private; GF_ASSERT (conf); + /* Generate a transaction-id for this operation and + * save it in the dict */ + ret = glusterd_generate_txn_id (op_ctx, &txn_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to generate transaction id"); + goto out; + + } + + /* Save the MY_UUID as the originator_uuid */ + ret = glusterd_set_originator_uuid (op_ctx); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set originator_uuid."); + goto out; + } + ret = dict_get_int32 (op_ctx, GD_SYNC_OPCODE_KEY, &tmp_op); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to get volume " @@ -1213,28 +1530,73 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req) } op = tmp_op; - ret = glusterd_lock (MY_UUID); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Unable to acquire lock"); - gf_asprintf (&op_errstr, "Another transaction is in progress. " - "Please try again after sometime."); - goto out; + + /* Based on the op_version, acquire a cluster or volume lock */ + if (conf->op_version < GD_OP_VERSION_4) { + ret = glusterd_lock (MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock"); + gf_asprintf (&op_errstr, + "Another transaction is in progress. " + "Please try again after sometime."); + goto out; + } + } else { + + /* If no volname is given as a part of the command, locks will + * not be held */ + ret = dict_get_str (op_ctx, "volname", &tmp); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Failed to get volume " + "name"); + goto local_locking_done; + } else { + /* Use a copy of volname, as cli response will be + * sent before the unlock, and the volname in the + * dict, might be removed */ + volname = gf_strdup (tmp); + if (!volname) + goto out; + } + + ret = glusterd_volume_lock (volname, MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock for %s", volname); + gf_asprintf (&op_errstr, + "Another transaction is in progress " + "for %s. Please try again after sometime.", + volname); + goto out; + } } - is_locked = _gf_true; + is_acquired = _gf_true; + +local_locking_done: + + /* Save opinfo for this transaction with the transaction id */ + glusterd_txn_opinfo_init (&txn_opinfo, NULL, &op, NULL, NULL); + ret = glusterd_set_txn_opinfo (txn_id, &txn_opinfo); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + + opinfo = txn_opinfo; - /* storing op globally to access in synctask code paths - * This is still acceptable, as we are performing this under - * the 'cluster' lock*/ - glusterd_op_set_op (op); INIT_LIST_HEAD (&conf->xaction_peers); npeers = gd_build_peers_list (&conf->peers, &conf->xaction_peers, op); - ret = gd_lock_op_phase (&conf->xaction_peers, op, op_ctx, &op_errstr, - npeers); - if (ret) - goto out; + /* If no volname is given as a part of the command, locks will + * not be held */ + if (volname) { + ret = gd_lock_op_phase (conf, op, op_ctx, &op_errstr, + npeers, *txn_id); + if (ret) + goto out; + } ret = glusterd_op_build_payload (&req_dict, &op_errstr, op_ctx); if (ret) { @@ -1261,8 +1623,17 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req) ret = 0; out: - (void) gd_unlock_op_phase (&conf->xaction_peers, op, ret, req, - op_ctx, op_errstr, npeers, is_locked); + (void) gd_unlock_op_phase (conf, op, ret, req, op_ctx, op_errstr, + npeers, volname, is_acquired, *txn_id); + + /* Clearing the transaction opinfo */ + ret = glusterd_clear_txn_opinfo (txn_id); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to clear transaction's opinfo"); + + if (volname) + GF_FREE (volname); if (req_dict) dict_unref (req_dict); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 21a973086..1eb03f1de 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -37,6 +37,7 @@ #include "glusterd-pmap.h" #include "glusterfs-acl.h" #include "glusterd-syncop.h" +#include "glusterd-locks.h" #include "xdr-generic.h" #include <sys/resource.h> @@ -1307,16 +1308,6 @@ out: return ret; } -/* Caller should ensure that brick process is not running*/ -static void -_reap_brick_process (char *pidfile, char *brickpath) -{ - unlink (pidfile); - /* Brick process is not running and pmap may have an entry for it.*/ - pmap_registry_remove (THIS, 0, brickpath, - GF_PMAP_PORT_BRICKSERVER, NULL); -} - static int _mk_rundir_p (glusterd_volinfo_t *volinfo) { @@ -1371,8 +1362,6 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, if (gf_is_service_running (pidfile, NULL)) goto connect; - _reap_brick_process (pidfile, brickinfo->path); - port = brickinfo->port; if (!port) port = pmap_registry_alloc (THIS); @@ -3544,7 +3533,8 @@ glusterd_import_friend_volume (dict_t *vols, size_t count) if (ret) goto out; - list_add_tail (&new_volinfo->vol_list, &priv->volumes); + list_add_order (&new_volinfo->vol_list, &priv->volumes, + glusterd_compare_volume_name); out: gf_log ("", GF_LOG_DEBUG, "Returning with ret: %d", ret); return ret; @@ -7911,7 +7901,7 @@ glusterd_volume_status_copy_to_op_ctx_dict (dict_t *aggr, dict_t *rsp_dict) if (ret) goto out; - if (cmd & GF_CLI_STATUS_ALL && is_origin_glusterd ()) { + if (cmd & GF_CLI_STATUS_ALL && is_origin_glusterd (ctx_dict)) { ret = dict_get_int32 (rsp_dict, "vol_count", &vol_count); if (ret == 0) { ret = dict_set_int32 (ctx_dict, "vol_count", @@ -8802,20 +8792,66 @@ glusterd_handle_node_rsp (dict_t *req_dict, void *pending_entry, return ret; } +int32_t +glusterd_set_originator_uuid (dict_t *dict) +{ + int ret = -1; + uuid_t *originator_uuid = NULL; + + GF_ASSERT (dict); + + originator_uuid = GF_CALLOC (1, sizeof(uuid_t), + gf_common_mt_uuid_t); + if (!originator_uuid) { + ret = -1; + goto out; + } + + uuid_copy (*originator_uuid, MY_UUID); + ret = dict_set_bin (dict, "originator_uuid", + originator_uuid, sizeof (uuid_t)); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Failed to set originator_uuid."); + goto out; + } + +out: + if (ret && originator_uuid) + GF_FREE (originator_uuid); + + return ret; +} + /* Should be used only when an operation is in progress, as that is the only * time a lock_owner is set */ gf_boolean_t -is_origin_glusterd () +is_origin_glusterd (dict_t *dict) { - int ret = 0; - uuid_t lock_owner = {0,}; + gf_boolean_t ret = _gf_false; + uuid_t lock_owner = {0,}; + uuid_t *originator_uuid = NULL; - ret = glusterd_get_lock_owner (&lock_owner); - if (ret) - return _gf_false; + GF_ASSERT (dict); + + ret = dict_get_bin (dict, "originator_uuid", + (void **) &originator_uuid); + if (ret) { + /* If not originator_uuid has been set, then the command + * has been originated from a glusterd running on older version + * Hence fetching the lock owner */ + ret = glusterd_get_lock_owner (&lock_owner); + if (ret) { + ret = _gf_false; + goto out; + } + ret = !uuid_compare (MY_UUID, lock_owner); + } else + ret = !uuid_compare (MY_UUID, *originator_uuid); - return (uuid_compare (MY_UUID, lock_owner) == 0); +out: + return ret; } int @@ -9352,3 +9388,13 @@ glusterd_rpc_clnt_unref (glusterd_conf_t *conf, rpc_clnt_t *rpc) return ret; } +int32_t +glusterd_compare_volume_name(struct list_head *list1, struct list_head *list2) +{ + glusterd_volinfo_t *volinfo1 = NULL; + glusterd_volinfo_t *volinfo2 = NULL; + + volinfo1 = list_entry(list1, glusterd_volinfo_t, vol_list); + volinfo2 = list_entry(list2, glusterd_volinfo_t, vol_list); + return strcmp(volinfo1->volname, volinfo2->volname); +} diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 05d5c7172..cd22b2960 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -529,11 +529,14 @@ glusterd_are_vol_all_peers_up (glusterd_volinfo_t *volinfo, struct list_head *peers, char **down_peerstr); +int32_t +glusterd_set_originator_uuid (dict_t *dict); + /* Should be used only when an operation is in progress, as that is the only * time a lock_owner is set */ gf_boolean_t -is_origin_glusterd (); +is_origin_glusterd (dict_t *dict); gf_boolean_t glusterd_is_quorum_changed (dict_t *options, char *option, char *value); @@ -644,4 +647,7 @@ gd_stop_rebalance_process (glusterd_volinfo_t *volinfo); rpc_clnt_t * glusterd_rpc_clnt_unref (glusterd_conf_t *conf, rpc_clnt_t *rpc); + +int32_t +glusterd_compare_volume_name(struct list_head *, struct list_head *); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 303abd1bc..102bd9d8b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -361,10 +361,9 @@ volopt_trie_section (int lvl, char **patt, char *word, char **hint, int hints) GF_ASSERT (hints <= 2); nodevec.cnt = hints; ret = trie_measure_vec (trie, word, &nodevec); - if (ret || !nodevec.nodes[0]) - trie_destroy (trie); + if (!ret && nodevec.nodes[0]) + ret = process_nodevec (&nodevec, hint); - ret = process_nodevec (&nodevec, hint); trie_destroy (trie); return ret; @@ -2839,6 +2838,9 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, xlator_t *xl = NULL; char *volname = NULL; glusterd_conf_t *conf = THIS->private; + char *tmp = NULL; + gf_boolean_t var = _gf_false; + gf_boolean_t ob = _gf_false; GF_ASSERT (conf); @@ -2904,7 +2906,93 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, } } - ret = client_graph_set_perf_options(graph, volinfo, set_dict); + /* Do not allow changing read-after-open option if root-squash is + enabled. + */ + ret = dict_get_str (set_dict, "performance.read-after-open", &tmp); + if (!ret) { + ret = dict_get_str (volinfo->dict, "server.root-squash", &tmp); + if (!ret) { + ob = _gf_false; + ret = gf_string2boolean (tmp, &ob); + if (!ret && ob) { + gf_log (THIS->name, GF_LOG_WARNING, + "root-squash is enabled. Please turn it" + " off to change read-after-open " + "option"); + ret = -1; + goto out; + } + } + } + + /* open behind causes problems when root-squash is enabled + (by allowing reads to happen even though the squashed user + does not have permissions to do so) as it fakes open to be + successful and later sends reads on anonymous fds. So when + root-squash is enabled, open-behind's option to read after + open is done is also enabled. + */ + ret = dict_get_str (set_dict, "server.root-squash", &tmp); + if (!ret) { + ret = gf_string2boolean (tmp, &var); + if (ret) + goto out; + + if (var) { + ret = dict_get_str (volinfo->dict, + "performance.read-after-open", + &tmp); + if (!ret) { + ret = gf_string2boolean (tmp, &ob); + /* go ahead with turning read-after-open on + even if string2boolean conversion fails, + OR if read-after-open option is turned off + */ + if (ret || !ob) + ret = dict_set_str (set_dict, + "performance.read-after-open", + "yes"); + } else { + ret = dict_set_str (set_dict, + "performance.read-after-open", + "yes"); + } + } else { + /* When root-squash has to be turned off, open-behind's + read-after-open option should be reset to what was + there before root-squash was turned on. If the option + cannot be found in volinfo's dict, it means that + option was not set before turning on root-squash. + */ + ob = _gf_false; + ret = dict_get_str (volinfo->dict, + "performance.read-after-open", + &tmp); + if (!ret) { + ret = gf_string2boolean (tmp, &ob); + + if (!ret && ob) { + ret = dict_set_str (set_dict, + "performance.read-after-open", + "yes"); + } + } + /* consider operation is failure only if read-after-open + option is enabled and could not set into set_dict + */ + if (!ob) + ret = 0; + } + if (ret) { + gf_log (THIS->name, GF_LOG_WARNING, "setting " + "open behind option as part of root " + "squash failed"); + goto out; + } + } + + ret = client_graph_set_perf_options(graph, volinfo, set_dict); if (ret) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 051e7d756..4acea7686 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -1708,7 +1708,8 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr) } volinfo->rebal.defrag_status = 0; - list_add_tail (&volinfo->vol_list, &priv->volumes); + list_add_order (&volinfo->vol_list, &priv->volumes, + glusterd_compare_volume_name); vol_added = _gf_true; out: diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 8d29e8edc..4e3162f01 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -711,6 +711,12 @@ struct volopt_map_entry glusterd_volopt_map[] = { .op_version = 3, .flags = OPT_FLAG_CLIENT_OPT }, + { .key = "performance.read-after-open", + .voltype = "performance/open-behind", + .option = "read-after-open", + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT + }, { .key = "performance.read-ahead-page-count", .voltype = "performance/read-ahead", .option = "page-count", diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index 834a39968..1b04143be 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -36,6 +36,7 @@ #include "glusterd-store.h" #include "glusterd-hooks.h" #include "glusterd-utils.h" +#include "glusterd-locks.h" #include "common-utils.h" #include "run.h" @@ -48,6 +49,7 @@ extern struct rpcsvc_program gluster_cli_getspec_prog; extern struct rpcsvc_program gluster_pmap_prog; extern glusterd_op_info_t opinfo; extern struct rpcsvc_program gd_svc_mgmt_prog; +extern struct rpcsvc_program gd_svc_mgmt_v3_prog; extern struct rpcsvc_program gd_svc_peer_prog; extern struct rpcsvc_program gd_svc_cli_prog; extern struct rpcsvc_program gd_svc_cli_prog_ro; @@ -64,6 +66,7 @@ struct rpcsvc_program *gd_inet_programs[] = { &gd_svc_peer_prog, &gd_svc_cli_prog_ro, &gd_svc_mgmt_prog, + &gd_svc_mgmt_v3_prog, &gluster_pmap_prog, &gluster_handshake_prog, &glusterd_mgmt_hndsk_prog, @@ -1303,6 +1306,8 @@ init (xlator_t *this) glusterd_friend_sm_init (); glusterd_op_sm_init (); glusterd_opinfo_init (); + glusterd_vol_lock_init (); + glusterd_txn_opinfo_dict_init (); ret = glusterd_sm_tr_log_init (&conf->op_sm_log, glusterd_op_sm_state_name_get, glusterd_op_sm_event_name_get, @@ -1438,6 +1443,8 @@ fini (xlator_t *this) if (conf->handle) gf_store_handle_destroy (conf->handle); glusterd_sm_tr_log_delete (&conf->op_sm_log); + glusterd_vol_lock_fini (); + glusterd_txn_opinfo_dict_fini (); GF_FREE (conf); this->private = NULL; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index e704de44b..d041be4a2 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -545,6 +545,14 @@ int glusterd_op_unlock_send_resp (rpcsvc_request_t *req, int32_t status); int +glusterd_op_volume_lock_send_resp (rpcsvc_request_t *req, + uuid_t *txn_id, int32_t status); + +int +glusterd_op_volume_unlock_send_resp (rpcsvc_request_t *req, + uuid_t *txn_id, int32_t status); + +int glusterd_op_stage_send_resp (rpcsvc_request_t *req, int32_t op, int32_t status, char *op_errstr, dict_t *rsp_dict); @@ -817,4 +825,14 @@ int32_t glusterd_op_begin_synctask (rpcsvc_request_t *req, glusterd_op_t op, void *dict); int32_t glusterd_defrag_event_notify_handle (dict_t *dict); + +int32_t +glusterd_txn_opinfo_dict_init (); + +void +glusterd_txn_opinfo_dict_fini (); + +void +glusterd_txn_opinfo_init (); + #endif diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index 315259ece..d9055468e 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -5323,6 +5323,18 @@ init (xlator_t *this_xl) GF_OPTION_INIT ("congestion-threshold", priv->congestion_threshold, int32, cleanup_exit); + GF_OPTION_INIT("no-root-squash", priv->no_root_squash, bool, + cleanup_exit); + /* change the client_pid to no-root-squash pid only if the + client is none of defrag process, hadoop access and gsyncd process. + */ + if (!priv->client_pid_set) { + if (priv->no_root_squash == _gf_true) { + priv->client_pid_set = _gf_true; + priv->client_pid = GF_CLIENT_PID_NO_ROOT_SQUASH; + } + } + /* user has set only background-qlen, not congestion-threshold, use the fuse kernel driver formula to set congestion. ie, 75% */ if (dict_get (this_xl->options, "background-qlen") && @@ -5563,5 +5575,15 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_BOOL, .default_value = "yes" }, + { .key = {"no-root-squash"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .description = "This is the mount option for disabling the " + "root squash for the client irrespective of whether the root-squash " + "option for the volume is set or not. But this option is honoured " + "only for the trusted clients. For non trusted clients this value " + "does not have any affect and the volume option for root-squash is " + "honoured.", + }, { .key = {NULL} }, }; diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h index 34794b6ea..f1c4cb3f0 100644 --- a/xlators/mount/fuse/src/fuse-bridge.h +++ b/xlators/mount/fuse/src/fuse-bridge.h @@ -104,6 +104,14 @@ struct fuse_private { int32_t fopen_keep_cache; int32_t gid_cache_timeout; gf_boolean_t enable_ino32; + /* This is the mount option for disabling the root-squash for the + mount irrespective of whether the root-squash option for the + volume is set or not. But this option is honoured only for + thr trusted clients. For non trusted clients this value does + not have any affect and the volume option for root-squash is + honoured. + */ + gf_boolean_t no_root_squash; fdtable_t *fdtable; gid_cache_t gid_cache; char *fuse_mountopts; diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in index 2799ec847..d22f6a69b 100755 --- a/xlators/mount/fuse/utils/mount.glusterfs.in +++ b/xlators/mount/fuse/utils/mount.glusterfs.in @@ -151,6 +151,9 @@ start_glusterfs () if [ -n "$worm" ]; then cmd_line=$(echo "$cmd_line --worm"); fi + if [ -n "$volfile_max_fetch_attempts" ]; then + cmd_line=$(echo "$cmd_line --volfile-max-fetch-attempts=$volfile_max_fetch_attempts") + fi if [ -n "$fopen_keep_cache" ]; then cmd_line=$(echo "$cmd_line --fopen-keep-cache"); @@ -168,7 +171,11 @@ start_glusterfs () cmd_line=$(echo "$cmd_line --aux-gfid-mount"); fi - # options with values start here + if [ -n "$no_root_squash" ]; then + cmd_line=$(echo "$cmd_line --no-root-squash"); + fi + +#options with values start here if [ -n "$log_level" ]; then cmd_line=$(echo "$cmd_line --log-level=$log_level"); fi @@ -424,6 +431,9 @@ with_options() "backupvolfile-server") backupvolfile_server=$value ;; + "fetch-attempts") + volfile_max_fetch_attempts=$value + ;; "congestion-threshold") cong_threshold=$value ;; @@ -436,6 +446,13 @@ with_options() "use-readdirp") use_readdirp=$value ;; + "root-squash") + if [ $value == "no" ] || + [ $value == "off" ] || + [ $value == "disable" ] || + [ $value == "false" ] ; then + no_root_squash=1; + fi ;; *) echo "Invalid option: $key" exit 0 diff --git a/xlators/nfs/server/src/acl3.c b/xlators/nfs/server/src/acl3.c index 25476ebbe..43156eb44 100644 --- a/xlators/nfs/server/src/acl3.c +++ b/xlators/nfs/server/src/acl3.c @@ -229,13 +229,21 @@ acl3svc_null (rpcsvc_request_t *req) } int -acl3_getacl_reply (nfs3_call_state_t *cs, getaclreply *reply) +acl3_getacl_reply (rpcsvc_request_t *req, getaclreply *reply) { - acl3svc_submit_reply (cs->req, (void *)reply, + acl3svc_submit_reply (req, (void *)reply, (acl3_serializer)xdr_serialize_getaclreply); return 0; } +int +acl3_setacl_reply (rpcsvc_request_t *req, setaclreply *reply) +{ + acl3svc_submit_reply (req, (void *)reply, + (acl3_serializer)xdr_serialize_setaclreply); + return 0; +} + int acl3_getacl_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -300,14 +308,14 @@ acl3_getacl_cbk (call_frame_t *frame, void *cookie, xlator_t *this, getaclreply->daclentry.daclentry_len = aclcount; } - acl3_getacl_reply (cs, getaclreply); + acl3_getacl_reply (cs->req, getaclreply); nfs3_call_state_wipe (cs); return 0; err: if (getaclreply) getaclreply->status = stat; - acl3_getacl_reply (cs, getaclreply); + acl3_getacl_reply (cs->req, getaclreply); nfs3_call_state_wipe (cs); return 0; } @@ -354,7 +362,7 @@ acl3_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, return 0; err: getaclreply->status = stat; - acl3_getacl_reply (cs, getaclreply); + acl3_getacl_reply (cs->req, getaclreply); nfs3_call_state_wipe (cs); return 0; } @@ -382,7 +390,7 @@ acl3err: if (ret < 0) { gf_log (GF_ACL, GF_LOG_ERROR, "unable to open_and_resume"); cs->args.getaclreply.status = nfs3_errno_to_nfsstat3 (stat); - acl3_getacl_reply (cs, &cs->args.getaclreply); + acl3_getacl_reply (cs->req, &cs->args.getaclreply); nfs3_call_state_wipe (cs); } @@ -401,6 +409,7 @@ acl3svc_getacl (rpcsvc_request_t *req) nfsstat3 stat = NFS3ERR_SERVERFAULT; struct nfs3_fh fh, *fhp = NULL; getaclargs getaclargs; + getaclreply getaclreply; if (!req) return ret; @@ -408,6 +417,7 @@ acl3svc_getacl (rpcsvc_request_t *req) acl3_validate_nfs3_state (req, nfs3, stat, rpcerr, ret); nfs = nfs_state (nfs3->nfsx); memset (&getaclargs, 0, sizeof (getaclargs)); + memset (&getaclreply, 0, sizeof (getaclreply)); getaclargs.fh.n_bytes = (char *)&fh; if (xdr_to_getaclargs(req->msg[0], &getaclargs) <= 0) { gf_log (GF_ACL, GF_LOG_ERROR, "Error decoding args"); @@ -423,26 +433,23 @@ acl3svc_getacl (rpcsvc_request_t *req) fhp = &fh; acl3_validate_gluster_fh (&fh, stat, acl3err); - acl3_map_fh_to_volume (nfs->nfs3state, fhp, req, - vol, stat, acl3err); + acl3_map_fh_to_volume (nfs->nfs3state, fhp, req, vol, stat, acl3err); + acl3_volume_started_check (nfs3, vol, ret, rpcerr); acl3_handle_call_state_init (nfs->nfs3state, cs, req, - vol, stat, rpcerr); + vol, stat, acl3err); cs->vol = vol; cs->args.getaclreply.mask = getaclargs.mask; - acl3_volume_started_check (nfs3, vol, ret, acl3err); - ret = nfs3_fh_resolve_and_resume (cs, fhp, - NULL, acl3_getacl_resume); + ret = nfs3_fh_resolve_and_resume (cs, fhp, NULL, acl3_getacl_resume); + stat = nfs3_errno_to_nfsstat3 (-ret); acl3err: if (ret < 0) { gf_log (GF_ACL, GF_LOG_ERROR, "unable to resolve and resume"); - if (cs) { - cs->args.getaclreply.status = stat; - acl3_getacl_reply (cs, &cs->args.getaclreply); - nfs3_call_state_wipe (cs); - } + getaclreply.status = stat; + acl3_getacl_reply (req, &getaclreply); + nfs3_call_state_wipe (cs); return 0; } @@ -462,8 +469,8 @@ acl3_setacl_cbk (call_frame_t *frame, void *cookie, cs->args.setaclreply.status = status; } - acl3svc_submit_reply (cs->req, (void *)&cs->args.setaclreply, - (acl3_serializer)xdr_serialize_setaclreply); + acl3_setacl_reply (cs->req, &cs->args.setaclreply); + return 0; } @@ -500,8 +507,7 @@ acl3err: stat = -ret; gf_log (GF_ACL, GF_LOG_ERROR, "unable to open_and_resume"); cs->args.setaclreply.status = nfs3_errno_to_nfsstat3 (stat); - acl3svc_submit_reply (cs->req, (void *)&cs->args.setaclreply, - (acl3_serializer)xdr_serialize_setaclreply); + acl3_setacl_reply (cs->req, &cs->args.setaclreply); nfs3_call_state_wipe (cs); } @@ -521,8 +527,9 @@ acl3svc_setacl (rpcsvc_request_t *req) struct nfs3_fh fh; struct nfs3_fh *fhp = NULL; setaclargs setaclargs; + setaclreply setaclreply; + aclentry *daclentry = NULL; aclentry *aclentry = NULL; - struct aclentry *daclentry = NULL; int aclerrno = 0; int defacl = 1; @@ -542,6 +549,7 @@ acl3svc_setacl (rpcsvc_request_t *req) acl3_validate_nfs3_state (req, nfs3, stat, rpcerr, ret); nfs = nfs_state (nfs3->nfsx); memset (&setaclargs, 0, sizeof (setaclargs)); + memset (&setaclreply, 0, sizeof (setaclreply)); memset (&fh, 0, sizeof (fh)); setaclargs.fh.n_bytes = (char *)&fh; setaclargs.aclentry.aclentry_val = aclentry; @@ -560,14 +568,12 @@ acl3svc_setacl (rpcsvc_request_t *req) fhp = &fh; acl3_validate_gluster_fh (fhp, stat, acl3err); - acl3_map_fh_to_volume (nfs->nfs3state, fhp, req, - vol, stat, acl3err); + acl3_map_fh_to_volume (nfs->nfs3state, fhp, req, vol, stat, acl3err); + acl3_volume_started_check (nfs3, vol, ret, rpcerr); acl3_handle_call_state_init (nfs->nfs3state, cs, req, - vol, stat, rpcerr); + vol, stat, acl3err); cs->vol = vol; - acl3_volume_started_check (nfs3, vol, ret, rpcerr); - cs->aclcount = setaclargs.aclcount; cs->daclcount = setaclargs.daclcount; @@ -593,15 +599,14 @@ acl3svc_setacl (rpcsvc_request_t *req) goto acl3err; } - ret = nfs3_fh_resolve_and_resume (cs, fhp, - NULL, acl3_setacl_resume); + ret = nfs3_fh_resolve_and_resume (cs, fhp, NULL, acl3_setacl_resume); + stat = nfs3_errno_to_nfsstat3 (-ret); acl3err: if (ret < 0) { gf_log (GF_ACL, GF_LOG_ERROR, "unable to resolve and resume"); - cs->args.setaclreply.status = stat; - acl3svc_submit_reply (cs->req, (void *)&cs->args.setaclreply, - (acl3_serializer)xdr_serialize_setaclreply); + setaclreply.status = stat; + acl3_setacl_reply (req, &setaclreply); nfs3_call_state_wipe (cs); GF_FREE(aclentry); GF_FREE(daclentry); diff --git a/xlators/performance/io-cache/src/page.c b/xlators/performance/io-cache/src/page.c index 94b8f229b..a21b31816 100644 --- a/xlators/performance/io-cache/src/page.c +++ b/xlators/performance/io-cache/src/page.c @@ -315,6 +315,7 @@ __ioc_wait_on_page (ioc_page_t *page, call_frame_t *frame, off_t offset, local->op_errno = ENOMEM; gf_log (frame->this->name, GF_LOG_WARNING, "asked to wait on a NULL page"); + goto out; } waitq = GF_CALLOC (1, sizeof (*waitq), gf_ioc_mt_ioc_waitq_t); @@ -476,6 +477,7 @@ ioc_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this, iobref_unref (page->iobref); GF_FREE (page->vector); page->vector = NULL; + page->iobref = NULL; } /* keep a copy of the page for our cache */ diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c index 29ef64364..742e4df3f 100644 --- a/xlators/performance/open-behind/src/open-behind.c +++ b/xlators/performance/open-behind/src/open-behind.c @@ -23,6 +23,11 @@ typedef struct ob_conf { like mandatory locks */ gf_boolean_t lazy_open; /* delay backend open as much as possible */ + gf_boolean_t read_after_open; /* instead of sending readvs on + anonymous fds, open the file + first and then send readv i.e + similar to what writev does + */ } ob_conf_t; @@ -367,8 +372,14 @@ ob_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, { call_stub_t *stub = NULL; fd_t *wind_fd = NULL; + ob_conf_t *conf = NULL; - wind_fd = ob_get_wind_fd (this, fd); + conf = this->private; + + if (!conf->read_after_open) + wind_fd = ob_get_wind_fd (this, fd); + else + wind_fd = fd_ref (fd); stub = fop_readv_stub (frame, default_readv_resume, wind_fd, size, offset, flags, xdata); @@ -894,6 +905,8 @@ reconfigure (xlator_t *this, dict_t *options) bool, out); GF_OPTION_RECONF ("lazy-open", conf->lazy_open, options, bool, out); + GF_OPTION_RECONF ("read-after-open", conf->read_after_open, options, + bool, out); ret = 0; out: @@ -924,7 +937,7 @@ init (xlator_t *this) GF_OPTION_INIT ("use-anonymous-fd", conf->use_anonymous_fd, bool, err); GF_OPTION_INIT ("lazy-open", conf->lazy_open, bool, err); - + GF_OPTION_INIT ("read-after-open", conf->read_after_open, bool, err); this->private = conf; return 0; @@ -996,6 +1009,12 @@ struct volume_options options[] = { "FOP arrives (e.g writev on the FD, unlink of the file). When option " "is disabled, perform backend open right after unwinding open().", }, + { .key = {"read-after-open"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "no", + .description = "read is sent only after actual open happens and real " + "fd is obtained, instead of doing on anonymous fd (similar to write)", + }, { .key = {NULL} } }; diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c index 445ea8658..402da886f 100644 --- a/xlators/performance/quick-read/src/quick-read.c +++ b/xlators/performance/quick-read/src/quick-read.c @@ -101,6 +101,7 @@ qr_inode_ctx_get_or_new (xlator_t *this, inode_t *inode) if (ret) { __qr_inode_prune (&priv->table, qr_inode); GF_FREE (qr_inode); + qr_inode = NULL; } } unlock: @@ -417,10 +418,11 @@ qr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (content) { /* new content came along, always replace old content */ qr_inode = qr_inode_ctx_get_or_new (this, inode); - if (!qr_inode) + if (!qr_inode) { /* no harm done */ + GF_FREE (content); goto out; - + } qr_content_update (this, qr_inode, content, buf); } else { /* purge old content if necessary */ @@ -565,7 +567,6 @@ qr_readv_cached (call_frame_t *frame, qr_inode_t *qr_inode, size_t size, iobref = iobref_new (); if (!iobref) { op_ret = -1; - iobuf_unref (iobuf); goto unlock; } diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c index 2be211fbf..b9cef1152 100644 --- a/xlators/performance/write-behind/src/write-behind.c +++ b/xlators/performance/write-behind/src/write-behind.c @@ -967,11 +967,11 @@ __wb_collapse_small_writes (wb_request_t *holder, wb_request_t *req) ret = iobref_add (iobref, iobuf); if (ret != 0) { - iobuf_unref (iobuf); - iobref_unref (iobref); gf_log (req->wb_inode->this->name, GF_LOG_WARNING, "cannot add iobuf (%p) into iobref (%p)", iobuf, iobref); + iobuf_unref (iobuf); + iobref_unref (iobref); goto out; } diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c index d4941011d..708acd936 100644 --- a/xlators/protocol/server/src/server-handshake.c +++ b/xlators/protocol/server/src/server-handshake.c @@ -448,6 +448,8 @@ server_setvolume (rpcsvc_request_t *req) if (req->trans->xl_private != client) req->trans->xl_private = client; + auth_set_username_passwd (params, config_params, client); + ret = dict_get_int32 (params, "fops-version", &fop_version); if (ret < 0) { ret = dict_set_str (reply, "ERROR", diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c index b2b6c486f..c11011abf 100644 --- a/xlators/protocol/server/src/server-helpers.c +++ b/xlators/protocol/server/src/server-helpers.c @@ -302,6 +302,11 @@ get_frame_from_request (rpcsvc_request_t *req) { call_frame_t *frame = NULL; client_t *client = NULL; + client_t *tmp_client = NULL; + xlator_t *this = NULL; + server_conf_t *priv = NULL; + clienttable_t *clienttable = NULL; + unsigned int i = 0; GF_VALIDATE_OR_GOTO ("server", req, out); @@ -315,6 +320,57 @@ get_frame_from_request (rpcsvc_request_t *req) frame->root->unique = req->xid; + client = req->trans->xl_private; + this = req->trans->xl; + priv = this->private; + clienttable = this->ctx->clienttable; + + for (i = 0; i < clienttable->max_clients; i++) { + tmp_client = clienttable->cliententries[i].client; + if (client == tmp_client) { + /* for non trusted clients username and password + would not have been set. So for non trusted clients + (i.e clients not from the same machine as the brick, + and clients from outside the storage pool) + do the root-squashing. + TODO: If any client within the storage pool (i.e + mounting within a machine from the pool but using + other machine's ip/hostname from the same pool) + is present treat it as a trusted client + */ + if (!client->auth.username && req->pid != NFS_PID) + RPC_AUTH_ROOT_SQUASH (req); + + /* Problem: If we just check whether the client is + trusted client and do not do root squashing for + them, then for smb clients and UFO clients root + squashing will never happen as they use the fuse + mounts done within the trusted pool (i.e they are + trusted clients). + Solution: To fix it, do root squashing for trusted + clients also. If one wants to have a client within + the storage pool for which root-squashing does not + happen, then the client has to be mounted with + --no-root-squash option. But for defrag client and + gsyncd client do not do root-squashing. + */ + if (client->auth.username && + req->pid != GF_CLIENT_PID_NO_ROOT_SQUASH && + req->pid != GF_CLIENT_PID_GSYNCD && + req->pid != GF_CLIENT_PID_DEFRAG) + RPC_AUTH_ROOT_SQUASH (req); + + /* For nfs clients the server processes will be running + within the trusted storage pool machines. So if we + do not do root-squashing for nfs servers, thinking + that its a trusted client, then root-squashing wont + work for nfs clients. + */ + if (req->pid == NFS_PID) + RPC_AUTH_ROOT_SQUASH (req); + } + } + frame->root->uid = req->uid; frame->root->gid = req->gid; frame->root->pid = req->pid; @@ -935,3 +991,104 @@ server_ctx_get (client_t *client, xlator_t *xlator) out: return ctx; } + +int +auth_set_username_passwd (dict_t *input_params, dict_t *config_params, + client_t *client) +{ + int ret = 0; + data_t *allow_user = NULL; + data_t *passwd_data = NULL; + char *username = NULL; + char *password = NULL; + char *brick_name = NULL; + char *searchstr = NULL; + char *username_str = NULL; + char *tmp = NULL; + char *username_cpy = NULL; + + ret = dict_get_str (input_params, "username", &username); + if (ret) { + gf_log ("auth/login", GF_LOG_DEBUG, + "username not found, returning DONT-CARE"); + /* For non trusted clients username and password + will not be there. So dont reject the client. + */ + ret = 0; + goto out; + } + + ret = dict_get_str (input_params, "password", &password); + if (ret) { + gf_log ("auth/login", GF_LOG_WARNING, + "password not found, returning DONT-CARE"); + goto out; + } + + ret = dict_get_str (input_params, "remote-subvolume", &brick_name); + if (ret) { + gf_log ("auth/login", GF_LOG_ERROR, + "remote-subvolume not specified"); + ret = -1; + goto out; + } + + ret = gf_asprintf (&searchstr, "auth.login.%s.allow", brick_name); + if (-1 == ret) { + ret = 0; + goto out; + } + + allow_user = dict_get (config_params, searchstr); + GF_FREE (searchstr); + + if (allow_user) { + username_cpy = gf_strdup (allow_user->data); + if (!username_cpy) + goto out; + + username_str = strtok_r (username_cpy, " ,", &tmp); + + while (username_str) { + if (!fnmatch (username_str, username, 0)) { + ret = gf_asprintf (&searchstr, + "auth.login.%s.password", + username); + if (-1 == ret) + goto out; + + passwd_data = dict_get (config_params, + searchstr); + GF_FREE (searchstr); + + if (!passwd_data) { + gf_log ("auth/login", GF_LOG_ERROR, + "wrong username/password " + "combination"); + ret = -1; + goto out; + } + + ret = !((strcmp (data_to_str (passwd_data), + password))?0: -1); + if (!ret) { + client->auth.username = + gf_strdup (username); + client->auth.passwd = + gf_strdup (password); + } + if (ret == -1) + gf_log ("auth/login", GF_LOG_ERROR, + "wrong password for user %s", + username); + break; + } + username_str = strtok_r (NULL, " ,", &tmp); + } + } + +out: + GF_FREE (username_cpy); + + return ret; +} diff --git a/xlators/protocol/server/src/server-helpers.h b/xlators/protocol/server/src/server-helpers.h index 93ea35851..3c257b3bc 100644 --- a/xlators/protocol/server/src/server-helpers.h +++ b/xlators/protocol/server/src/server-helpers.h @@ -53,6 +53,8 @@ int serialize_rsp_dirent (gf_dirent_t *entries, gfs3_readdir_rsp *rsp); int serialize_rsp_direntp (gf_dirent_t *entries, gfs3_readdirp_rsp *rsp); int readdirp_rsp_cleanup (gfs3_readdirp_rsp *rsp); int readdir_rsp_cleanup (gfs3_readdir_rsp *rsp); +int auth_set_username_passwd (dict_t *input_params, dict_t *config_params, + struct _client_t *client); server_ctx_t *server_ctx_get (client_t *client, xlator_t *xlator); diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index c1cbc83f4..fde322099 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -3269,7 +3269,7 @@ posix_get_ancestry_non_directory (xlator_t *this, inode_t *leaf_inode, dirpath + priv->base_path_length, pathlen, head, - POSIX_ANCESTRY_PATH | POSIX_ANCESTRY_DENTRY, + type | POSIX_ANCESTRY_PATH, pgfid, handle_size, priv->base_path, @@ -3383,7 +3383,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, dict = dict_new (); if (!dict) { - op_errno = ENOMEM; + op_errno = ENOMEM; goto out; } @@ -3550,6 +3550,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char); if (!value) { op_ret = -1; + op_errno = ENOMEM; goto out; } size = sys_lgetxattr (real_path, key, value, size); @@ -3565,6 +3566,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, value [size] = '\0'; op_ret = dict_set_dynptr (dict, key, value, size); if (op_ret < 0) { + op_errno = -op_ret; gf_log (this->name, GF_LOG_ERROR, "dict set operation " "on %s for the key %s failed.", real_path, key); GF_FREE (value); @@ -3642,6 +3644,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, value [size] = '\0'; op_ret = dict_set_dynptr (dict, key, value, size); if (op_ret < 0) { + op_errno = -op_ret; gf_log (this->name, GF_LOG_ERROR, "dict set operation " "on %s for the key %s failed.", real_path, key); GF_FREE (value); |