summaryrefslogtreecommitdiffstats
path: root/xlators
diff options
context:
space:
mode:
authorSaravanakumar Arumugam <sarumuga@redhat.com>2015-05-05 17:03:39 +0530
committerVijay Bellur <vbellur@redhat.com>2015-05-08 22:24:44 -0700
commitf1ac02a52f4019e7890ce501af7e825ef703d14d (patch)
treef9188171947f1fda220efb70128ff9c49405fb8e /xlators
parente88837ed0ff68093912c2b8e996c5851c53674ca (diff)
geo-rep: rename handling in dht volume(changelog changes)
Background: Glusterfs changelogs are stored in each brick and record the changes that happened in that brick. Geo-rep runs on all the nodes of the master and processes changelogs "independently". Changelogs are processed at brick level, but all the fops are replayed on the "slave mount" point. Problem: With a DHT volume, "internal fops" are NOT recorded in the changelog. In the rename case, the rename is recorded in the "hashed" brick's changelog (DHT's internal fops, such as creating the linkto file and unlink, are NOT recorded). This leads to inconsistent rename operations. For example, consider a distribute volume created with two bricks, B1 and B2. //Consider master volume mounted @ /mnt/master and following operations executed: cd /mnt/master touch f1 // f1 falls on B1 Hash mv f1 f2 // f2 falls on B2 Hash // Here, Changelogs are recorded as below: @B1 CREATE f1 @B2 RENAME f1 f2 Here, a race exists between bricks B1 and B2; say B2's changelog gets processed first. The source file f1 itself is "NOT PRESENT", so the rename replay goes ahead and creates f2 (current implementation). We have this problem when the rename falls on another brick and the file is unlinked in Master. A similar issue exists in the following case too (multiple renames): CREATE f1 RENAME f1 f2 RENAME f2 f1 Solution: Instead of carrying out "changelogging" at the "HASHED volume", carry it out at the "CACHED volume". This way, rename operations are recorded where the actual files are present. So the changelog is recorded as: @B1 CREATE f1 RENAME f1 f2 Note: This patch depends on the DHT changes in http://review.gluster.org/10410/; the changelog-related changes are separated out for review. In changelog, the xdata passed from DHT is interpreted as follows: 1. In case of unlink (internal operation as part of rename), if the xdata value is set, the unlink is considered a RENAME and recorded accordingly. 2. In case of rename (hash and cache different), if the xdata value is NOT set, recording of the rename operation is SKIPPED.
BUG: 1141379 Change-Id: Ifca675e6d4ef8c4e3b7ef4a7ec85de8b3a38dc08 Signed-off-by: Saravanakumar Arumugam <sarumuga@redhat.com> Reviewed-on: http://review.gluster.org/10220 Tested-by: Gluster Build System <jenkins@build.gluster.com> Tested-by: NetBSD Build System Reviewed-by: Kotresh HR <khiremat@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r--xlators/features/changelog/src/changelog.c115
1 files changed, 77 insertions, 38 deletions
diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
index 53c0cf85728..b07009d3f8f 100644
--- a/xlators/features/changelog/src/changelog.c
+++ b/xlators/features/changelog/src/changelog.c
@@ -212,40 +212,76 @@ int32_t
changelog_unlink (call_frame_t *frame, xlator_t *this,
loc_t *loc, int xflags, dict_t *xdata)
{
- size_t xtra_len = 0;
- changelog_priv_t *priv = NULL;
- changelog_opt_t *co = NULL;
- call_stub_t *stub = NULL;
- struct list_head queue = {0, };
- gf_boolean_t barrier_enabled = _gf_false;
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ call_stub_t *stub = NULL;
+ struct list_head queue = {0, };
+ gf_boolean_t barrier_enabled = _gf_false;
+ dht_changelog_rename_info_t *info = NULL;
+ int ret = 0;
+ char old_name[NAME_MAX] = {0};
+ char new_name[NAME_MAX] = {0};
+ char *nname = NULL;
INIT_LIST_HEAD (&queue);
-
priv = this->private;
- CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
- CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, wind);
- CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, loc->inode->gfid, 2);
-
- co = changelog_get_usable_buffer (frame->local);
- if (!co)
- goto wind;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
- CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+ ret = dict_get_bin (xdata, DHT_CHANGELOG_RENAME_OP_KEY, (void **)&info);
+ if (!ret) { /* special case: unlink considered as rename */
+ /* 3 == fop + oldloc + newloc */
+ CHANGELOG_INIT_NOCHECK (this, frame->local,
+ NULL, loc->inode->gfid, 3);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, GF_FOP_RENAME, fop_fn, xtra_len);
+
+ co++;
+ strncpy (old_name, info->buffer, info->oldname_len);
+ CHANGELOG_FILL_ENTRY (co, info->old_pargfid, old_name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ co++;
+ /* new name resides just after old name */
+ nname = info->buffer + info->oldname_len;
+ strncpy (new_name, nname, info->newname_len);
+ CHANGELOG_FILL_ENTRY (co, info->new_pargfid, new_name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local,
+ xtra_len, 3);
+ } else { /* default unlink */
+ CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, wind);
+ CHANGELOG_INIT_NOCHECK (this, frame->local, NULL,
+ loc->inode->gfid, 2);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op,
+ fop_fn, xtra_len);
+
+ co++;
+ if (priv->capture_del_path) {
+ CHANGELOG_FILL_ENTRY_DIR_PATH (co, loc->pargfid,
+ loc->name, del_entry_fn, del_entry_free_fn,
+ xtra_len, wind, _gf_true);
+ } else {
+ CHANGELOG_FILL_ENTRY_DIR_PATH (co, loc->pargfid,
+ loc->name, del_entry_fn, del_entry_free_fn,
+ xtra_len, wind, _gf_false);
+ }
- co++;
- if (priv->capture_del_path) {
- CHANGELOG_FILL_ENTRY_DIR_PATH (co, loc->pargfid, loc->name,
- del_entry_fn, del_entry_free_fn,
- xtra_len, wind, _gf_true);
- } else {
- CHANGELOG_FILL_ENTRY_DIR_PATH (co, loc->pargfid, loc->name,
- del_entry_fn, del_entry_free_fn,
- xtra_len, wind, _gf_false);
+ changelog_set_usable_record_and_length (frame->local,
+ xtra_len, 2);
}
- changelog_set_usable_record_and_length (frame->local, xtra_len, 2);
-
/* changelog barrier */
LOCK (&priv->lock);
{
@@ -295,16 +331,13 @@ changelog_rename_cbk (call_frame_t *frame,
struct iatt *postoldparent, struct iatt *prenewparent,
struct iatt *postnewparent, dict_t *xdata)
{
- changelog_priv_t *priv = NULL;
- changelog_local_t *local = NULL;
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
priv = this->private;
local = frame->local;
-
CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
-
changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
-
unwind:
changelog_dec_fop_cnt (this, priv, local);
CHANGELOG_STACK_UNWIND (rename, frame, op_ret, op_errno,
@@ -313,7 +346,6 @@ changelog_rename_cbk (call_frame_t *frame,
return 0;
}
-
int32_t
changelog_rename_resume (call_frame_t *frame, xlator_t *this,
loc_t *oldloc, loc_t *newloc, dict_t *xdata)
@@ -334,18 +366,25 @@ int32_t
changelog_rename (call_frame_t *frame, xlator_t *this,
loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- size_t xtra_len = 0;
- changelog_priv_t *priv = NULL;
- changelog_opt_t *co = NULL;
- call_stub_t *stub = NULL;
- struct list_head queue = {0, };
- gf_boolean_t barrier_enabled = _gf_false;
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ call_stub_t *stub = NULL;
+ struct list_head queue = {0, };
+ gf_boolean_t barrier_enabled = _gf_false;
+ dht_changelog_rename_info_t *info = NULL;
+ int ret = 0;
INIT_LIST_HEAD (&queue);
priv = this->private;
CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+ ret = dict_get_bin (xdata, DHT_CHANGELOG_RENAME_OP_KEY, (void **)&info);
+ if (ret) { /* xdata "NOT" set, Special rename => avoid logging */
+ goto wind;
+ }
+
/* 3 == fop + oldloc + newloc */
CHANGELOG_INIT_NOCHECK (this, frame->local,
NULL, oldloc->inode->gfid, 3);