diff options
author | Saravanakumar Arumugam <sarumuga@redhat.com> | 2015-05-05 17:03:39 +0530 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2015-05-09 02:08:10 -0700 |
commit | c466b137b0cabb844ce7a1f92549ff9b72369830 (patch) | |
tree | 4a807535ec36c7613debdca29b075bb1bf70e0fd | |
parent | 719c927592cfdb0de88243769d477ca211a2b494 (diff) |
geo-rep: rename handling in dht volume(changelog changes)
Background:
Glusterfs changelogs are stored in each brick, which records the changes
happened in that brick. Georep will run in all the nodes of master and processes
changelogs "independently". Processing changelogs is in brick level, but
all the fops will be replayed on "slave mount" point.
Problem:
With a DHT volume, in changelog "internal fops" are NOT recorded.
For Rename case, Rename is recorded in "hashed" brick changelog.
(DHT's internal fops like creating linkto file, unlink is NOT recorded).
This lead us to inconsistent rename operations.
For example,
Distribute volume created with Two bricks B1, B2.
//Consider master volume mounted @ /mnt/master and following operations
executed:
cd /mnt/master
touch f1 // f1 falls on B1 Hash
mv f1 f2 // f2 falls on B2 Hash
// Here, Changelogs are recorded as below:
@B1
CREATE f1
@B2
RENAME f1 f2
Here, race exists between Brick B1 and B2, say B2 will get executed first.
Source file f1 itself is "NOT PRESENT", so it will go ahead and create
f2 (Current implementation).
We have this problem When rename falls in another brick and
file is unlinked in Master.
Similar kind of issue exists in following case too(multiple rename):
CREATE f1
RENAME f1 f2
RENAME f2 f1
Solution:
Instead of carrying out "changelogging" at "HASHED volume",
carry out at the "CACHED volume".
This way we have rename operations carried out where actual files are present.
So,Changelog recorded as :
@B1
CREATE f1
RENAME f1 f2
Note:
This patch is dependent on dht changes from this patch.
http://review.gluster.org/10410/
changelog related changes are separated out for review.
In changelog, xdata passed from DHT is considered as:
1. In case of unlink (internal operation as part of rename), xdata value
is set , it is considered as RENAME and recorded accordingly.
2. In case of rename (Hash and Cache different), xdata value is NOT
set, recording rename operation is SKIPPED.
BUG: 1219412
Change-Id: I7691166c84991482b2cfe073df64e2317c935b13
Reviewed-On: http://review.gluster.org/#/c/10220/
Signed-off-by: Saravanakumar Arumugam <sarumuga@redhat.com>
Reviewed-on: http://review.gluster.org/10633
Reviewed-by: Aravinda VK <avishwan@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r-- | xlators/features/changelog/src/changelog.c | 115 |
1 files changed, 77 insertions, 38 deletions
diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c index 53c0cf85728..b07009d3f8f 100644 --- a/xlators/features/changelog/src/changelog.c +++ b/xlators/features/changelog/src/changelog.c @@ -212,40 +212,76 @@ int32_t changelog_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, dict_t *xdata) { - size_t xtra_len = 0; - changelog_priv_t *priv = NULL; - changelog_opt_t *co = NULL; - call_stub_t *stub = NULL; - struct list_head queue = {0, }; - gf_boolean_t barrier_enabled = _gf_false; + size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + call_stub_t *stub = NULL; + struct list_head queue = {0, }; + gf_boolean_t barrier_enabled = _gf_false; + dht_changelog_rename_info_t *info = NULL; + int ret = 0; + char old_name[NAME_MAX] = {0}; + char new_name[NAME_MAX] = {0}; + char *nname = NULL; INIT_LIST_HEAD (&queue); - priv = this->private; - CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); - CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, wind); - CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, loc->inode->gfid, 2); - - co = changelog_get_usable_buffer (frame->local); - if (!co) - goto wind; + CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); - CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); + ret = dict_get_bin (xdata, DHT_CHANGELOG_RENAME_OP_KEY, (void **)&info); + if (!ret) { /* special case: unlink considered as rename */ + /* 3 == fop + oldloc + newloc */ + CHANGELOG_INIT_NOCHECK (this, frame->local, + NULL, loc->inode->gfid, 3); + + co = changelog_get_usable_buffer (frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER (co, GF_FOP_RENAME, fop_fn, xtra_len); + + co++; + strncpy (old_name, info->buffer, info->oldname_len); + CHANGELOG_FILL_ENTRY (co, info->old_pargfid, old_name, + entry_fn, entry_free_fn, xtra_len, wind); + + co++; + /* new name resides just after old name */ + nname = info->buffer + info->oldname_len; + strncpy (new_name, nname, info->newname_len); + CHANGELOG_FILL_ENTRY (co, info->new_pargfid, new_name, + entry_fn, entry_free_fn, xtra_len, wind); + + changelog_set_usable_record_and_length (frame->local, + xtra_len, 3); + } else { /* default unlink */ + CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, wind); + CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, + loc->inode->gfid, 2); + + co = changelog_get_usable_buffer (frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, + fop_fn, xtra_len); + + co++; + if (priv->capture_del_path) { + CHANGELOG_FILL_ENTRY_DIR_PATH (co, loc->pargfid, + loc->name, del_entry_fn, del_entry_free_fn, + xtra_len, wind, _gf_true); + } else { + CHANGELOG_FILL_ENTRY_DIR_PATH (co, loc->pargfid, + loc->name, del_entry_fn, del_entry_free_fn, + xtra_len, wind, _gf_false); + } - co++; - if (priv->capture_del_path) { - CHANGELOG_FILL_ENTRY_DIR_PATH (co, loc->pargfid, loc->name, - del_entry_fn, del_entry_free_fn, - xtra_len, wind, _gf_true); - } else { - CHANGELOG_FILL_ENTRY_DIR_PATH (co, loc->pargfid, loc->name, - del_entry_fn, del_entry_free_fn, - xtra_len, wind, _gf_false); + changelog_set_usable_record_and_length (frame->local, + xtra_len, 2); } - changelog_set_usable_record_and_length (frame->local, xtra_len, 2); - /* changelog barrier */ LOCK (&priv->lock); { @@ -295,16 +331,13 @@ changelog_rename_cbk (call_frame_t *frame, struct iatt *postoldparent, struct iatt *prenewparent, struct iatt *postnewparent, dict_t *xdata) { - changelog_priv_t *priv = NULL; - changelog_local_t *local = NULL; + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; priv = this->private; local = frame->local; - CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); - changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY); - unwind: changelog_dec_fop_cnt (this, priv, local); CHANGELOG_STACK_UNWIND (rename, frame, op_ret, op_errno, @@ -313,7 +346,6 @@ changelog_rename_cbk (call_frame_t *frame, return 0; } - int32_t changelog_rename_resume (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata) @@ -334,18 +366,25 @@ int32_t changelog_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata) { - size_t xtra_len = 0; - changelog_priv_t *priv = NULL; - changelog_opt_t *co = NULL; - call_stub_t *stub = NULL; - struct list_head queue = {0, }; - gf_boolean_t barrier_enabled = _gf_false; + size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + call_stub_t *stub = NULL; + struct list_head queue = {0, }; + gf_boolean_t barrier_enabled = _gf_false; + dht_changelog_rename_info_t *info = NULL; + int ret = 0; INIT_LIST_HEAD (&queue); priv = this->private; CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + ret = dict_get_bin (xdata, DHT_CHANGELOG_RENAME_OP_KEY, (void **)&info); + if (ret) { /* xdata "NOT" set, Special rename => avoid logging */ + goto wind; + } + /* 3 == fop + oldloc + newloc */ CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, oldloc->inode->gfid, 3); |