summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSaravanakumar Arumugam <sarumuga@redhat.com>2015-05-05 17:03:39 +0530
committerVijay Bellur <vbellur@redhat.com>2015-05-09 02:08:10 -0700
commitc466b137b0cabb844ce7a1f92549ff9b72369830 (patch)
tree4a807535ec36c7613debdca29b075bb1bf70e0fd
parent719c927592cfdb0de88243769d477ca211a2b494 (diff)
geo-rep: rename handling in dht volume(changelog changes)
Background: Glusterfs changelogs are stored in each brick and record the changes that happened in that brick. Geo-rep runs on all the nodes of the master and processes changelogs "independently". Changelogs are processed at brick level, but all the fops are replayed on the "slave mount" point. Problem: With a DHT volume, "internal fops" are NOT recorded in the changelog. In the rename case, the rename is recorded in the "hashed" brick's changelog. (DHT's internal fops, such as creating the linkto file and unlink, are NOT recorded.) This leads to inconsistent rename operations. For example, consider a distribute volume created with two bricks, B1 and B2. // Consider the master volume mounted at /mnt/master and the following operations executed: cd /mnt/master touch f1 // f1 falls on B1 Hash mv f1 f2 // f2 falls on B2 Hash // Here, changelogs are recorded as below: @B1 CREATE f1 @B2 RENAME f1 f2 Here, a race exists between bricks B1 and B2; say B2 gets executed first. The source file f1 itself is "NOT PRESENT", so it will go ahead and create f2 (current implementation). We have this problem when the rename falls in another brick and the file is unlinked in the master. A similar issue exists in the following case too (multiple renames): CREATE f1 RENAME f1 f2 RENAME f2 f1 Solution: Instead of carrying out "changelogging" at the "HASHED volume", carry it out at the "CACHED volume". This way, rename operations are carried out where the actual files are present. So, the changelog is recorded as: @B1 CREATE f1 RENAME f1 f2 Note: This patch is dependent on the dht changes from this patch: http://review.gluster.org/10410/ The changelog-related changes are separated out for review. In changelog, the xdata passed from DHT is handled as follows: 1. In case of unlink (internal operation as part of rename), when the xdata value is set, the unlink is considered a RENAME and recorded accordingly. 2. In case of rename (hash and cache different), when the xdata value is NOT set, recording the rename operation is SKIPPED.
BUG: 1219412 Change-Id: I7691166c84991482b2cfe073df64e2317c935b13 Reviewed-On: http://review.gluster.org/#/c/10220/ Signed-off-by: Saravanakumar Arumugam <sarumuga@redhat.com> Reviewed-on: http://review.gluster.org/10633 Reviewed-by: Aravinda VK <avishwan@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r--xlators/features/changelog/src/changelog.c115
1 files changed, 77 insertions, 38 deletions
diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
index 53c0cf85728..b07009d3f8f 100644
--- a/xlators/features/changelog/src/changelog.c
+++ b/xlators/features/changelog/src/changelog.c
@@ -212,40 +212,76 @@ int32_t
changelog_unlink (call_frame_t *frame, xlator_t *this,
loc_t *loc, int xflags, dict_t *xdata)
{
- size_t xtra_len = 0;
- changelog_priv_t *priv = NULL;
- changelog_opt_t *co = NULL;
- call_stub_t *stub = NULL;
- struct list_head queue = {0, };
- gf_boolean_t barrier_enabled = _gf_false;
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ call_stub_t *stub = NULL;
+ struct list_head queue = {0, };
+ gf_boolean_t barrier_enabled = _gf_false;
+ dht_changelog_rename_info_t *info = NULL;
+ int ret = 0;
+ char old_name[NAME_MAX] = {0};
+ char new_name[NAME_MAX] = {0};
+ char *nname = NULL;
INIT_LIST_HEAD (&queue);
-
priv = this->private;
- CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
- CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, wind);
- CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, loc->inode->gfid, 2);
-
- co = changelog_get_usable_buffer (frame->local);
- if (!co)
- goto wind;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
- CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+ ret = dict_get_bin (xdata, DHT_CHANGELOG_RENAME_OP_KEY, (void **)&info);
+ if (!ret) { /* special case: unlink considered as rename */
+ /* 3 == fop + oldloc + newloc */
+ CHANGELOG_INIT_NOCHECK (this, frame->local,
+ NULL, loc->inode->gfid, 3);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, GF_FOP_RENAME, fop_fn, xtra_len);
+
+ co++;
+ strncpy (old_name, info->buffer, info->oldname_len);
+ CHANGELOG_FILL_ENTRY (co, info->old_pargfid, old_name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ co++;
+ /* new name resides just after old name */
+ nname = info->buffer + info->oldname_len;
+ strncpy (new_name, nname, info->newname_len);
+ CHANGELOG_FILL_ENTRY (co, info->new_pargfid, new_name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local,
+ xtra_len, 3);
+ } else { /* default unlink */
+ CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, wind);
+ CHANGELOG_INIT_NOCHECK (this, frame->local, NULL,
+ loc->inode->gfid, 2);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op,
+ fop_fn, xtra_len);
+
+ co++;
+ if (priv->capture_del_path) {
+ CHANGELOG_FILL_ENTRY_DIR_PATH (co, loc->pargfid,
+ loc->name, del_entry_fn, del_entry_free_fn,
+ xtra_len, wind, _gf_true);
+ } else {
+ CHANGELOG_FILL_ENTRY_DIR_PATH (co, loc->pargfid,
+ loc->name, del_entry_fn, del_entry_free_fn,
+ xtra_len, wind, _gf_false);
+ }
- co++;
- if (priv->capture_del_path) {
- CHANGELOG_FILL_ENTRY_DIR_PATH (co, loc->pargfid, loc->name,
- del_entry_fn, del_entry_free_fn,
- xtra_len, wind, _gf_true);
- } else {
- CHANGELOG_FILL_ENTRY_DIR_PATH (co, loc->pargfid, loc->name,
- del_entry_fn, del_entry_free_fn,
- xtra_len, wind, _gf_false);
+ changelog_set_usable_record_and_length (frame->local,
+ xtra_len, 2);
}
- changelog_set_usable_record_and_length (frame->local, xtra_len, 2);
-
/* changelog barrier */
LOCK (&priv->lock);
{
@@ -295,16 +331,13 @@ changelog_rename_cbk (call_frame_t *frame,
struct iatt *postoldparent, struct iatt *prenewparent,
struct iatt *postnewparent, dict_t *xdata)
{
- changelog_priv_t *priv = NULL;
- changelog_local_t *local = NULL;
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
priv = this->private;
local = frame->local;
-
CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
-
changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
-
unwind:
changelog_dec_fop_cnt (this, priv, local);
CHANGELOG_STACK_UNWIND (rename, frame, op_ret, op_errno,
@@ -313,7 +346,6 @@ changelog_rename_cbk (call_frame_t *frame,
return 0;
}
-
int32_t
changelog_rename_resume (call_frame_t *frame, xlator_t *this,
loc_t *oldloc, loc_t *newloc, dict_t *xdata)
@@ -334,18 +366,25 @@ int32_t
changelog_rename (call_frame_t *frame, xlator_t *this,
loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- size_t xtra_len = 0;
- changelog_priv_t *priv = NULL;
- changelog_opt_t *co = NULL;
- call_stub_t *stub = NULL;
- struct list_head queue = {0, };
- gf_boolean_t barrier_enabled = _gf_false;
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ call_stub_t *stub = NULL;
+ struct list_head queue = {0, };
+ gf_boolean_t barrier_enabled = _gf_false;
+ dht_changelog_rename_info_t *info = NULL;
+ int ret = 0;
INIT_LIST_HEAD (&queue);
priv = this->private;
CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+ ret = dict_get_bin (xdata, DHT_CHANGELOG_RENAME_OP_KEY, (void **)&info);
+ if (ret) { /* xdata "NOT" set, Special rename => avoid logging */
+ goto wind;
+ }
+
/* 3 == fop + oldloc + newloc */
CHANGELOG_INIT_NOCHECK (this, frame->local,
NULL, oldloc->inode->gfid, 3);