diff options
author | Pranith Kumar K <pkarampu@redhat.com> | 2016-03-17 19:42:00 +0530 |
---|---|---|
committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2016-04-15 02:51:06 -0700 |
commit | 8deedef565df49def75083678f8d1558c7b1f7d3 (patch) | |
tree | ee593ac5d6a88411ba1110e9cdef80270b8b4a43 /xlators | |
parent | d5409aae63a7ac5e5b3ea6cfa16c6250a028291c (diff) |
cluster/afr: Fix partial heals in 3-way replication
Problem:
When there are 2 sources and one sink and if two self-heal daemons
try to acquire locks at the same time, there is a chance that it
gets a lock on one source and sink, leading to a partial heal. This will
need one more heal from the remaining source to sink for the complete
self-heal. This is not optimal.
Fix:
Upgrade non-blocking locks to blocking locks on all the subvolumes, if
the number of locks acquired is a majority and there were EAGAINs.
BUG: 1318751
Change-Id: Iae10b8d3402756c4164b98cc49876056ff7a61e5
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/13766
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Ravishankar N <ravishankar@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 121 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 10 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 7 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-metadata.c | 2 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal.h | 13 |
5 files changed, 138 insertions, 15 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index be4cc75910e..68b5bb06799 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -726,11 +726,14 @@ afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this, void afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type, - int source, unsigned char *healed_sinks) + int source, unsigned char *sources, + unsigned char *healed_sinks) { char *status = NULL; char *sinks_str = NULL; char *p = NULL; + char *sources_str = NULL; + char *q = NULL; afr_private_t *priv = NULL; gf_loglevel_t loglevel = GF_LOG_NONE; int i = 0; @@ -738,10 +741,18 @@ afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type, priv = this->private; sinks_str = alloca0 (priv->child_count * 8); p = sinks_str; + sources_str = alloca0 (priv->child_count * 8); + q = sources_str; for (i = 0; i < priv->child_count; i++) { - if (!healed_sinks[i]) - continue; - p += sprintf (p, "%d ", i); + if (healed_sinks[i]) + p += sprintf (p, "%d ", i); + if (sources[i]) { + if (source == i) { + q += sprintf (q, "[%d] ", i); + } else { + q += sprintf (q, "%d ", i); + } + } } if (ret < 0) { @@ -754,8 +765,8 @@ afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type, gf_msg (this->name, loglevel, 0, AFR_MSG_SELF_HEAL_INFO, "%s %s selfheal on %s. 
" - "source=%d sinks=%s", status, type, uuid_utoa (gfid), - source, sinks_str); + "sources=%s sinks=%s", status, type, uuid_utoa (gfid), + sources_str, sinks_str); } int @@ -1011,6 +1022,67 @@ afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode, return afr_locked_fill (frame, this, locked_on); } +static void +afr_get_lock_and_eagain_counts (afr_private_t *priv, struct afr_reply *replies, + int *lock_count, int *eagain_count) +{ + int i = 0; + + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid) + continue; + if (replies[i].op_ret == 0) { + (*lock_count)++; + } else if (replies[i].op_ret == -1 && + replies[i].op_errno == EAGAIN) { + (*eagain_count)++; + } + } +} + +/*Do blocking locks if number of locks acquired is majority and there were some + * EAGAINs. Useful for odd-way replication*/ +int +afr_selfheal_tie_breaker_inodelk (call_frame_t *frame, xlator_t *this, + inode_t *inode, char *dom, off_t off, + size_t size, unsigned char *locked_on) +{ + loc_t loc = {0,}; + struct gf_flock flock = {0, }; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int lock_count = 0; + int eagain_count = 0; + + priv = this->private; + local = frame->local; + + loc.inode = inode_ref (inode); + gf_uuid_copy (loc.gfid, inode->gfid); + + flock.l_type = F_WRLCK; + flock.l_start = off; + flock.l_len = size; + + AFR_ONALL (frame, afr_selfheal_lock_cbk, inodelk, dom, + &loc, F_SETLK, &flock, NULL); + + afr_get_lock_and_eagain_counts (priv, local->replies, &lock_count, + &eagain_count); + + if (lock_count > priv->child_count/2 && eagain_count) { + afr_locked_fill (frame, this, locked_on); + afr_selfheal_uninodelk (frame, this, inode, dom, off, + size, locked_on); + + AFR_SEQ (frame, afr_selfheal_lock_cbk, inodelk, dom, + &loc, F_SETLKW, &flock, NULL); + } + + loc_wipe (&loc); + + return afr_locked_fill (frame, this, locked_on); +} int afr_selfheal_uninodelk (call_frame_t *frame, xlator_t *this, inode_t *inode, @@ -1091,6 +1163,43 @@ 
afr_selfheal_entrylk (call_frame_t *frame, xlator_t *this, inode_t *inode, return afr_locked_fill (frame, this, locked_on); } +int +afr_selfheal_tie_breaker_entrylk (call_frame_t *frame, xlator_t *this, + inode_t *inode, char *dom, const char *name, + unsigned char *locked_on) +{ + loc_t loc = {0,}; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int lock_count = 0; + int eagain_count = 0; + + priv = this->private; + local = frame->local; + + loc.inode = inode_ref (inode); + gf_uuid_copy (loc.gfid, inode->gfid); + + AFR_ONALL (frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, + name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL); + + afr_get_lock_and_eagain_counts (priv, local->replies, &lock_count, + &eagain_count); + + if (lock_count > priv->child_count/2 && eagain_count) { + afr_locked_fill (frame, this, locked_on); + afr_selfheal_unentrylk (frame, this, inode, dom, name, + locked_on); + + AFR_SEQ (frame, afr_selfheal_lock_cbk, entrylk, dom, + &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); + } + + loc_wipe (&loc); + + return afr_locked_fill (frame, this, locked_on); +} + int afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode, diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index a2dddafe5c0..b55cddc334c 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -781,7 +781,7 @@ out: if (did_sh) afr_log_selfheal (fd->inode->gfid, this, ret, "data", source, - healed_sinks); + sources, healed_sinks); else ret = 1; @@ -839,8 +839,9 @@ afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode) locked_on = alloca0 (priv->child_count); - ret = afr_selfheal_tryinodelk (frame, this, inode, priv->sh_domain, 0, 0, - locked_on); + ret = afr_selfheal_tie_breaker_inodelk (frame, this, inode, + priv->sh_domain, 0, 0, + locked_on); { if (ret < AFR_SH_MIN_PARTICIPANTS) { gf_msg_debug (this->name, 0, "%s: Skipping " @@ -859,7 
+860,8 @@ afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode) ret = __afr_selfheal_data (frame, this, fd, locked_on); } unlock: - afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0, 0, locked_on); + afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0, 0, + locked_on); if (fd) fd_unref (fd); diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index e4d616ea20a..c8e2c98db0e 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -699,7 +699,7 @@ postop_unlock: out: if (did_sh) afr_log_selfheal (fd->inode->gfid, this, ret, "entry", source, - healed_sinks); + sources, healed_sinks); else ret = 1; @@ -754,8 +754,9 @@ afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode) locked_on = alloca0 (priv->child_count); long_name_locked = alloca0 (priv->child_count); - ret = afr_selfheal_tryentrylk (frame, this, inode, priv->sh_domain, NULL, - locked_on); + ret = afr_selfheal_tie_breaker_entrylk (frame, this, inode, + priv->sh_domain, NULL, + locked_on); { if (ret < AFR_SH_MIN_PARTICIPANTS) { gf_msg_debug (this->name, 0, "%s: Skipping " diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 65c25abcb4a..62eedd9ea09 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -422,7 +422,7 @@ unlock: if (did_sh) afr_log_selfheal (inode->gfid, this, ret, "metadata", source, - healed_sinks); + sources, healed_sinks); else ret = 1; diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 0bee7a023ad..afc086c0560 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -113,6 +113,11 @@ afr_selfheal_tryinodelk (call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *locked_on); int 
+afr_selfheal_tie_breaker_inodelk (call_frame_t *frame, xlator_t *this, + inode_t *inode, char *dom, off_t off, + size_t size, unsigned char *locked_on); + +int afr_selfheal_uninodelk (call_frame_t *frame, xlator_t *this, inode_t *inode, char *dom, off_t off, size_t size, const unsigned char *locked_on); @@ -126,6 +131,11 @@ afr_selfheal_tryentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode, char *dom, const char *name, unsigned char *locked_on); int +afr_selfheal_tie_breaker_entrylk (call_frame_t *frame, xlator_t *this, + inode_t *inode, char *dom, const char *name, + unsigned char *locked_on); + +int afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode, char *dom, const char *name, unsigned char *locked_on); @@ -194,7 +204,8 @@ afr_success_count (struct afr_reply *replies, unsigned int count); void afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type, - int source, unsigned char *healed_sinks); + int source, unsigned char *sources, + unsigned char *healed_sinks); void afr_mark_largest_file_as_source (xlator_t *this, unsigned char *sources, |