diff options
author | Raghavendra Bhat <raghavendra@redhat.com> | 2015-05-13 14:35:47 +0530 |
---|---|---|
committer | Venky Shankar <vshankar@redhat.com> | 2015-05-30 21:15:04 -0700 |
commit | 2ef27f82c3705b18e2c3c030b8debcdb7b30e33b (patch) | |
tree | 49240fbcaeb66243a8a2669699a4b026d55c15af /xlators/features/bit-rot | |
parent | 85173e43faba9bbedd287a4103c129e289829e1f (diff) |
features/bitrot: refactor brick connection logic
Backport of http://review.gluster.org/10763
Brick connection was bloated (and not implemented efficiently) with
calls which were not required to be made under lock. This resulted
in critical code paths being starved of the lock. This eventually did
not scale when the number of bricks per volume increased (add-brick
and the like).
Also, this patch cleans up some of the weird reconnection logic that
added to the starvation of resources, and cleans up the uncontrolled
growth of log files.
Change-Id: I05e737f2a9742944a4a543327d167de2489236a4
BUG: 1226146
Original-Author: Raghavendra Bhat <raghavendra@redhat.com>
Signed-off-by: Venky Shankar <vshankar@redhat.com>
Reviewed-on: http://review.gluster.org/10986
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Tested-by: NetBSD Build System <jenkins@build.gluster.org>
Diffstat (limited to 'xlators/features/bit-rot')
-rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot.c | 124 | ||||
-rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot.h | 7 |
2 files changed, 68 insertions, 63 deletions
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c index e7cfe89e1dd..81490830570 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot.c @@ -1080,8 +1080,11 @@ static inline int32_t br_enact_signer (xlator_t *this, br_child_t *child, br_stub_init_t *stub) { int32_t ret = 0; + br_private_t *priv = NULL; struct gf_brick_spec *brick = NULL; + priv = this->private; + brick = GF_CALLOC (1, sizeof (struct gf_brick_spec), gf_common_mt_gf_brick_spec_t); if (!brick) @@ -1105,7 +1108,7 @@ br_enact_signer (xlator_t *this, br_child_t *child, br_stub_init_t *stub) child->threadrunning = 1; /* it's OK to continue, "old" objects would be signed when modified */ - list_del_init (&child->list); + list_add_tail (&child->list, &priv->signing); return 0; dealloc: @@ -1157,7 +1160,7 @@ br_enact_scrubber (xlator_t *this, br_child_t *child) */ pthread_mutex_lock (&fsscrub->mutex); { - list_move (&child->list, &fsscrub->scrublist); + list_add_tail (&child->list, &fsscrub->scrublist); pthread_cond_broadcast (&fsscrub->cond); } pthread_mutex_unlock (&fsscrub->mutex); @@ -1165,6 +1168,10 @@ br_enact_scrubber (xlator_t *this, br_child_t *child) return 0; error_return: + LOCK_DESTROY (&fsscan->entrylock); + pthread_mutex_destroy (&fsscan->waitlock); + pthread_cond_destroy (&fsscan->waitcond); + return -1; } @@ -1233,6 +1240,10 @@ br_brick_connect (xlator_t *this, br_child_t *child) else ret = br_enact_signer (this, child, stub); + if (!ret) + gf_log (this->name, GF_LOG_INFO, + "Connected to brick %s..", child->brick_path); + free_dict: dict_unref (xattr); wipeloc: @@ -1249,10 +1260,10 @@ br_brick_connect (xlator_t *this, br_child_t *child) void * br_handle_events (void *arg) { + int32_t ret = 0; xlator_t *this = NULL; br_private_t *priv = NULL; br_child_t *child = NULL; - int32_t ret = -1; this = arg; priv = this->private; @@ -1268,25 +1279,19 @@ br_handle_events (void *arg) while (1) { 
pthread_mutex_lock (&priv->lock); { - while (list_empty (&priv->bricks)) { - pthread_cond_wait (&priv->cond, - &priv->lock); - } - - child = list_entry (priv->bricks.next, br_child_t, - list); - if (child && child->child_up) { - ret = br_brick_connect (this, child); - if (ret == -1) - gf_log (this->name, GF_LOG_ERROR, - "failed to connect to the " - "child (subvolume: %s)", - child->xl->name); - - } + while (list_empty (&priv->bricks)) + pthread_cond_wait (&priv->cond, &priv->lock); + child = list_first_entry + (&priv->bricks, br_child_t, list); + list_del_init (&child->list); } pthread_mutex_unlock (&priv->lock); + + ret = br_brick_connect (this, child); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "failed to connect " + "to subvolume %s", child->xl->name); } return NULL; @@ -1295,7 +1300,7 @@ br_handle_events (void *arg) int32_t mem_acct_init (xlator_t *this) { - int32_t ret = -1; + int32_t ret = -1; if (!this) return ret; @@ -1314,60 +1319,52 @@ mem_acct_init (xlator_t *this) int notify (xlator_t *this, int32_t event, void *data, ...) { - xlator_t *subvol = NULL; - br_private_t *priv = NULL; - int idx = -1; - br_child_t *child = NULL; + int idx = -1; + xlator_t *subvol = NULL; + br_child_t *child = NULL; + br_private_t *priv = NULL; subvol = (xlator_t *)data; priv = this->private; - gf_log (this->name, GF_LOG_TRACE, "Notification received: %d", - event); + gf_log (this->name, GF_LOG_TRACE, "Notification received: %d", event); + + idx = br_find_child_index (this, subvol); switch (event) { case GF_EVENT_CHILD_UP: - /* should this be done under lock? or is it ok to do it - without lock? 
*/ - idx = br_find_child_index (this, subvol); + if (idx < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Got event %d from invalid subvolume", event); + goto out; + } pthread_mutex_lock (&priv->lock); { - if (idx < 0) { - gf_log (this->name, GF_LOG_ERROR, "got child " - "up from invalid subvolume"); - } else { - child = &priv->children[idx]; - if (child->child_up != 1) - child->child_up = 1; - if (!child->xl) - child->xl = subvol; - if (!child->table) - child->table = inode_table_new (4096, - subvol); - priv->up_children++; - list_add_tail (&child->list, &priv->bricks); - pthread_cond_signal (&priv->cond); - } + child = &priv->children[idx]; + if (child->child_up == 1) + goto unblock; + + child->child_up = 1; + child->xl = subvol; + child->table = inode_table_new (4096, subvol); + + priv->up_children++; + + list_add_tail (&child->list, &priv->bricks); + pthread_cond_signal (&priv->cond); } + unblock: pthread_mutex_unlock (&priv->lock); - break; - case GF_EVENT_CHILD_MODIFIED: - idx = br_find_child_index (this, subvol); - if (idx < 0) { - gf_log (this->name, GF_LOG_ERROR, "received child up " - "from invalid subvolume"); - goto out; - } - priv = this->private; - /* ++(priv->generation); */ + if (priv->up_children == priv->child_count) + default_notify (this, event, data); break; + case GF_EVENT_CHILD_DOWN: - idx = br_find_child_index (this, subvol); if (idx < 0) { - gf_log (this->name, GF_LOG_ERROR, "received child down " - "from invalid subvolume"); + gf_log (this->name, GF_LOG_ERROR, + "Got event %d from invalid subvolume", event); goto out; } @@ -1379,13 +1376,15 @@ notify (xlator_t *this, int32_t event, void *data, ...) 
} } pthread_mutex_unlock (&priv->lock); + + if (priv->up_children == 0) + default_notify (this, event, data); break; - case GF_EVENT_PARENT_UP: - default_notify (this, GF_EVENT_PARENT_UP, data); - break; + default: + default_notify (this, event, data); } -out: + out: return 0; } @@ -1569,6 +1568,7 @@ init (xlator_t *this) for (i = 0; i < priv->child_count; i++) INIT_LIST_HEAD (&priv->children[i].list); INIT_LIST_HEAD (&priv->bricks); + INIT_LIST_HEAD (&priv->signing); priv->timer_wheel = glusterfs_global_timer_wheel (this); if (!priv->timer_wheel) { diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h index ec943e9131f..40e02ebf378 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot.h @@ -143,13 +143,17 @@ struct br_private { struct list_head bricks; /* list of bricks from which CHILD_UP has been received */ - pthread_cond_t cond; /* handling CHILD_UP notifications */ + struct list_head signing; + pthread_cond_t object_cond; /* handling signing of objects */ int child_count; br_child_t *children; /* list of subvolumes */ int up_children; + + pthread_cond_t cond; /* handling CHILD_UP notifications */ pthread_t thread; /* thread for connecting each UP child with changelog */ + struct tvec_base *timer_wheel; /* timer wheel where the objects which changelog has sent sits and waits for expiry */ @@ -163,6 +167,7 @@ struct br_private { br_tbf_t *tbf; /* token bucket filter */ gf_boolean_t iamscrubber; /* function as a fs scrubber */ + struct br_scrubber fsscrub; /* scrubbers for this subvolume */ }; |