diff options
author | Vijaikumar M <vmallika@redhat.com> | 2014-05-22 11:58:06 +0530 |
---|---|---|
committer | Krishnan Parthasarathi <kparthas@redhat.com> | 2014-05-22 05:20:58 -0700 |
commit | 15f698833de54793880505a1f8e549b956eca137 (patch) | |
tree | 8fae52b3d0f7be990ca1982227156638bb83271f | |
parent | cec37c9b66b8711b213f114875d215f56b8120aa (diff) |
glusterd/snapshot: brick_start shouldn't be done from child thread
When creating a volume snapshot, the back-end operations 'taking an
lvm_snapshot and starting the brick' for each brick
are executed in parallel using the synctask framework.
brick_start releases the big_lock in brick_connect and then acquires it
again.
This can cause a deadlock under a race condition where the main thread is waiting
for one of the synctask threads to finish while
that synctask thread is waiting for the big_lock.
The solution is not to start the brick from a synctask.
Change-Id: Iaaf0be3070fb71e63c2de8fc2938d2b77d40057d
BUG: 1100218
Signed-off-by: Vijaikumar M <vmallika@redhat.com>
Reviewed-on: http://review.gluster.org/7842
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
Reviewed-by: Krishnan Parthasarathi <kparthas@redhat.com>
Tested-by: Krishnan Parthasarathi <kparthas@redhat.com>
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-snapshot.c | 27 |
1 files changed, 19 insertions, 8 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c index 3a16d08a96b..522525f5a0d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c @@ -3879,14 +3879,6 @@ glusterd_take_brick_snapshot (dict_t *dict, glusterd_volinfo_t *snap_vol, goto out; } - ret = glusterd_brick_start (snap_vol, brickinfo, _gf_false); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, "starting the " - "brick %s:%s for the snap %s (volume: %s) " - "failed", brickinfo->hostname, brickinfo->path, - snap_vol->snapshot->snapname, snap_vol->volname); - } - out: gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); return ret; @@ -4871,6 +4863,7 @@ glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr, glusterd_snap_t *snap = NULL; glusterd_volinfo_t *origin_vol = NULL; glusterd_volinfo_t *snap_vol = NULL; + glusterd_brickinfo_t *brickinfo = NULL; glusterd_conf_t *priv = NULL; this = THIS; @@ -4956,7 +4949,25 @@ glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr, goto out; } + /*TODO: As of now start the bricks as part of snapshot creation op. + brick_start releases the big_lock and this can cause regression + for bug# 1088355. + We need to fix brick_connect not to release big_lock*/ list_for_each_entry (snap_vol, &snap->volumes, vol_list) { + list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) { + ret = glusterd_brick_start (snap_vol, brickinfo, + _gf_false); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "starting " + "the brick %s:%s for the snap %s " + "(volume: %s) failed", + brickinfo->hostname, brickinfo->path, + snap_vol->snapshot->snapname, + snap_vol->volname); + goto out; + } + } + snap_vol->status = GLUSTERD_STATUS_STARTED; ret = glusterd_store_volinfo (snap_vol, GLUSTERD_VOLINFO_VER_AC_INCREMENT); |