summaryrefslogtreecommitdiffstats
path: root/xlators/mgmt/glusterd
diff options
context:
space:
mode:
authorVijaikumar M <vmallika@redhat.com>2014-05-22 11:58:06 +0530
committerKrishnan Parthasarathi <kparthas@redhat.com>2014-05-22 05:20:58 -0700
commit15f698833de54793880505a1f8e549b956eca137 (patch)
tree8fae52b3d0f7be990ca1982227156638bb83271f /xlators/mgmt/glusterd
parentcec37c9b66b8711b213f114875d215f56b8120aa (diff)
glusterd/snapshot: brick_start shouldn't be done from child thread
When creating a volume snapshot, the back-end operations for each brick ('taking an LVM snapshot and starting the brick') are executed in parallel using the synctask framework. brick_start releases the big_lock in brick_connect and then acquires it again. This can cause a deadlock under a race condition in which the main thread is waiting for one of the synctask threads to finish while that synctask thread is waiting for the big_lock. The solution is to not start the brick from a synctask.
Change-Id: Iaaf0be3070fb71e63c2de8fc2938d2b77d40057d
BUG: 1100218
Signed-off-by: Vijaikumar M <vmallika@redhat.com>
Reviewed-on: http://review.gluster.org/7842
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
Reviewed-by: Krishnan Parthasarathi <kparthas@redhat.com>
Tested-by: Krishnan Parthasarathi <kparthas@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd')
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot.c27
1 files changed, 19 insertions, 8 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
index 3a16d08a96b..522525f5a0d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
@@ -3879,14 +3879,6 @@ glusterd_take_brick_snapshot (dict_t *dict, glusterd_volinfo_t *snap_vol,
goto out;
}
- ret = glusterd_brick_start (snap_vol, brickinfo, _gf_false);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING, "starting the "
- "brick %s:%s for the snap %s (volume: %s) "
- "failed", brickinfo->hostname, brickinfo->path,
- snap_vol->snapshot->snapname, snap_vol->volname);
- }
-
out:
gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
return ret;
@@ -4871,6 +4863,7 @@ glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr,
glusterd_snap_t *snap = NULL;
glusterd_volinfo_t *origin_vol = NULL;
glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
glusterd_conf_t *priv = NULL;
this = THIS;
@@ -4956,7 +4949,25 @@ glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr,
goto out;
}
+ /*TODO: As of now start the bricks as part of snapshot creation op.
+ brick_start releases the big_lock and this can cause regression
+ for bug# 1088355.
+ We need to fix brick_connect not to release big_lock*/
list_for_each_entry (snap_vol, &snap->volumes, vol_list) {
+ list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) {
+ ret = glusterd_brick_start (snap_vol, brickinfo,
+ _gf_false);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "starting "
+ "the brick %s:%s for the snap %s "
+ "(volume: %s) failed",
+ brickinfo->hostname, brickinfo->path,
+ snap_vol->snapshot->snapname,
+ snap_vol->volname);
+ goto out;
+ }
+ }
+
snap_vol->status = GLUSTERD_STATUS_STARTED;
ret = glusterd_store_volinfo (snap_vol,
GLUSTERD_VOLINFO_VER_AC_INCREMENT);