summaryrefslogtreecommitdiffstats
path: root/libglusterfs
diff options
context:
space:
mode:
authorRavishankar N <ravishankar@redhat.com>2016-12-09 09:50:43 +0530
committerPranith Kumar Karampuri <pkarampu@redhat.com>2016-12-09 02:24:21 -0800
commit2d012c4558046afd6adb3992ff88f937c5f835e4 (patch)
treee41cf9a6eeca0d299296472d6d2bc331f3960e00 /libglusterfs
parent64451d0f25e7cc7aafc1b6589122648281e4310a (diff)
syncop: fix conditional wait bug in parallel dir scan
Problem: The issue as seen by the user is detailed in the BZ, but what is happening is this: if the no. of items in the wait queue == max-qlen, syncop_mt_dir_scan() does a pthread_cond_wait until the launched synctask workers dequeue entries from the queue. But if for some reason a worker fails, the queue is never emptied, and so further invocations of syncop_mt_dir_scan() are blocked forever. Fix: Made some changes to _dir_scan_job_fn - If a worker encounters an error while processing an entry, notify the readdir loop in syncop_mt_dir_scan() of the error but continue to process other entries in the queue, decrementing the qlen as and when we dequeue elements, and ending only when the queue is empty. - If the readdir loop in syncop_mt_dir_scan() gets an error from a worker, stop the readdir+queueing of further entries. Change-Id: I39ce073e01a68c7ff18a0e9227389245a6f75b88 BUG: 1402841 Signed-off-by: Ravishankar N <ravishankar@redhat.com> Reviewed-on: http://review.gluster.org/16073 Smoke: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Diffstat (limited to 'libglusterfs')
-rw-r--r--libglusterfs/src/syncop-utils.c15
1 files changed, 9 insertions, 6 deletions
diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c
index fa9e6a28768..b743bdfae88 100644
--- a/libglusterfs/src/syncop-utils.c
+++ b/libglusterfs/src/syncop-utils.c
@@ -266,9 +266,10 @@ _dir_scan_job_fn (void *data)
entry = NULL;
pthread_mutex_lock (scan_data->mut);
{
- if (ret || list_empty (&scan_data->q->list)) {
- (*scan_data->jobs_running)--;
+ if (ret)
*scan_data->retval |= ret;
+ if (list_empty (&scan_data->q->list)) {
+ (*scan_data->jobs_running)--;
pthread_cond_broadcast (scan_data->cond);
} else {
entry = list_first_entry (&scan_data->q->list,
@@ -406,10 +407,13 @@ syncop_mt_dir_scan (call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
ret = fn (subvol, entry, loc, data);
gf_dirent_entry_free (entry);
if (ret)
- break;
+ goto out;
continue;
}
+ if (retval) /*Any jobs failed?*/
+ goto out;
+
pthread_mutex_lock (&mut);
{
while (qlen == max_qlen)
@@ -423,8 +427,7 @@ syncop_mt_dir_scan (call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
}
}
pthread_mutex_unlock (&mut);
- if (retval) /*Any jobs failed?*/
- break;
+
if (!entry)
continue;
@@ -433,7 +436,7 @@ syncop_mt_dir_scan (call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
&retval, &mut, &cond,
&jobs_running, &qlen, fn, data);
if (ret)
- break;
+ goto out;
}
}