From e66add8a304ca610b74ecbbe48cec72dba582340 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Wed, 4 May 2016 19:05:28 +0530 Subject: cluster/afr: Do heals with shd pid Multi-threaded healing doesn't create the synctask with the shd pid; this leads to healing problems when the quota is exceeded. BUG: 1332994 Change-Id: I80f57c1923756f3298730b8820498127024e1209 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/14211 Smoke: Gluster Build System NetBSD-regression: NetBSD Build System CentOS-regression: Gluster Build System Reviewed-by: Ravishankar N --- libglusterfs/src/syncop-utils.c | 19 +++++++++-------- libglusterfs/src/syncop-utils.h | 6 +++--- tests/basic/afr/heal-quota.t | 35 ++++++++++++++++++++++++++++++++ xlators/cluster/afr/src/afr-self-heald.c | 11 +++++++++- 4 files changed, 58 insertions(+), 13 deletions(-) create mode 100644 tests/basic/afr/heal-quota.t diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c index 7421f81f46c..8f25db237f5 100644 --- a/libglusterfs/src/syncop-utils.c +++ b/libglusterfs/src/syncop-utils.c @@ -284,10 +284,11 @@ _dir_scan_job_fn (void *data) } static int -_run_dir_scan_task (xlator_t *subvol, loc_t *parent, gf_dirent_t *q, - gf_dirent_t *entry, int *retval, pthread_mutex_t *mut, - pthread_cond_t *cond, uint32_t *jobs_running, - uint32_t *qlen, syncop_dir_scan_fn_t fn, void *data) +_run_dir_scan_task (call_frame_t *frame, xlator_t *subvol, loc_t *parent, + gf_dirent_t *q, gf_dirent_t *entry, int *retval, + pthread_mutex_t *mut, pthread_cond_t *cond, + uint32_t *jobs_running, uint32_t *qlen, + syncop_dir_scan_fn_t fn, void *data) { int ret = 0; struct syncop_dir_scan_data *scan_data = NULL; @@ -313,7 +314,7 @@ _run_dir_scan_task (xlator_t *subvol, loc_t *parent, gf_dirent_t *q, scan_data->retval = retval; ret = synctask_new (subvol->ctx->env, _dir_scan_job_fn, - _dir_scan_job_fn_cbk, NULL, scan_data); + _dir_scan_job_fn_cbk, frame, scan_data); out: if (ret < 0) { gf_dirent_entry_free (entry); 
@@ -329,9 +330,9 @@ out: } int -syncop_mt_dir_scan (xlator_t *subvol, loc_t *loc, int pid, void *data, - syncop_dir_scan_fn_t fn, dict_t *xdata, uint32_t max_jobs, - uint32_t max_qlen) +syncop_mt_dir_scan (call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid, + void *data, syncop_dir_scan_fn_t fn, dict_t *xdata, + uint32_t max_jobs, uint32_t max_qlen) { fd_t *fd = NULL; uint64_t offset = 0; @@ -428,7 +429,7 @@ syncop_mt_dir_scan (xlator_t *subvol, loc_t *loc, int pid, void *data, if (!entry) continue; - ret = _run_dir_scan_task (subvol, loc, &q, entry, + ret = _run_dir_scan_task (frame, subvol, loc, &q, entry, &retval, &mut, &cond, &jobs_running, &qlen, fn, data); if (ret) diff --git a/libglusterfs/src/syncop-utils.h b/libglusterfs/src/syncop-utils.h index 52bcfd99429..3968d758c6e 100644 --- a/libglusterfs/src/syncop-utils.h +++ b/libglusterfs/src/syncop-utils.h @@ -19,9 +19,9 @@ syncop_ftw (xlator_t *subvol, loc_t *loc, int pid, void *data, void *data)); int -syncop_mt_dir_scan (xlator_t *subvol, loc_t *loc, int pid, void *data, - syncop_dir_scan_fn_t fn, dict_t *xdata, uint32_t max_jobs, - uint32_t max_qlen); +syncop_mt_dir_scan (call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid, + void *data, syncop_dir_scan_fn_t fn, dict_t *xdata, + uint32_t max_jobs, uint32_t max_qlen); int syncop_dir_scan (xlator_t *subvol, loc_t *loc, int pid, void *data, diff --git a/tests/basic/afr/heal-quota.t b/tests/basic/afr/heal-quota.t new file mode 100644 index 00000000000..2663906f9d5 --- /dev/null +++ b/tests/basic/afr/heal-quota.t @@ -0,0 +1,35 @@ +#!/bin/bash + +#This file tests that heal succeeds even when quota is exceeded + +. $(dirname $0)/../../include.rc +. 
$(dirname $0)/../../volume.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume set $V0 cluster.self-heal-daemon off +TEST $CLI volume start $V0 + +TEST glusterfs --attribute-timeout=0 --entry-timeout=0 --volfile-id=/$V0 --volfile-server=$H0 $M0; +TEST $CLI volume quota $V0 enable +TEST $CLI volume quota $V0 limit-usage / 10MB +TEST $CLI volume quota $V0 soft-timeout 0 +TEST $CLI volume quota $V0 hard-timeout 0 + +TEST touch $M0/a $M0/b +dd if=/dev/zero of=$M0/b bs=1M count=7 +TEST kill_brick $V0 $H0 $B0/${V0}0 +dd if=/dev/zero of=$M0/a bs=1M count=12 #This shall fail +TEST $CLI volume start $V0 force +TEST $CLI volume set $V0 cluster.self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 + +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 + +cleanup diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index 2ec9d9ce686..545ffa0a1d7 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -430,10 +430,17 @@ afr_shd_index_sweep (struct subvol_healer *healer, char *vgfid) int ret = 0; xlator_t *subvol = NULL; dict_t *xdata = NULL; + call_frame_t *frame = NULL; priv = healer->this->private; subvol = priv->children[healer->subvol]; + frame = afr_frame_create (healer->this); + if (!frame) { + ret = -ENOMEM; + goto out; + } + loc.inode = afr_shd_index_inode (healer->this, subvol, vgfid); if (!loc.inode) { gf_msg (healer->this->name, GF_LOG_WARNING, @@ -449,7 +456,7 @@ afr_shd_index_sweep (struct subvol_healer *healer, char *vgfid) goto out; } - ret = syncop_mt_dir_scan (subvol, &loc, GF_CLIENT_PID_SELF_HEALD, + ret = syncop_mt_dir_scan (frame, subvol, &loc, GF_CLIENT_PID_SELF_HEALD, healer, 
afr_shd_index_heal, xdata, priv->shd.max_threads, priv->shd.wait_qlength); @@ -461,6 +468,8 @@ out: if (xdata) dict_unref (xdata); + if (frame) + AFR_STACK_DESTROY (frame); return ret; } -- cgit