diff options
-rw-r--r-- | libglusterfs/src/defaults-tmpl.c | 7 | ||||
-rw-r--r-- | libglusterfs/src/glusterfs/glusterfs.h | 1 | ||||
-rw-r--r-- | libglusterfs/src/glusterfs/xlator.h | 2 | ||||
-rw-r--r-- | libglusterfs/src/graph.c | 1 | ||||
-rw-r--r-- | libglusterfs/src/libglusterfs.sym | 1 | ||||
-rw-r--r-- | libglusterfs/src/xlator.c | 23 | ||||
-rw-r--r-- | rpc/rpc-lib/src/rpc-clnt.c | 8 | ||||
-rw-r--r-- | rpc/rpc-lib/src/rpc-clnt.h | 2 | ||||
-rw-r--r-- | tests/basic/graph-cleanup-brick-down-shd-mux.t | 64 | ||||
-rw-r--r-- | tests/basic/volume-scale-shd-mux.t | 7 | ||||
-rw-r--r-- | xlators/protocol/client/src/client.c | 65 |
11 files changed, 169 insertions, 12 deletions
diff --git a/libglusterfs/src/defaults-tmpl.c b/libglusterfs/src/defaults-tmpl.c index 82e7f78d7f3..3cf707f42aa 100644 --- a/libglusterfs/src/defaults-tmpl.c +++ b/libglusterfs/src/defaults-tmpl.c @@ -171,8 +171,11 @@ default_notify(xlator_t *this, int32_t event, void *data, ...) /* Make sure this is not a daemon with master xlator */ pthread_mutex_lock(&graph->mutex); { - graph->used = 0; - pthread_cond_broadcast(&graph->child_down_cond); + if (graph->parent_down == + graph_total_client_xlator(graph)) { + graph->used = 0; + pthread_cond_broadcast(&graph->child_down_cond); + } } pthread_mutex_unlock(&graph->mutex); } diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h index 01262dcd9f5..155bf435386 100644 --- a/libglusterfs/src/glusterfs/glusterfs.h +++ b/libglusterfs/src/glusterfs/glusterfs.h @@ -594,6 +594,7 @@ struct _glusterfs_graph { in client multiplexed code path */ pthread_mutex_t mutex; pthread_cond_t child_down_cond; /* for broadcasting CHILD_DOWN */ + int parent_down; char graph_uuid[128]; }; typedef struct _glusterfs_graph glusterfs_graph_t; diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h index 6449e59f484..6608d6cdf0d 100644 --- a/libglusterfs/src/glusterfs/xlator.h +++ b/libglusterfs/src/glusterfs/xlator.h @@ -1095,4 +1095,6 @@ mgmt_is_multiplexed_daemon(char *name); gf_boolean_t xlator_is_cleanup_starting(xlator_t *this); +int +graph_total_client_xlator(glusterfs_graph_t *graph); #endif /* _XLATOR_H */ diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c index bbc5ad68d94..e6ae40db2ed 100644 --- a/libglusterfs/src/graph.c +++ b/libglusterfs/src/graph.c @@ -1695,6 +1695,7 @@ glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp, "failed to construct the graph"); goto out; } + graph->parent_down = 0; graph->last_xl = glusterfs_get_last_xlator(graph); for (xl = graph->first; xl; xl = xl->next) { diff --git a/libglusterfs/src/libglusterfs.sym 
b/libglusterfs/src/libglusterfs.sym index 2e83d3f1003..dc7382ba749 100644 --- a/libglusterfs/src/libglusterfs.sym +++ b/libglusterfs/src/libglusterfs.sym @@ -1169,3 +1169,4 @@ glusterfs_process_svc_detach mgmt_is_multiplexed_daemon xlator_is_cleanup_starting gf_nanosleep +graph_total_client_xlator diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c index 9906809f7aa..8605fbd0e6f 100644 --- a/libglusterfs/src/xlator.c +++ b/libglusterfs/src/xlator.c @@ -1542,3 +1542,26 @@ xlator_is_cleanup_starting(xlator_t *this) out: return cleanup; } + +int +graph_total_client_xlator(glusterfs_graph_t *graph) +{ + xlator_t *xl = NULL; + int count = 0; + + if (!graph) { + gf_msg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, + "graph object is null"); + goto out; + } + + xl = graph->first; + while (xl) { + if (strcmp(xl->type, "protocol/client") == 0) { + count++; + } + xl = xl->next; + } +out: + return count; +} diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c index 8ef05378351..aa65a1f8766 100644 --- a/rpc/rpc-lib/src/rpc-clnt.c +++ b/rpc/rpc-lib/src/rpc-clnt.c @@ -1858,7 +1858,7 @@ rpc_clnt_unref(struct rpc_clnt *rpc) return rpc; } -void +int rpc_clnt_disable(struct rpc_clnt *rpc) { rpc_clnt_connection_t *conn = NULL; @@ -1902,8 +1902,9 @@ rpc_clnt_disable(struct rpc_clnt *rpc) } pthread_mutex_unlock(&conn->lock); + ret = -1; if (trans) { - rpc_transport_disconnect(trans, _gf_true); + ret = rpc_transport_disconnect(trans, _gf_true); /* The auth_value was being reset to AUTH_GLUSTERFS_v2. * if (clnt->auth_value) * clnt->auth_value = AUTH_GLUSTERFS_v2; @@ -1919,7 +1920,6 @@ rpc_clnt_disable(struct rpc_clnt *rpc) * on a connected transport and hence its strictly serialized. 
*/ } - if (unref) rpc_clnt_unref(rpc); @@ -1930,7 +1930,7 @@ rpc_clnt_disable(struct rpc_clnt *rpc) rpc_clnt_unref(rpc); out: - return; + return ret; } void diff --git a/rpc/rpc-lib/src/rpc-clnt.h b/rpc/rpc-lib/src/rpc-clnt.h index b46feed50c8..1d3274bbddd 100644 --- a/rpc/rpc-lib/src/rpc-clnt.h +++ b/rpc/rpc-lib/src/rpc-clnt.h @@ -250,7 +250,7 @@ int rpcclnt_cbk_program_register(struct rpc_clnt *svc, rpcclnt_cb_program_t *program, void *mydata); -void +int rpc_clnt_disable(struct rpc_clnt *rpc); int diff --git a/tests/basic/graph-cleanup-brick-down-shd-mux.t b/tests/basic/graph-cleanup-brick-down-shd-mux.t new file mode 100644 index 00000000000..3c621cdcc26 --- /dev/null +++ b/tests/basic/graph-cleanup-brick-down-shd-mux.t @@ -0,0 +1,64 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +cleanup; + +TESTS_EXPECTED_IN_LOOP=4 + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5} +TEST $CLI volume set $V0 cluster.background-self-heal-count 0 +TEST $CLI volume set $V0 cluster.eager-lock off +TEST $CLI volume set $V0 performance.flush-behind off +TEST $CLI volume start $V0 + +for i in $(seq 1 2); do + TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5} + TEST $CLI volume start ${V0}_afr$i + TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5} + TEST $CLI volume start ${V0}_ec$i +done + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count +#Check the thread count become to number of volumes*number of ec subvolume (2*6=12) +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "ec_shd_index_healer" +#Check the thread count become to number of volumes*number of afr subvolume (3*6=18) +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "afr_shd_index_healer" + +#kill one brick and test cleanup +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST $CLI volume stop $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 
"^12$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer" +TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer" + +#kill an entire subvol and test cleanup +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST kill_brick $V0 $H0 $B0/${V0}2 +#wait for some time to create a race scenario +sleep 1 +TEST $CLI volume stop $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer" +TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer" + +#kill all bricks and test cleanup +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST kill_brick $V0 $H0 $B0/${V0}2 +TEST kill_brick $V0 $H0 $B0/${V0}3 +TEST kill_brick $V0 $H0 $B0/${V0}4 +TEST kill_brick $V0 $H0 $B0/${V0}5 +#wait for some time to create a race scenario +sleep 2 + +TEST $CLI volume stop $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer" +TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer" + +cleanup diff --git a/tests/basic/volume-scale-shd-mux.t b/tests/basic/volume-scale-shd-mux.t index 89b833d5ddc..d1ddcbca7dd 100644 --- a/tests/basic/volume-scale-shd-mux.t +++ b/tests/basic/volume-scale-shd-mux.t @@ -23,8 +23,6 @@ for i in $(seq 1 2); do done EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count - -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count #Check the thread count become to number of volumes*number of ec subvolume (2*6=12) EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "__ec_shd_healer_wait" #Check the thread count become to number of volumes*number of afr subvolume (3*6=18) EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__afr_sh @@ -38,9 +36,9 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^21$" number_healer_threads_shd $V0 "__afr_sh #Remove the brick and check the detach
is successful $CLI volume remove-brick $V0 $H0:$B0/${V0}{6,7,8} force - EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" number_healer_threads_shd $V0 "glusterfs_graph_cleanup" TEST $CLI volume add-brick ${V0}_ec1 $H0:$B0/${V0}_ec1_add{0,1,2,3,4,5}; #Check the thread count become to number of volumes*number of ec subvolume plus 2 additional threads from newly added bricks (2*6+6=18) @@ -92,6 +90,9 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^9$" number_healer_threads_shd $V0 "__afr_shd TEST $CLI volume remove-brick ${V0}_distribute1 replica 1 $H0:$B0/add/{2..3} force EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" +#Before stopping the process, make sure there are no pending cleanup threads hanging +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" number_healer_threads_shd $V0 "glusterfs_graph_cleanup" + TEST $CLI volume stop ${V0} TEST $CLI volume delete ${V0} EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c index 776e7160c51..45e7bfedf91 100644 --- a/xlators/protocol/client/src/client.c +++ b/xlators/protocol/client/src/client.c @@ -61,9 +61,54 @@ out: } int +client_is_last_child_down(xlator_t *this, int32_t event, struct rpc_clnt *rpc) +{ + rpc_clnt_connection_t *conn = NULL; + int ret = 0; + + clnt_conf_t *conf = this->private; + if (!this || !rpc || !conf) + goto out; + + if (!conf->parent_down) + goto out; + conn = &rpc->conn; + pthread_mutex_lock(&conn->lock); + { + if (event == GF_EVENT_CHILD_DOWN && !conn->reconnect && rpc->disabled) { + ret = 1; + } + } + pthread_mutex_unlock(&conn->lock); +out: + return ret; +} + +int client_notify_dispatch_uniq(xlator_t *this, int32_t event, void *data, ...)
{ clnt_conf_t *conf = this->private; + glusterfs_ctx_t *ctx = this->ctx; + glusterfs_graph_t *graph = this->graph; + + pthread_mutex_lock(&ctx->notify_lock); + { + while (ctx->notifying) + pthread_cond_wait(&ctx->notify_cond, &ctx->notify_lock); + + if (client_is_last_child_down(this, event, data) && graph) { + pthread_mutex_lock(&graph->mutex); + { + graph->parent_down++; + if (graph->parent_down == graph_total_client_xlator(graph)) { + graph->used = 0; + pthread_cond_broadcast(&graph->child_down_cond); + } + } + pthread_mutex_unlock(&graph->mutex); + } + } + pthread_mutex_unlock(&ctx->notify_lock); if (conf->last_sent_event == event) return 0; @@ -81,6 +126,7 @@ client_notify_dispatch(xlator_t *this, int32_t event, void *data, ...) { int ret = -1; glusterfs_ctx_t *ctx = this->ctx; + clnt_conf_t *conf = this->private; pthread_mutex_lock(&ctx->notify_lock); @@ -94,6 +140,7 @@ client_notify_dispatch(xlator_t *this, int32_t event, void *data, ...) /* We assume that all translators in the graph handle notification * events in sequence. * */ + ret = default_notify(this, event, data); /* NB (Even) with MT-epoll and EPOLLET|EPOLLONESHOT we are guaranteed @@ -2376,7 +2423,7 @@ client_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, replicate), hence make sure events which are passed to parent are genuine */ ret = client_notify_dispatch_uniq(this, GF_EVENT_CHILD_DOWN, - NULL); + rpc); if (is_parent_down) { /* If parent is down, then there should not be any * operation after a child down. @@ -2424,6 +2471,8 @@ int notify(xlator_t *this, int32_t event, void *data, ...) { clnt_conf_t *conf = NULL; + glusterfs_graph_t *graph = this->graph; + int ret = -1; conf = this->private; if (!conf) @@ -2450,7 +2499,19 @@ notify(xlator_t *this, int32_t event, void *data, ...) 
} pthread_mutex_unlock(&conf->lock); - rpc_clnt_disable(conf->rpc); + ret = rpc_clnt_disable(conf->rpc); + if (ret == -1 && graph) { + pthread_mutex_lock(&graph->mutex); + { + graph->parent_down++; + if (graph->parent_down == + graph_total_client_xlator(graph)) { + graph->used = 0; + pthread_cond_broadcast(&graph->child_down_cond); + } + } + pthread_mutex_unlock(&graph->mutex); + } break; default: |