-rw-r--r--  libglusterfs/src/defaults-tmpl.c                  7
-rw-r--r--  libglusterfs/src/glusterfs/glusterfs.h            1
-rw-r--r--  libglusterfs/src/glusterfs/xlator.h               2
-rw-r--r--  libglusterfs/src/graph.c                          1
-rw-r--r--  libglusterfs/src/libglusterfs.sym                 1
-rw-r--r--  libglusterfs/src/xlator.c                        23
-rw-r--r--  rpc/rpc-lib/src/rpc-clnt.c                        8
-rw-r--r--  rpc/rpc-lib/src/rpc-clnt.h                        2
-rw-r--r--  tests/basic/graph-cleanup-brick-down-shd-mux.t   64
-rw-r--r--  tests/basic/volume-scale-shd-mux.t                7
-rw-r--r--  xlators/protocol/client/src/client.c             65
11 files changed, 169 insertions(+), 12 deletions(-)
diff --git a/libglusterfs/src/defaults-tmpl.c b/libglusterfs/src/defaults-tmpl.c
index 82e7f78d7f3..3cf707f42aa 100644
--- a/libglusterfs/src/defaults-tmpl.c
+++ b/libglusterfs/src/defaults-tmpl.c
@@ -171,8 +171,11 @@ default_notify(xlator_t *this, int32_t event, void *data, ...)
                 /* Make sure this is not a daemon with master xlator */
                 pthread_mutex_lock(&graph->mutex);
                 {
-                    graph->used = 0;
-                    pthread_cond_broadcast(&graph->child_down_cond);
+                    if (graph->parent_down ==
+                        graph_total_client_xlator(graph)) {
+                        graph->used = 0;
+                        pthread_cond_broadcast(&graph->child_down_cond);
+                    }
                 }
                 pthread_mutex_unlock(&graph->mutex);
             }
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
index 01262dcd9f5..155bf435386 100644
--- a/libglusterfs/src/glusterfs/glusterfs.h
+++ b/libglusterfs/src/glusterfs/glusterfs.h
@@ -594,6 +594,7 @@ struct _glusterfs_graph {
                       in client multiplexed code path */
     pthread_mutex_t mutex;
     pthread_cond_t child_down_cond; /* for broadcasting CHILD_DOWN */
+    int parent_down;
     char graph_uuid[128];
 };
 typedef struct _glusterfs_graph glusterfs_graph_t;
diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
index 6449e59f484..6608d6cdf0d 100644
--- a/libglusterfs/src/glusterfs/xlator.h
+++ b/libglusterfs/src/glusterfs/xlator.h
@@ -1095,4 +1095,6 @@ mgmt_is_multiplexed_daemon(char *name);
 gf_boolean_t
 xlator_is_cleanup_starting(xlator_t *this);
 
+int
+graph_total_client_xlator(glusterfs_graph_t *graph);
 #endif /* _XLATOR_H */
diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
index bbc5ad68d94..e6ae40db2ed 100644
--- a/libglusterfs/src/graph.c
+++ b/libglusterfs/src/graph.c
@@ -1695,6 +1695,7 @@ glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
                "failed to construct the graph");
         goto out;
     }
+    graph->parent_down = 0;
     graph->last_xl = glusterfs_get_last_xlator(graph);
 
     for (xl = graph->first; xl; xl = xl->next) {
diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
index 2e83d3f1003..dc7382ba749 100644
--- a/libglusterfs/src/libglusterfs.sym
+++ b/libglusterfs/src/libglusterfs.sym
@@ -1169,3 +1169,4 @@ glusterfs_process_svc_detach
 mgmt_is_multiplexed_daemon
 xlator_is_cleanup_starting
 gf_nanosleep
+graph_total_client_xlator
diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
index 9906809f7aa..8605fbd0e6f 100644
--- a/libglusterfs/src/xlator.c
+++ b/libglusterfs/src/xlator.c
@@ -1542,3 +1542,26 @@ xlator_is_cleanup_starting(xlator_t *this)
 out:
     return cleanup;
 }
+
+int
+graph_total_client_xlator(glusterfs_graph_t *graph)
+{
+    xlator_t *xl = NULL;
+    int count = 0;
+
+    if (!graph) {
+        gf_msg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+               "graph object is null");
+        goto out;
+    }
+
+    xl = graph->first;
+    while (xl) {
+        if (strcmp(xl->type, "protocol/client") == 0) {
+            count++;
+        }
+        xl = xl->next;
+    }
+out:
+    return count;
+}
diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c
index 8ef05378351..aa65a1f8766 100644
--- a/rpc/rpc-lib/src/rpc-clnt.c
+++ b/rpc/rpc-lib/src/rpc-clnt.c
@@ -1858,7 +1858,7 @@ rpc_clnt_unref(struct rpc_clnt *rpc)
     return rpc;
 }
 
-void
+int
 rpc_clnt_disable(struct rpc_clnt *rpc)
 {
     rpc_clnt_connection_t *conn = NULL;
@@ -1902,8 +1902,9 @@ rpc_clnt_disable(struct rpc_clnt *rpc)
     }
     pthread_mutex_unlock(&conn->lock);
 
+    ret = -1;
     if (trans) {
-        rpc_transport_disconnect(trans, _gf_true);
+        ret = rpc_transport_disconnect(trans, _gf_true);
         /* The auth_value was being reset to AUTH_GLUSTERFS_v2.
          *    if (clnt->auth_value)
          *           clnt->auth_value = AUTH_GLUSTERFS_v2;
@@ -1919,7 +1920,6 @@ rpc_clnt_disable(struct rpc_clnt *rpc)
          * on a connected transport and hence its strictly serialized.
          */
     }
-
     if (unref)
         rpc_clnt_unref(rpc);
 
@@ -1930,7 +1930,7 @@ rpc_clnt_disable(struct rpc_clnt *rpc)
         rpc_clnt_unref(rpc);
 
 out:
-    return;
+    return ret;
 }
 
 void
diff --git a/rpc/rpc-lib/src/rpc-clnt.h b/rpc/rpc-lib/src/rpc-clnt.h
index b46feed50c8..1d3274bbddd 100644
--- a/rpc/rpc-lib/src/rpc-clnt.h
+++ b/rpc/rpc-lib/src/rpc-clnt.h
@@ -250,7 +250,7 @@ int
 rpcclnt_cbk_program_register(struct rpc_clnt *svc,
                              rpcclnt_cb_program_t *program, void *mydata);
 
-void
+int
 rpc_clnt_disable(struct rpc_clnt *rpc);
 
 int
diff --git a/tests/basic/graph-cleanup-brick-down-shd-mux.t b/tests/basic/graph-cleanup-brick-down-shd-mux.t
new file mode 100644
index 00000000000..3c621cdcc26
--- /dev/null
+++ b/tests/basic/graph-cleanup-brick-down-shd-mux.t
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=4
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+
+for i in $(seq 1 2); do
+   TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5}
+   TEST $CLI volume start ${V0}_afr$i
+   TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5}
+   TEST $CLI volume start ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+#Check the thread count become to number of volumes*number of ec subvolume (2*6=12)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+#Check the thread count become to number of volumes*number of afr subvolume (3*6=18)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+#kill one brick and test cleanup
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST $CLI volume stop $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+
+#kill an entire subvol and test cleanup
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+#wait for some time to create a race scenario
+sleep 1
+TEST $CLI volume stop $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+
+#kill all bricks and test cleanup
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST kill_brick $V0 $H0 $B0/${V0}5
+#wait for some time to create a race scenario
+sleep 2
+
+TEST $CLI volume stop $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+
+cleanup
diff --git a/tests/basic/volume-scale-shd-mux.t b/tests/basic/volume-scale-shd-mux.t
index 89b833d5ddc..d1ddcbca7dd 100644
--- a/tests/basic/volume-scale-shd-mux.t
+++ b/tests/basic/volume-scale-shd-mux.t
@@ -23,8 +23,6 @@ for i in $(seq 1 2); do
 done
 
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
-
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
 #Check the thread count become to number of volumes*number of ec subvolume (2*6=12)
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
 #Check the thread count become to number of volumes*number of afr subvolume (3*6=18)
@@ -38,9 +36,9 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^21$" number_healer_threads_shd $V0 "__afr_sh
 
 #Remove the brick and check the detach is successful
 $CLI volume remove-brick $V0 $H0:$B0/${V0}{6,7,8} force
-
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" number_healer_threads_shd $V0 "glusterfs_graph_cleanup"
 
 TEST $CLI volume add-brick ${V0}_ec1 $H0:$B0/${V0}_ec1_add{0,1,2,3,4,5};
 
 #Check the thread count become to number of volumes*number of ec subvolume plus 2 additional threads from newly added bricks (2*6+6=18)
@@ -92,6 +90,9 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^9$" number_healer_threads_shd $V0 "__afr_shd
 TEST $CLI volume remove-brick ${V0}_distribute1 replica 1 $H0:$B0/add/{2..3} force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
 
+#Before stopping the process, make sure there is no pending cleanup threads hanging
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" number_healer_threads_shd $V0 "glusterfs_graph_cleanup"
+
 TEST $CLI volume stop ${V0}
 TEST $CLI volume delete ${V0}
 EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
index 776e7160c51..45e7bfedf91 100644
--- a/xlators/protocol/client/src/client.c
+++ b/xlators/protocol/client/src/client.c
@@ -61,9 +61,54 @@ out:
 }
 
 int
+client_is_last_child_down(xlator_t *this, int32_t event, struct rpc_clnt *rpc)
+{
+    rpc_clnt_connection_t *conn = NULL;
+    int ret = 0;
+
+    clnt_conf_t *conf = this->private;
+    if (!this || !rpc || !conf)
+        goto out;
+
+    if (!conf->parent_down)
+        goto out;
+    conn = &rpc->conn;
+    pthread_mutex_lock(&conn->lock);
+    {
+        if (event == GF_EVENT_CHILD_DOWN && !conn->reconnect && rpc->disabled) {
+            ret = 1;
+        }
+    }
+    pthread_mutex_unlock(&conn->lock);
+out:
+    return ret;
+}
+
+int
 client_notify_dispatch_uniq(xlator_t *this, int32_t event, void *data, ...)
 {
     clnt_conf_t *conf = this->private;
+    glusterfs_ctx_t *ctx = this->ctx;
+    glusterfs_graph_t *graph = this->graph;
+
+    pthread_mutex_lock(&ctx->notify_lock);
+    {
+        while (ctx->notifying)
+            pthread_cond_wait(&ctx->notify_cond, &ctx->notify_lock);
+
+        if (client_is_last_child_down(this, event, data) && graph) {
+            pthread_mutex_lock(&graph->mutex);
+            {
+                graph->parent_down++;
+                if (graph->parent_down == graph_total_client_xlator(graph)) {
+                    graph->used = 0;
+                    pthread_cond_broadcast(&graph->child_down_cond);
+                }
+            }
+            pthread_mutex_unlock(&graph->mutex);
+        }
+    }
+    pthread_mutex_unlock(&ctx->notify_lock);
 
     if (conf->last_sent_event == event)
         return 0;
@@ -81,6 +126,7 @@ client_notify_dispatch(xlator_t *this, int32_t event, void *data, ...)
 {
     int ret = -1;
     glusterfs_ctx_t *ctx = this->ctx;
+
     clnt_conf_t *conf = this->private;
 
     pthread_mutex_lock(&ctx->notify_lock);
@@ -94,6 +140,7 @@ client_notify_dispatch(xlator_t *this, int32_t event, void *data, ...)
     /* We assume that all translators in the graph handle notification
      * events in sequence.
      * */
+
     ret = default_notify(this, event, data);
 
     /* NB (Even) with MT-epoll and EPOLLET|EPOLLONESHOT we are guaranteed
@@ -2376,7 +2423,7 @@ client_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
                    replicate), hence make sure events which are passed
                    to parent are genuine */
                 ret = client_notify_dispatch_uniq(this, GF_EVENT_CHILD_DOWN,
-                                                  NULL);
+                                                  rpc);
                 if (is_parent_down) {
                     /* If parent is down, then there should not be any
                      * operation after a child down.
@@ -2424,6 +2471,8 @@ int
 notify(xlator_t *this, int32_t event, void *data, ...)
 {
     clnt_conf_t *conf = NULL;
+    glusterfs_graph_t *graph = this->graph;
+    int ret = -1;
 
     conf = this->private;
     if (!conf)
@@ -2450,7 +2499,19 @@ notify(xlator_t *this, int32_t event, void *data, ...)
             }
             pthread_mutex_unlock(&conf->lock);
 
-            rpc_clnt_disable(conf->rpc);
+            ret = rpc_clnt_disable(conf->rpc);
+            if (ret == -1 && graph) {
+                pthread_mutex_lock(&graph->mutex);
+                {
+                    graph->parent_down++;
+                    if (graph->parent_down ==
+                        graph_total_client_xlator(graph)) {
+                        graph->used = 0;
+                        pthread_cond_broadcast(&graph->child_down_cond);
+                    }
+                }
+                pthread_mutex_unlock(&graph->mutex);
+            }
             break;
 
         default:
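The core of this change is a counting handshake between the protocol/client xlators and the graph-cleanup path: each client xlator increments graph->parent_down under graph->mutex when its final CHILD_DOWN arrives, and only when that counter reaches graph_total_client_xlator(graph) is graph->used cleared and child_down_cond broadcast, so the thread tearing down the graph stops waiting. The standalone sketch below illustrates that pattern with plain pthreads; it is not part of the patch, and the demo_graph struct, thread functions, and NUM_CLIENT_XLATORS constant are simplified stand-ins for the real GlusterFS types.

/* Standalone illustration of the counting pattern used by this patch:
 * every "client xlator" that sees its final CHILD_DOWN increments
 * parent_down under the graph mutex, and only the last one (count ==
 * number of protocol/client xlators) wakes the cleanup thread waiting
 * on child_down_cond. Build with: cc -pthread demo.c */
#include <pthread.h>
#include <stdio.h>

#define NUM_CLIENT_XLATORS 3 /* stand-in for graph_total_client_xlator() */

struct demo_graph {
    pthread_mutex_t mutex;
    pthread_cond_t child_down_cond;
    int parent_down;
    int used;
};

static struct demo_graph graph = {
    .mutex = PTHREAD_MUTEX_INITIALIZER,
    .child_down_cond = PTHREAD_COND_INITIALIZER,
    .parent_down = 0,
    .used = 1,
};

/* Models one client xlator delivering its last CHILD_DOWN. */
static void *client_child_down(void *arg)
{
    int id = *(int *)arg;

    pthread_mutex_lock(&graph.mutex);
    {
        graph.parent_down++;
        if (graph.parent_down == NUM_CLIENT_XLATORS) {
            /* Last client down: mark the graph unused and wake cleanup. */
            graph.used = 0;
            pthread_cond_broadcast(&graph.child_down_cond);
        }
        printf("client %d down (%d/%d)\n", id, graph.parent_down,
               NUM_CLIENT_XLATORS);
    }
    pthread_mutex_unlock(&graph.mutex);
    return NULL;
}

/* Models the graph-cleanup thread waiting for all CHILD_DOWN events. */
static void *graph_cleanup(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&graph.mutex);
    while (graph.used)
        pthread_cond_wait(&graph.child_down_cond, &graph.mutex);
    pthread_mutex_unlock(&graph.mutex);
    printf("all clients down, cleaning up graph\n");
    return NULL;
}

int main(void)
{
    pthread_t cleaner, clients[NUM_CLIENT_XLATORS];
    int ids[NUM_CLIENT_XLATORS];
    int i;

    pthread_create(&cleaner, NULL, graph_cleanup, NULL);
    for (i = 0; i < NUM_CLIENT_XLATORS; i++) {
        ids[i] = i;
        pthread_create(&clients[i], NULL, client_child_down, &ids[i]);
    }
    for (i = 0; i < NUM_CLIENT_XLATORS; i++)
        pthread_join(clients[i], NULL);
    pthread_join(cleaner, NULL);
    return 0;
}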
