From 0043c63f70776444f69667a4ef9596217ecb42b7 Mon Sep 17 00:00:00 2001 From: Mohit Agrawal Date: Mon, 12 Mar 2018 19:43:15 +0530 Subject: gluster: Sometimes Brick process is crashed at the time of stopping brick Problem: The brick process sometimes crashes when a brick is stopped while brick mux is enabled. Solution: The brick process was crashing because rpc connections were not being cleaned up properly while brick mux is enabled. With this patch, after the GF_EVENT_CLEANUP notification is sent to the xlator (server), the code waits for all rpc client connections of that specific xlator to be destroyed. Once the rpc connections of all clients associated with that brick have been destroyed in server_rpc_notify, xlator_mem_cleanup is called for the brick xlator as well as all of its child xlators. To avoid races during cleanup, two new flags are introduced in each xlator: cleanup_starting and call_cleanup. BUG: 1544090 Signed-off-by: Mohit Agrawal Note: All test cases were run in a separate build (https://review.gluster.org/#/c/19700/) with this same patch after forcibly enabling brick mux; all test cases passed. 
Change-Id: Ic4ab9c128df282d146cf1135640281fcb31997bf updates: bz#1544090 --- libglusterfs/src/client_t.c | 4 +-- libglusterfs/src/fd.c | 4 +-- libglusterfs/src/graph.c | 6 ++-- libglusterfs/src/inode.c | 2 +- libglusterfs/src/libglusterfs.sym | 1 + libglusterfs/src/statedump.c | 61 +++++++++++++++++++++++++++++---------- libglusterfs/src/xlator.c | 8 +++-- libglusterfs/src/xlator.h | 11 ++++++- 8 files changed, 70 insertions(+), 27 deletions(-) (limited to 'libglusterfs') diff --git a/libglusterfs/src/client_t.c b/libglusterfs/src/client_t.c index 63f4bbb4b06..4596db3470f 100644 --- a/libglusterfs/src/client_t.c +++ b/libglusterfs/src/client_t.c @@ -338,7 +338,7 @@ gf_client_destroy_recursive (xlator_t *xl, client_t *client) { xlator_list_t *trav; - if (xl->cbks->client_destroy) { + if (!xl->call_cleanup && xl->cbks->client_destroy) { xl->cbks->client_destroy (xl, client); } @@ -398,7 +398,7 @@ gf_client_disconnect_recursive (xlator_t *xl, client_t *client) int ret = 0; xlator_list_t *trav; - if (xl->cbks->client_disconnect) { + if (!xl->call_cleanup && xl->cbks->client_disconnect) { ret = xl->cbks->client_disconnect (xl, client); } diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c index d31b106aa8b..30a7494540d 100644 --- a/libglusterfs/src/fd.c +++ b/libglusterfs/src/fd.c @@ -484,7 +484,7 @@ fd_destroy (fd_t *fd, gf_boolean_t bound) xl = fd->_ctx[i].xl_key; old_THIS = THIS; THIS = xl; - if (xl->cbks->releasedir) + if (!xl->call_cleanup && xl->cbks->releasedir) xl->cbks->releasedir (xl, fd); THIS = old_THIS; } @@ -495,7 +495,7 @@ fd_destroy (fd_t *fd, gf_boolean_t bound) xl = fd->_ctx[i].xl_key; old_THIS = THIS; THIS = xl; - if (xl->cbks->release) + if (!xl->call_cleanup && xl->cbks->release) xl->cbks->release (xl, fd); THIS = old_THIS; } diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c index d36cf7b3da5..2d560b7f265 100644 --- a/libglusterfs/src/graph.c +++ b/libglusterfs/src/graph.c @@ -841,7 +841,7 @@ is_graph_topology_equal 
(glusterfs_graph_t *graph1, glusterfs_graph_t *graph2) trav2 = trav2->children->xlator; for (ltrav = trav1->children; ltrav; ltrav = ltrav->next) { trav1 = ltrav->xlator; - if (strcmp (trav1->name, trav2->name) == 0) { + if (!trav1->cleanup_starting && !strcmp (trav1->name, trav2->name)) { break; } } @@ -1088,7 +1088,7 @@ glusterfs_graph_reconfigure (glusterfs_graph_t *oldgraph, new_xl = FIRST_CHILD (new_xl); for (trav = old_xl->children; trav; trav = trav->next) { - if (strcmp (trav->xlator->name, new_xl->name) == 0) { + if (!trav->xlator->cleanup_starting && !strcmp (trav->xlator->name, new_xl->name)) { return xlator_tree_reconfigure (trav->xlator, new_xl); } } @@ -1237,7 +1237,7 @@ glusterfs_graph_attach (glusterfs_graph_t *orig_graph, char *path, xl->volfile_id[strlen(xl->volfile_id)-4] = '\0'; } - /* TBD: memory leaks everywhere */ + /* TODO memory leaks everywhere need to free graph in case of error */ if (glusterfs_graph_prepare (graph, this->ctx, xl->name)) { gf_log (this->name, GF_LOG_WARNING, "failed to prepare graph for xlator %s", xl->name); diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c index 2100ea3cad2..093683d41da 100644 --- a/libglusterfs/src/inode.c +++ b/libglusterfs/src/inode.c @@ -327,7 +327,7 @@ __inode_ctx_free (inode_t *inode) xl = (xlator_t *)(long)inode->_ctx[index].xl_key; old_THIS = THIS; THIS = xl; - if (xl->cbks->forget) + if (!xl->call_cleanup && xl->cbks->forget) xl->cbks->forget (xl, inode); THIS = old_THIS; } diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym index 1d21cfa8465..4b7dcb6f3e6 100644 --- a/libglusterfs/src/libglusterfs.sym +++ b/libglusterfs/src/libglusterfs.sym @@ -1100,6 +1100,7 @@ xlator_tree_free_members xlator_volopt_dynload xlator_volume_option_get xlator_volume_option_get_list +xlator_memrec_free default_fops gf_fop_list gf_upcall_list diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c index 34b8061425c..412a47b9383 100644 --- 
a/libglusterfs/src/statedump.c +++ b/libglusterfs/src/statedump.c @@ -501,21 +501,14 @@ gf_proc_dump_dict_info (glusterfs_ctx_t *ctx) (total_pairs / total_dicts)); } -void -gf_proc_dump_xlator_info (xlator_t *top) +static void +gf_proc_dump_per_xlator_info (xlator_t *top) { - xlator_t *trav = NULL; - glusterfs_ctx_t *ctx = NULL; + xlator_t *trav = top; + glusterfs_ctx_t *ctx = top->ctx; char itable_key[1024] = {0,}; - if (!top) - return; - - ctx = top->ctx; - - trav = top; - while (trav) { - + while (trav && !trav->cleanup_starting) { if (ctx->measure_latency) gf_proc_dump_latency_info (trav); @@ -539,7 +532,6 @@ gf_proc_dump_xlator_info (xlator_t *top) if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED (inode) && (trav->dumpops->inode)) trav->dumpops->inode (trav); - if (trav->dumpops->fd && GF_PROC_DUMP_IS_XL_OPTION_ENABLED (fd)) trav->dumpops->fd (trav); @@ -550,6 +542,30 @@ gf_proc_dump_xlator_info (xlator_t *top) trav = trav->next; } +} + + + +void +gf_proc_dump_xlator_info (xlator_t *top, gf_boolean_t brick_mux) +{ + xlator_t *trav = NULL; + xlator_list_t **trav_p = NULL; + + if (!top) + return; + + trav = top; + gf_proc_dump_per_xlator_info (trav); + + if (brick_mux) { + trav_p = &top->children; + while (*trav_p) { + trav = (*trav_p)->xlator; + gf_proc_dump_per_xlator_info (trav); + trav_p = &(*trav_p)->next; + } + } return; } @@ -803,12 +819,27 @@ gf_proc_dump_info (int signum, glusterfs_ctx_t *ctx) char tmp_dump_name[PATH_MAX] = {0,}; char path[PATH_MAX] = {0,}; struct timeval tv = {0,}; + gf_boolean_t is_brick_mux = _gf_false; + xlator_t *top = NULL; + xlator_list_t **trav_p = NULL; + int brick_count = 0; gf_proc_dump_lock (); if (!ctx) goto out; + if (ctx) { + top = ctx->active->first; + for (trav_p = &top->children; *trav_p; + trav_p = &(*trav_p)->next) { + brick_count++; + } + + if (brick_count > 1) + is_brick_mux = _gf_true; + } + if (ctx->cmd_args.brick_name) { GF_REMOVE_SLASH_FROM_PATH (ctx->cmd_args.brick_name, brick_name); } else @@ -868,12 +899,12 @@ 
gf_proc_dump_info (int signum, glusterfs_ctx_t *ctx) if (ctx->master) { gf_proc_dump_add_section ("fuse"); - gf_proc_dump_xlator_info (ctx->master); + gf_proc_dump_xlator_info (ctx->master, _gf_false); } if (ctx->active) { gf_proc_dump_add_section ("active graph - %d", ctx->graph_id); - gf_proc_dump_xlator_info (ctx->active->top); + gf_proc_dump_xlator_info (ctx->active->top, is_brick_mux); } i = 0; diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c index 466be5e3a3a..ced89c71672 100644 --- a/libglusterfs/src/xlator.c +++ b/libglusterfs/src/xlator.c @@ -629,7 +629,7 @@ get_xlator_by_name_or_type (xlator_t *this, char *target, int is_name) for (trav = this->children; trav; trav = trav->next) { value = is_name ? trav->xlator->name : trav->xlator->type; - if (strcmp(value, target) == 0) { + if (!strcmp(value, target)) { return trav->xlator; } child_xl = get_xlator_by_name_or_type (trav->xlator, target, @@ -722,7 +722,9 @@ xlator_init (xlator_t *xl) } xl->init_succeeded = 1; - + /*xl->cleanup_starting = 0; + xl->call_cleanup = 0; + */ ret = 0; out: return ret; @@ -858,7 +860,7 @@ xlator_list_destroy (xlator_list_t *list) return 0; } -static int +int xlator_memrec_free (xlator_t *xl) { uint32_t i = 0; diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h index 2f8fed6bb64..3c16758e1a9 100644 --- a/libglusterfs/src/xlator.h +++ b/libglusterfs/src/xlator.h @@ -1043,6 +1043,14 @@ struct _xlator { /* Is this pass_through? */ gf_boolean_t pass_through; struct xlator_fops *pass_through_fops; + + /* cleanup flag to avoid races during xlator cleanup */ + uint32_t cleanup_starting; + + /* flag to avoid recall of xlator_mem_cleanup for xame xlator */ + uint32_t call_cleanup; + + }; typedef struct { @@ -1236,5 +1244,6 @@ copy_opts_to_child (xlator_t *src, xlator_t *dst, char *glob); int glusterfs_delete_volfile_checksum (glusterfs_ctx_t *ctx, const char *volfile_id); - +int +xlator_memrec_free (xlator_t *xl); #endif /* _XLATOR_H */ -- cgit