diff options
Diffstat (limited to 'xlators/protocol/client/src/client-handshake.c')
| -rw-r--r-- | xlators/protocol/client/src/client-handshake.c | 256 |
1 files changed, 152 insertions, 104 deletions
diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c index f5f056c6a..5668fea53 100644 --- a/xlators/protocol/client/src/client-handshake.c +++ b/xlators/protocol/client/src/client-handshake.c @@ -25,6 +25,7 @@ #include "portmap-xdr.h" #include "rpc-common-xdr.h" +#define CLIENT_REOPEN_MAX_ATTEMPTS 1024 extern rpc_clnt_prog_t clnt3_3_fop_prog; extern rpc_clnt_prog_t clnt_pmap_prog; @@ -52,7 +53,7 @@ rpc_client_ping_timer_expired (void *data) rpc_clnt_connection_t *conn = NULL; int disconnect = 0; int transport_activity = 0; - struct timeval timeout = {0, }; + struct timespec timeout = {0, }; struct timeval current = {0, }; struct rpc_clnt *clnt = NULL; xlator_t *this = NULL; @@ -100,7 +101,7 @@ rpc_client_ping_timer_expired (void *data) "ping timer expired but transport activity " "detected - not bailing transport"); timeout.tv_sec = conf->opt.ping_timeout; - timeout.tv_usec = 0; + timeout.tv_nsec = 0; conn->ping_timer = gf_timer_call_after (this->ctx, timeout, @@ -139,7 +140,7 @@ client_start_ping (void *data) clnt_conf_t *conf = NULL; rpc_clnt_connection_t *conn = NULL; int32_t ret = -1; - struct timeval timeout = {0, }; + struct timespec timeout = {0, }; call_frame_t *frame = NULL; int frame_count = 0; @@ -195,7 +196,7 @@ client_start_ping (void *data) } timeout.tv_sec = conf->opt.ping_timeout; - timeout.tv_usec = 0; + timeout.tv_nsec = 0; conn->ping_timer = gf_timer_call_after (this->ctx, timeout, @@ -240,7 +241,7 @@ client_ping_cbk (struct rpc_req *req, struct iovec *iov, int count, { xlator_t *this = NULL; rpc_clnt_connection_t *conn = NULL; - struct timeval timeout = {0, }; + struct timespec timeout = {0, }; call_frame_t *frame = NULL; clnt_conf_t *conf = NULL; @@ -280,7 +281,7 @@ client_ping_cbk (struct rpc_req *req, struct iovec *iov, int count, timeout.tv_sec = conf->opt.ping_timeout; - timeout.tv_usec = 0; + timeout.tv_nsec = 0; gf_timer_call_cancel (this->ctx, conn->ping_timer); @@ -415,9 +416,6 @@ clnt_fd_lk_reacquire_failed (xlator_t *this, clnt_fd_ctx_t *fdctx, { fdctx->remote_fd = -1; fdctx->lk_heal_state = GF_LK_HEAL_DONE; - - list_add_tail (&fdctx->sfd_pos, - &conf->saved_fds); } pthread_mutex_unlock (&conf->lock); @@ -603,7 +601,7 @@ clnt_release_reopen_fd_cbk (struct rpc_req *req, struct iovec *iov, clnt_fd_lk_reacquire_failed (this, fdctx, conf); - decrement_reopen_fd_count (this, conf); + fdctx->reopen_done (fdctx, this); frame->local = NULL; STACK_DESTROY (frame->root); @@ -633,10 +631,11 @@ clnt_release_reopen_fd (xlator_t *this, clnt_fd_ctx_t *fdctx) clnt_release_reopen_fd_cbk, NULL, NULL, 0, NULL, 0, NULL, (xdrproc_t)xdr_gfs3_releasedir_req); + return 0; out: if (ret) { - decrement_reopen_fd_count (this, conf); clnt_fd_lk_reacquire_failed (this, fdctx, conf); + fdctx->reopen_done (fdctx, this); if (frame) { frame->local = NULL; STACK_DESTROY (frame->root); @@ -754,13 +753,10 @@ client_reacquire_lock_cbk (struct rpc_req *req, struct iovec *iov, pthread_mutex_lock (&conf->lock); { fdctx->lk_heal_state = GF_LK_HEAL_DONE; - - list_add_tail (&fdctx->sfd_pos, - &conf->saved_fds); } pthread_mutex_unlock (&conf->lock); - decrement_reopen_fd_count (this, conf); + fdctx->reopen_done (fdctx, this); } ret = 0; @@ -869,8 +865,7 @@ client_reacquire_lock (xlator_t *this, clnt_fd_ctx_t *fdctx) if (client_fd_lk_list_empty (fdctx->lk_ctx, _gf_false)) { gf_log (this->name, GF_LOG_DEBUG, "fd lock list is empty"); - decrement_reopen_fd_count (this, - (clnt_conf_t *)this->private); + fdctx->reopen_done (fdctx, this); } else { lk_ctx = fdctx->lk_ctx; @@ -885,14 +880,66 @@ out: return ret; } +void +client_default_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this) +{ + gf_log_callingfn (this->name, GF_LOG_WARNING, + "This function should never be called"); +} + +void +client_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this) +{ + clnt_conf_t *conf = NULL; + gf_boolean_t destroy = _gf_false; + + conf = this->private; + + pthread_mutex_lock (&conf->lock); + { + fdctx->reopen_attempts = 0; + if (!fdctx->released) + list_add_tail (&fdctx->sfd_pos, &conf->saved_fds); + else + destroy = _gf_true; + fdctx->reopen_done = client_default_reopen_done; + } + pthread_mutex_unlock (&conf->lock); + + if (destroy) + client_fdctx_destroy (this, fdctx); +} + +void +client_child_up_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this) +{ + clnt_conf_t *conf = NULL; + uint64_t fd_count = 0; + + conf = this->private; + + LOCK (&conf->rec_lock); + { + fd_count = --(conf->reopen_fd_count); + } + UNLOCK (&conf->rec_lock); + + client_reopen_done (fdctx, this); + if (fd_count == 0) { + gf_log (this->name, GF_LOG_INFO, + "last fd open'd/lock-self-heal'd - notifying CHILD-UP"); + client_set_lk_version (this); + client_notify_parents_child_up (this); + } +} + int client3_3_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { int32_t ret = -1; gfs3_open_rsp rsp = {0,}; - int attempt_lock_recovery = _gf_false; - uint64_t fd_count = 0; + gf_boolean_t attempt_lock_recovery = _gf_false; clnt_local_t *local = NULL; clnt_conf_t *conf = NULL; clnt_fd_ctx_t *fdctx = NULL; @@ -900,12 +947,10 @@ client3_3_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count, xlator_t *this = NULL; frame = myframe; - if (!frame || !frame->this) - goto out; - this = frame->this; + conf = this->private; local = frame->local; - conf = frame->this->private; + fdctx = local->fdctx; if (-1 == req->rpc_status) { gf_log (frame->this->name, GF_LOG_WARNING, @@ -938,32 +983,23 @@ client3_3_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count, goto out; } - fdctx = local->fdctx; - if (!fdctx) { - gf_log (frame->this->name, GF_LOG_WARNING, "fdctx not found"); - ret = -1; - goto out; - } - pthread_mutex_lock (&conf->lock); { fdctx->remote_fd = rsp.fd; if (!fdctx->released) { - if (!client_fd_lk_list_empty (fdctx->lk_ctx, _gf_false)) { + if (conf->lk_heal && + !client_fd_lk_list_empty (fdctx->lk_ctx, + _gf_false)) { attempt_lock_recovery = _gf_true; fdctx->lk_heal_state = GF_LK_HEAL_IN_PROGRESS; - } else { - list_add_tail (&fdctx->sfd_pos, - &conf->saved_fds); } - fdctx = NULL; } } pthread_mutex_unlock (&conf->lock); ret = 0; - if (conf->lk_heal && attempt_lock_recovery) { + if (attempt_lock_recovery) { /* Delay decrementing the reopen fd count untill all the locks corresponding to this fd are acquired.*/ gf_log (this->name, GF_LOG_DEBUG, "acquiring locks " @@ -973,23 +1009,15 @@ client3_3_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count, clnt_reacquire_lock_error (this, local->fdctx, conf); gf_log (this->name, GF_LOG_WARNING, "acquiring locks " "failed on %s", local->loc.path); - ret = 0; } - } else { - fd_count = decrement_reopen_fd_count (frame->this, conf); } out: - if (fdctx) { - clnt_release_reopen_fd (this, fdctx); - } else if ((ret < 0) && frame && frame->this && conf) { - decrement_reopen_fd_count (frame->this, conf); - } + if (!attempt_lock_recovery) + fdctx->reopen_done (fdctx, this); - if (frame) { - frame->local = NULL; - STACK_DESTROY (frame->root); - } + frame->local = NULL; + STACK_DESTROY (frame->root); client_local_wipe (local); @@ -1008,11 +1036,10 @@ client3_3_reopendir_cbk (struct rpc_req *req, struct iovec *iov, int count, call_frame_t *frame = NULL; frame = myframe; - if (!frame || !frame->this) - goto out; + local = frame->local; + fdctx = local->fdctx; + conf = frame->this->private; - local = frame->local; - conf = frame->this->private; if (-1 == req->rpc_status) { gf_log (frame->this->name, GF_LOG_WARNING, @@ -1045,46 +1072,24 @@ client3_3_reopendir_cbk (struct rpc_req *req, struct iovec *iov, int count, goto out; } - fdctx = local->fdctx; - if (!fdctx) { - gf_log (frame->this->name, GF_LOG_WARNING, "fdctx not found"); - ret = -1; - goto out; - } - pthread_mutex_lock (&conf->lock); { fdctx->remote_fd = rsp.fd; - - if (!fdctx->released) { - list_add_tail (&fdctx->sfd_pos, &conf->saved_fds); - fdctx = NULL; - } } pthread_mutex_unlock (&conf->lock); - decrement_reopen_fd_count (frame->this, conf); - ret = 0; - out: - if (fdctx) - client_fdctx_destroy (frame->this, fdctx); - - if ((ret < 0) && frame && frame->this && conf) - decrement_reopen_fd_count (frame->this, conf); - - if (frame) { - frame->local = NULL; - STACK_DESTROY (frame->root); - } + fdctx->reopen_done (fdctx, frame->this); + frame->local = NULL; + STACK_DESTROY (frame->root); client_local_wipe (local); return 0; } -int -protocol_client_reopendir (xlator_t *this, clnt_fd_ctx_t *fdctx) +static int +protocol_client_reopendir (clnt_fd_ctx_t *fdctx, xlator_t *this) { int ret = -1; gfs3_opendir_req req = {{0,},}; @@ -1092,9 +1097,6 @@ protocol_client_reopendir (xlator_t *this, clnt_fd_ctx_t *fdctx) call_frame_t *frame = NULL; clnt_conf_t *conf = NULL; - if (!this || !fdctx) - goto out; - conf = this->private; local = mem_get0 (this->local_pool); @@ -1120,7 +1122,7 @@ protocol_client_reopendir (xlator_t *this, clnt_fd_ctx_t *fdctx) gf_log (frame->this->name, GF_LOG_DEBUG, "attempting reopen on %s", local->loc.path); - frame->local = local; local = NULL; + frame->local = local; ret = client_submit_request (this, &req, frame, conf->fops, GFS3_OP_OPENDIR, @@ -1128,11 +1130,11 @@ protocol_client_reopendir (xlator_t *this, clnt_fd_ctx_t *fdctx) NULL, 0, NULL, 0, NULL, (xdrproc_t)xdr_gfs3_opendir_req); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "failed to send the re-opendir request"); } - return ret; + return 0; out: if (frame) { @@ -1143,16 +1145,14 @@ out: if (local) client_local_wipe (local); - if ((ret < 0) && this && conf) { - decrement_reopen_fd_count (this, conf); - } + fdctx->reopen_done (fdctx, this); return 0; } -int -protocol_client_reopen (xlator_t *this, clnt_fd_ctx_t *fdctx) +static int +protocol_client_reopenfile (clnt_fd_ctx_t *fdctx, xlator_t *this) { int ret = -1; gfs3_open_req req = {{0,},}; @@ -1160,9 +1160,6 @@ protocol_client_reopen (xlator_t *this, clnt_fd_ctx_t *fdctx) call_frame_t *frame = NULL; clnt_conf_t *conf = NULL; - if (!this || !fdctx) - goto out; - conf = this->private; frame = create_frame (this, this->ctx->pool); @@ -1192,17 +1189,16 @@ protocol_client_reopen (xlator_t *this, clnt_fd_ctx_t *fdctx) gf_log (frame->this->name, GF_LOG_DEBUG, "attempting reopen on %s", local->loc.path); - local = NULL; ret = client_submit_request (this, &req, frame, conf->fops, GFS3_OP_OPEN, client3_3_reopen_cbk, NULL, NULL, 0, NULL, 0, NULL, (xdrproc_t)xdr_gfs3_open_req); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "failed to send the re-open request"); } - return ret; + return 0; out: if (frame) { @@ -1213,14 +1209,65 @@ out: if (local) client_local_wipe (local); - if ((ret < 0) && this && conf) { - decrement_reopen_fd_count (this, conf); - } + fdctx->reopen_done (fdctx, this); return 0; } +static void +protocol_client_reopen (clnt_fd_ctx_t *fdctx, xlator_t *this) +{ + if (fdctx->is_dir) + protocol_client_reopendir (fdctx, this); + else + protocol_client_reopenfile (fdctx, this); +} + +gf_boolean_t +__is_fd_reopen_in_progress (clnt_fd_ctx_t *fdctx) +{ + if (fdctx->reopen_done == client_default_reopen_done) + return _gf_false; + return _gf_true; +} + +void +client_attempt_reopen (fd_t *fd, xlator_t *this) +{ + clnt_conf_t *conf = NULL; + clnt_fd_ctx_t *fdctx = NULL; + gf_boolean_t reopen = _gf_false; + + if (!fd || !this) + goto out; + + conf = this->private; + pthread_mutex_lock (&conf->lock); + { + fdctx = this_fd_get_ctx (fd, this); + if (!fdctx) + goto unlock; + if (__is_fd_reopen_in_progress (fdctx)) + goto unlock; + if (fdctx->remote_fd != -1) + goto unlock; + + if (fdctx->reopen_attempts == CLIENT_REOPEN_MAX_ATTEMPTS) { + reopen = _gf_true; + fdctx->reopen_done = client_reopen_done; + list_del_init (&fdctx->sfd_pos); + } else { + fdctx->reopen_attempts++; + } + } +unlock: + pthread_mutex_unlock (&conf->lock); + if (reopen) + protocol_client_reopen (fdctx, this); +out: + return; +} int client_post_handshake (call_frame_t *frame, xlator_t *this) @@ -1245,6 +1292,7 @@ client_post_handshake (call_frame_t *frame, xlator_t *this) if (fdctx->remote_fd != -1) continue; + fdctx->reopen_done = client_child_up_reopen_done; list_del_init (&fdctx->sfd_pos); list_add_tail (&fdctx->sfd_pos, &reopen_head); count++; @@ -1263,10 +1311,7 @@ client_post_handshake (call_frame_t *frame, xlator_t *this) list_for_each_entry_safe (fdctx, tmp, &reopen_head, sfd_pos) { list_del_init (&fdctx->sfd_pos); - if (fdctx->is_dir) - protocol_client_reopendir (this, fdctx); - else - protocol_client_reopen (this, fdctx); + protocol_client_reopen (fdctx, this); } } else { gf_log (this->name, GF_LOG_DEBUG, @@ -1691,12 +1736,15 @@ client_query_portmap_cbk (struct rpc_req *req, struct iovec *iov, int count, voi ret = -1; gf_log (this->name, ((!conf->portmap_err_logged) ? GF_LOG_ERROR : GF_LOG_DEBUG), - "failed to get the port number for remote subvolume"); + "failed to get the port number for remote subvolume. " + "Please run 'gluster volume status' on server to see " + "if brick process is running."); conf->portmap_err_logged = 1; goto out; } conf->portmap_err_logged = 0; + conf->disconnect_err_logged = 0; config.remote_port = rsp.port; rpc_clnt_reconfig (conf->rpc, &config); |
