summary refs log tree commit diff stats
path: root/xlators/protocol/client/src/client-handshake.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/protocol/client/src/client-handshake.c')
-rw-r--r-- xlators/protocol/client/src/client-handshake.c 256
1 files changed, 152 insertions, 104 deletions
diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
index f5f056c6a..5668fea53 100644
--- a/xlators/protocol/client/src/client-handshake.c
+++ b/xlators/protocol/client/src/client-handshake.c
@@ -25,6 +25,7 @@
#include "portmap-xdr.h"
#include "rpc-common-xdr.h"
+#define CLIENT_REOPEN_MAX_ATTEMPTS 1024
extern rpc_clnt_prog_t clnt3_3_fop_prog;
extern rpc_clnt_prog_t clnt_pmap_prog;
@@ -52,7 +53,7 @@ rpc_client_ping_timer_expired (void *data)
rpc_clnt_connection_t *conn = NULL;
int disconnect = 0;
int transport_activity = 0;
- struct timeval timeout = {0, };
+ struct timespec timeout = {0, };
struct timeval current = {0, };
struct rpc_clnt *clnt = NULL;
xlator_t *this = NULL;
@@ -100,7 +101,7 @@ rpc_client_ping_timer_expired (void *data)
"ping timer expired but transport activity "
"detected - not bailing transport");
timeout.tv_sec = conf->opt.ping_timeout;
- timeout.tv_usec = 0;
+ timeout.tv_nsec = 0;
conn->ping_timer =
gf_timer_call_after (this->ctx, timeout,
@@ -139,7 +140,7 @@ client_start_ping (void *data)
clnt_conf_t *conf = NULL;
rpc_clnt_connection_t *conn = NULL;
int32_t ret = -1;
- struct timeval timeout = {0, };
+ struct timespec timeout = {0, };
call_frame_t *frame = NULL;
int frame_count = 0;
@@ -195,7 +196,7 @@ client_start_ping (void *data)
}
timeout.tv_sec = conf->opt.ping_timeout;
- timeout.tv_usec = 0;
+ timeout.tv_nsec = 0;
conn->ping_timer =
gf_timer_call_after (this->ctx, timeout,
@@ -240,7 +241,7 @@ client_ping_cbk (struct rpc_req *req, struct iovec *iov, int count,
{
xlator_t *this = NULL;
rpc_clnt_connection_t *conn = NULL;
- struct timeval timeout = {0, };
+ struct timespec timeout = {0, };
call_frame_t *frame = NULL;
clnt_conf_t *conf = NULL;
@@ -280,7 +281,7 @@ client_ping_cbk (struct rpc_req *req, struct iovec *iov, int count,
timeout.tv_sec = conf->opt.ping_timeout;
- timeout.tv_usec = 0;
+ timeout.tv_nsec = 0;
gf_timer_call_cancel (this->ctx,
conn->ping_timer);
@@ -415,9 +416,6 @@ clnt_fd_lk_reacquire_failed (xlator_t *this, clnt_fd_ctx_t *fdctx,
{
fdctx->remote_fd = -1;
fdctx->lk_heal_state = GF_LK_HEAL_DONE;
-
- list_add_tail (&fdctx->sfd_pos,
- &conf->saved_fds);
}
pthread_mutex_unlock (&conf->lock);
@@ -603,7 +601,7 @@ clnt_release_reopen_fd_cbk (struct rpc_req *req, struct iovec *iov,
clnt_fd_lk_reacquire_failed (this, fdctx, conf);
- decrement_reopen_fd_count (this, conf);
+ fdctx->reopen_done (fdctx, this);
frame->local = NULL;
STACK_DESTROY (frame->root);
@@ -633,10 +631,11 @@ clnt_release_reopen_fd (xlator_t *this, clnt_fd_ctx_t *fdctx)
clnt_release_reopen_fd_cbk, NULL,
NULL, 0, NULL, 0, NULL,
(xdrproc_t)xdr_gfs3_releasedir_req);
+ return 0;
out:
if (ret) {
- decrement_reopen_fd_count (this, conf);
clnt_fd_lk_reacquire_failed (this, fdctx, conf);
+ fdctx->reopen_done (fdctx, this);
if (frame) {
frame->local = NULL;
STACK_DESTROY (frame->root);
@@ -754,13 +753,10 @@ client_reacquire_lock_cbk (struct rpc_req *req, struct iovec *iov,
pthread_mutex_lock (&conf->lock);
{
fdctx->lk_heal_state = GF_LK_HEAL_DONE;
-
- list_add_tail (&fdctx->sfd_pos,
- &conf->saved_fds);
}
pthread_mutex_unlock (&conf->lock);
- decrement_reopen_fd_count (this, conf);
+ fdctx->reopen_done (fdctx, this);
}
ret = 0;
@@ -869,8 +865,7 @@ client_reacquire_lock (xlator_t *this, clnt_fd_ctx_t *fdctx)
if (client_fd_lk_list_empty (fdctx->lk_ctx, _gf_false)) {
gf_log (this->name, GF_LOG_DEBUG,
"fd lock list is empty");
- decrement_reopen_fd_count (this,
- (clnt_conf_t *)this->private);
+ fdctx->reopen_done (fdctx, this);
} else {
lk_ctx = fdctx->lk_ctx;
@@ -885,14 +880,66 @@ out:
return ret;
}
+/*
+ * Default value of fdctx->reopen_done.
+ *
+ * The body only emits a warning through gf_log_callingfn() stating that it
+ * should never be called. client_reopen_done() installs this function on the
+ * fdctx when a reopen completes, and __is_fd_reopen_in_progress() compares
+ * fdctx->reopen_done against it to decide that no reopen is in progress.
+ *
+ * @fdctx: fd context the callback was invoked for (unused here).
+ * @this:  client xlator; its name is used as the log domain.
+ */
+void
+client_default_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this)
+{
+ gf_log_callingfn (this->name, GF_LOG_WARNING,
+ "This function should never be called");
+}
+
+/*
+ * Complete a reopen attempt for @fdctx.
+ *
+ * With conf->lock held it resets fdctx->reopen_attempts to 0, appends the
+ * fdctx to conf->saved_fds unless fdctx->released is set, and switches
+ * fdctx->reopen_done back to client_default_reopen_done. If the fdctx was
+ * released, it is destroyed with client_fdctx_destroy() once the lock has
+ * been dropped.
+ *
+ * @fdctx: fd context whose reopen has finished.
+ * @this:  client xlator; this->private is used as the clnt_conf_t.
+ */
+void
+client_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this)
+{
+ clnt_conf_t *conf = NULL;
+ gf_boolean_t destroy = _gf_false;
+
+ conf = this->private;
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ fdctx->reopen_attempts = 0;
+ if (!fdctx->released)
+ list_add_tail (&fdctx->sfd_pos, &conf->saved_fds);
+ else
+ destroy = _gf_true;
+ /* Restore the default callback; __is_fd_reopen_in_progress()
+ * reads this value as "no reopen in progress". */
+ fdctx->reopen_done = client_default_reopen_done;
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ /* Only the decision is taken under the lock; the destroy call itself
+ * runs after the unlock. NOTE(review): presumably client_fdctx_destroy
+ * must not be called with conf->lock held - confirm. */
+ if (destroy)
+ client_fdctx_destroy (this, fdctx);
+}
+
+/*
+ * Reopen-completion callback that client_post_handshake() assigns to
+ * fdctx->reopen_done before it reopens the saved fds.
+ *
+ * It decrements conf->reopen_fd_count under conf->rec_lock, then performs the
+ * common completion work through client_reopen_done(). When the decremented
+ * count is 0 it logs the event and calls client_set_lk_version() followed by
+ * client_notify_parents_child_up().
+ *
+ * @fdctx: fd context whose reopen has finished.
+ * @this:  client xlator; this->private is used as the clnt_conf_t.
+ */
+void
+client_child_up_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this)
+{
+ clnt_conf_t *conf = NULL;
+ uint64_t fd_count = 0;
+
+ conf = this->private;
+
+ LOCK (&conf->rec_lock);
+ {
+ /* Keep the post-decrement value so the zero check below does
+ * not have to re-read the counter outside the lock. */
+ fd_count = --(conf->reopen_fd_count);
+ }
+ UNLOCK (&conf->rec_lock);
+
+ client_reopen_done (fdctx, this);
+ if (fd_count == 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "last fd open'd/lock-self-heal'd - notifying CHILD-UP");
+ client_set_lk_version (this);
+ client_notify_parents_child_up (this);
+ }
+}
+
int
client3_3_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
int32_t ret = -1;
gfs3_open_rsp rsp = {0,};
- int attempt_lock_recovery = _gf_false;
- uint64_t fd_count = 0;
+ gf_boolean_t attempt_lock_recovery = _gf_false;
clnt_local_t *local = NULL;
clnt_conf_t *conf = NULL;
clnt_fd_ctx_t *fdctx = NULL;
@@ -900,12 +947,10 @@ client3_3_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count,
xlator_t *this = NULL;
frame = myframe;
- if (!frame || !frame->this)
- goto out;
-
this = frame->this;
+ conf = this->private;
local = frame->local;
- conf = frame->this->private;
+ fdctx = local->fdctx;
if (-1 == req->rpc_status) {
gf_log (frame->this->name, GF_LOG_WARNING,
@@ -938,32 +983,23 @@ client3_3_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
- fdctx = local->fdctx;
- if (!fdctx) {
- gf_log (frame->this->name, GF_LOG_WARNING, "fdctx not found");
- ret = -1;
- goto out;
- }
-
pthread_mutex_lock (&conf->lock);
{
fdctx->remote_fd = rsp.fd;
if (!fdctx->released) {
- if (!client_fd_lk_list_empty (fdctx->lk_ctx, _gf_false)) {
+ if (conf->lk_heal &&
+ !client_fd_lk_list_empty (fdctx->lk_ctx,
+ _gf_false)) {
attempt_lock_recovery = _gf_true;
fdctx->lk_heal_state = GF_LK_HEAL_IN_PROGRESS;
- } else {
- list_add_tail (&fdctx->sfd_pos,
- &conf->saved_fds);
}
- fdctx = NULL;
}
}
pthread_mutex_unlock (&conf->lock);
ret = 0;
- if (conf->lk_heal && attempt_lock_recovery) {
+ if (attempt_lock_recovery) {
/* Delay decrementing the reopen fd count untill all the
locks corresponding to this fd are acquired.*/
gf_log (this->name, GF_LOG_DEBUG, "acquiring locks "
@@ -973,23 +1009,15 @@ client3_3_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count,
clnt_reacquire_lock_error (this, local->fdctx, conf);
gf_log (this->name, GF_LOG_WARNING, "acquiring locks "
"failed on %s", local->loc.path);
- ret = 0;
}
- } else {
- fd_count = decrement_reopen_fd_count (frame->this, conf);
}
out:
- if (fdctx) {
- clnt_release_reopen_fd (this, fdctx);
- } else if ((ret < 0) && frame && frame->this && conf) {
- decrement_reopen_fd_count (frame->this, conf);
- }
+ if (!attempt_lock_recovery)
+ fdctx->reopen_done (fdctx, this);
- if (frame) {
- frame->local = NULL;
- STACK_DESTROY (frame->root);
- }
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
client_local_wipe (local);
@@ -1008,11 +1036,10 @@ client3_3_reopendir_cbk (struct rpc_req *req, struct iovec *iov, int count,
call_frame_t *frame = NULL;
frame = myframe;
- if (!frame || !frame->this)
- goto out;
+ local = frame->local;
+ fdctx = local->fdctx;
+ conf = frame->this->private;
- local = frame->local;
- conf = frame->this->private;
if (-1 == req->rpc_status) {
gf_log (frame->this->name, GF_LOG_WARNING,
@@ -1045,46 +1072,24 @@ client3_3_reopendir_cbk (struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
- fdctx = local->fdctx;
- if (!fdctx) {
- gf_log (frame->this->name, GF_LOG_WARNING, "fdctx not found");
- ret = -1;
- goto out;
- }
-
pthread_mutex_lock (&conf->lock);
{
fdctx->remote_fd = rsp.fd;
-
- if (!fdctx->released) {
- list_add_tail (&fdctx->sfd_pos, &conf->saved_fds);
- fdctx = NULL;
- }
}
pthread_mutex_unlock (&conf->lock);
- decrement_reopen_fd_count (frame->this, conf);
- ret = 0;
-
out:
- if (fdctx)
- client_fdctx_destroy (frame->this, fdctx);
-
- if ((ret < 0) && frame && frame->this && conf)
- decrement_reopen_fd_count (frame->this, conf);
-
- if (frame) {
- frame->local = NULL;
- STACK_DESTROY (frame->root);
- }
+ fdctx->reopen_done (fdctx, frame->this);
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
client_local_wipe (local);
return 0;
}
-int
-protocol_client_reopendir (xlator_t *this, clnt_fd_ctx_t *fdctx)
+static int
+protocol_client_reopendir (clnt_fd_ctx_t *fdctx, xlator_t *this)
{
int ret = -1;
gfs3_opendir_req req = {{0,},};
@@ -1092,9 +1097,6 @@ protocol_client_reopendir (xlator_t *this, clnt_fd_ctx_t *fdctx)
call_frame_t *frame = NULL;
clnt_conf_t *conf = NULL;
- if (!this || !fdctx)
- goto out;
-
conf = this->private;
local = mem_get0 (this->local_pool);
@@ -1120,7 +1122,7 @@ protocol_client_reopendir (xlator_t *this, clnt_fd_ctx_t *fdctx)
gf_log (frame->this->name, GF_LOG_DEBUG,
"attempting reopen on %s", local->loc.path);
- frame->local = local; local = NULL;
+ frame->local = local;
ret = client_submit_request (this, &req, frame, conf->fops,
GFS3_OP_OPENDIR,
@@ -1128,11 +1130,11 @@ protocol_client_reopendir (xlator_t *this, clnt_fd_ctx_t *fdctx)
NULL, 0, NULL, 0, NULL,
(xdrproc_t)xdr_gfs3_opendir_req);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"failed to send the re-opendir request");
}
- return ret;
+ return 0;
out:
if (frame) {
@@ -1143,16 +1145,14 @@ out:
if (local)
client_local_wipe (local);
- if ((ret < 0) && this && conf) {
- decrement_reopen_fd_count (this, conf);
- }
+ fdctx->reopen_done (fdctx, this);
return 0;
}
-int
-protocol_client_reopen (xlator_t *this, clnt_fd_ctx_t *fdctx)
+static int
+protocol_client_reopenfile (clnt_fd_ctx_t *fdctx, xlator_t *this)
{
int ret = -1;
gfs3_open_req req = {{0,},};
@@ -1160,9 +1160,6 @@ protocol_client_reopen (xlator_t *this, clnt_fd_ctx_t *fdctx)
call_frame_t *frame = NULL;
clnt_conf_t *conf = NULL;
- if (!this || !fdctx)
- goto out;
-
conf = this->private;
frame = create_frame (this, this->ctx->pool);
@@ -1192,17 +1189,16 @@ protocol_client_reopen (xlator_t *this, clnt_fd_ctx_t *fdctx)
gf_log (frame->this->name, GF_LOG_DEBUG,
"attempting reopen on %s", local->loc.path);
- local = NULL;
ret = client_submit_request (this, &req, frame, conf->fops,
GFS3_OP_OPEN, client3_3_reopen_cbk, NULL,
NULL, 0, NULL, 0, NULL,
(xdrproc_t)xdr_gfs3_open_req);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"failed to send the re-open request");
}
- return ret;
+ return 0;
out:
if (frame) {
@@ -1213,14 +1209,65 @@ out:
if (local)
client_local_wipe (local);
- if ((ret < 0) && this && conf) {
- decrement_reopen_fd_count (this, conf);
- }
+ fdctx->reopen_done (fdctx, this);
return 0;
}
+/*
+ * Start a reopen of @fdctx, choosing the request by fd type:
+ * protocol_client_reopendir() when fdctx->is_dir is set, otherwise
+ * protocol_client_reopenfile(). The return value of either helper is
+ * ignored.
+ *
+ * @fdctx: fd context to reopen.
+ * @this:  client xlator passed through to the helper.
+ */
+static void
+protocol_client_reopen (clnt_fd_ctx_t *fdctx, xlator_t *this)
+{
+ if (fdctx->is_dir)
+ protocol_client_reopendir (fdctx, this);
+ else
+ protocol_client_reopenfile (fdctx, this);
+}
+
+/*
+ * Tell whether a reopen is currently pending on @fdctx.
+ *
+ * Returns _gf_false when fdctx->reopen_done is still
+ * client_default_reopen_done, and _gf_true when any other completion
+ * callback has been installed.
+ *
+ * NOTE(review): the caller visible in this file, client_attempt_reopen(),
+ * invokes it with conf->lock held; presumably the leading "__" marks that
+ * requirement - confirm.
+ */
+gf_boolean_t
+__is_fd_reopen_in_progress (clnt_fd_ctx_t *fdctx)
+{
+ if (fdctx->reopen_done == client_default_reopen_done)
+ return _gf_false;
+ return _gf_true;
+}
+
+/*
+ * Possibly start a reopen of the remote fd behind @fd.
+ *
+ * Returns immediately if @fd or @this is NULL. Otherwise, under conf->lock,
+ * it looks up the fd context and does nothing further when there is no
+ * context, when a reopen is already in progress, or when
+ * fdctx->remote_fd != -1. For the remaining case it either bumps
+ * fdctx->reopen_attempts, or - when that counter equals
+ * CLIENT_REOPEN_MAX_ATTEMPTS - installs client_reopen_done as the completion
+ * callback, unlinks the fdctx from its list and, after releasing the lock,
+ * issues the reopen through protocol_client_reopen().
+ *
+ * @fd:   fd whose context should be examined.
+ * @this: client xlator; this->private is used as the clnt_conf_t.
+ */
+void
+client_attempt_reopen (fd_t *fd, xlator_t *this)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_fd_ctx_t *fdctx = NULL;
+ gf_boolean_t reopen = _gf_false;
+
+ if (!fd || !this)
+ goto out;
+
+ conf = this->private;
+ pthread_mutex_lock (&conf->lock);
+ {
+ fdctx = this_fd_get_ctx (fd, this);
+ if (!fdctx)
+ goto unlock;
+ if (__is_fd_reopen_in_progress (fdctx))
+ goto unlock;
+ if (fdctx->remote_fd != -1)
+ goto unlock;
+
+ /* A reopen is sent only on the call that finds the counter equal
+ * to CLIENT_REOPEN_MAX_ATTEMPTS; every earlier call just counts.
+ * NOTE(review): this looks like throttling of reopen requests -
+ * confirm the intent. */
+ if (fdctx->reopen_attempts == CLIENT_REOPEN_MAX_ATTEMPTS) {
+ reopen = _gf_true;
+ fdctx->reopen_done = client_reopen_done;
+ list_del_init (&fdctx->sfd_pos);
+ } else {
+ fdctx->reopen_attempts++;
+ }
+ }
+unlock:
+ pthread_mutex_unlock (&conf->lock);
+ /* The request is submitted only after conf->lock has been released. */
+ if (reopen)
+ protocol_client_reopen (fdctx, this);
+out:
+ return;
+}
int
client_post_handshake (call_frame_t *frame, xlator_t *this)
@@ -1245,6 +1292,7 @@ client_post_handshake (call_frame_t *frame, xlator_t *this)
if (fdctx->remote_fd != -1)
continue;
+ fdctx->reopen_done = client_child_up_reopen_done;
list_del_init (&fdctx->sfd_pos);
list_add_tail (&fdctx->sfd_pos, &reopen_head);
count++;
@@ -1263,10 +1311,7 @@ client_post_handshake (call_frame_t *frame, xlator_t *this)
list_for_each_entry_safe (fdctx, tmp, &reopen_head, sfd_pos) {
list_del_init (&fdctx->sfd_pos);
- if (fdctx->is_dir)
- protocol_client_reopendir (this, fdctx);
- else
- protocol_client_reopen (this, fdctx);
+ protocol_client_reopen (fdctx, this);
}
} else {
gf_log (this->name, GF_LOG_DEBUG,
@@ -1691,12 +1736,15 @@ client_query_portmap_cbk (struct rpc_req *req, struct iovec *iov, int count, voi
ret = -1;
gf_log (this->name, ((!conf->portmap_err_logged) ?
GF_LOG_ERROR : GF_LOG_DEBUG),
- "failed to get the port number for remote subvolume");
+ "failed to get the port number for remote subvolume. "
+ "Please run 'gluster volume status' on server to see "
+ "if brick process is running.");
conf->portmap_err_logged = 1;
goto out;
}
conf->portmap_err_logged = 0;
+ conf->disconnect_err_logged = 0;
config.remote_port = rsp.port;
rpc_clnt_reconfig (conf->rpc, &config);