summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMohammed Junaid <junaid@redhat.com>2012-03-19 19:56:21 +0530
committerVijay Bellur <vijay@gluster.com>2012-03-19 08:11:43 -0700
commit83277598bda524f44b76feed6adc7b19fc49d49a (patch)
tree86f436c187c910ee6a7899e8bdc3a2f0ffe54cef
parentf076c23f381d9013d4fa2145c329033a51194597 (diff)
protocol/server: Handle server send reply failure gracefully.
Server send reply failure should not call server connection cleanup because if a reconnection happens with in the grace-timeout the connection object is reused. We must cleanup only on grace-timeout. Change-Id: I7d171a863382646ff392031c2b845fe4f0d3d5dc BUG: 803365 Signed-off-by: Mohammed Junaid <junaid@redhat.com> Reviewed-on: http://review.gluster.com/2947 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vijay@gluster.com>
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c3
-rw-r--r--xlators/protocol/client/src/client.c79
-rw-r--r--xlators/protocol/client/src/client.h12
-rw-r--r--xlators/protocol/server/src/server.c84
-rw-r--r--xlators/protocol/server/src/server.h2
5 files changed, 121 insertions, 59 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 2004a0834..8e35ad08a 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -210,7 +210,8 @@ static struct volopt_map_entry glusterd_volopt_map[] = {
{VKEY_FEATURES_LIMIT_USAGE, "features/quota", "limit-set", NULL, NO_DOC, 0},
{"features.quota-timeout", "features/quota", "timeout", "0", DOC, 0},
{"server.statedump-path", "protocol/server", "statedump-path", NULL, NO_DOC, 0},
- {"client.lk-heal", "protocol/client", "lk-heal", NULL, DOC, 0},
+ {"features.lock-heal", "protocol/client", "lk-heal", NULL, DOC, 0},
+ {"features.lock-heal", "protocol/server", "lk-heal", NULL, DOC, 0},
{"client.grace-timeout", "protocol/client", "grace-timeout", NULL, DOC, 0},
{"server.grace-timeout", "protocol/server", "grace-timeout", NULL, DOC, 0},
{"feature.read-only", "features/read-only", "!read-only", "off", DOC, 0},
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
index 00ec90ff0..078e6e5d0 100644
--- a/xlators/protocol/client/src/client.c
+++ b/xlators/protocol/client/src/client.c
@@ -116,6 +116,38 @@ out:
return;
}
+int32_t
+client_register_grace_timer (xlator_t *this, clnt_conf_t *conf)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("client", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ if (conf->grace_timer || !conf->grace_timer_needed) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "Client grace timer is already set "
+ "or a grace-timer has already time out, "
+ "not registering a new timer");
+ } else {
+ gf_log (this->name, GF_LOG_INFO,
+ "Registering a grace timer");
+ conf->grace_timer =
+ gf_timer_call_after (this->ctx,
+ conf->grace_tv,
+ client_grace_timeout,
+ conf->rpc);
+ }
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ ret = 0;
+out:
+ return ret;
+}
+
int
client_submit_request (xlator_t *this, void *req, call_frame_t *frame,
rpc_clnt_prog_t *prog, int procnum, fop_cbk_fn_t cbkfn,
@@ -1999,7 +2031,7 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
/* Cancel grace timer if set */
pthread_mutex_lock (&conf->lock);
{
- conf->grace_timer_flag = _gf_true;
+ conf->grace_timer_needed = _gf_true;
if (conf->grace_timer) {
gf_log (this->name, GF_LOG_WARNING,
@@ -2016,28 +2048,10 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
break;
}
case RPC_CLNT_DISCONNECT:
- /* client_mark_fd_bad (this); */
-
- pthread_mutex_lock (&conf->lock);
- {
- if (conf->grace_timer || !conf->grace_timer_flag) {
- gf_log (this->name, GF_LOG_TRACE,
- "Client grace timer is already set "
- "or a grace-timer has already timeout, "
- "not registering a new timer");
- } else {
- gf_log (this->name, GF_LOG_WARNING,
- "Registering a grace timer");
- conf->grace_timer_flag = _gf_false;
-
- conf->grace_timer =
- gf_timer_call_after (this->ctx,
- conf->grace_tv,
- client_grace_timeout,
- conf->rpc);
- }
- }
- pthread_mutex_unlock (&conf->lock);
+ if (!conf->lk_heal)
+ client_mark_fd_bad (this);
+ else
+ client_register_grace_timer (this, conf);
if (!conf->skip_notify) {
if (conf->connected)
@@ -2263,6 +2277,9 @@ client_init_grace_timer (xlator_t *this, dict_t *options,
if (!ret)
gf_string2boolean (lk_heal, &conf->lk_heal);
+ gf_log (this->name, GF_LOG_DEBUG, "lk-heal = %s",
+ (conf->lk_heal) ? "on" : "off");
+
ret = dict_get_int32 (options, "grace-timeout", &grace_timeout);
if (!ret)
conf->grace_tv.tv_sec = grace_timeout;
@@ -2271,8 +2288,8 @@ client_init_grace_timer (xlator_t *this, dict_t *options,
conf->grace_tv.tv_usec = 0;
- gf_log (this->name, GF_LOG_INFO, "lk-heal = %s",
- (conf->lk_heal) ? "on" : "off");
+ gf_log (this->name, GF_LOG_DEBUG, "Client grace timeout "
+ "value = %"PRIu64, conf->grace_tv.tv_sec);
ret = 0;
out:
@@ -2363,9 +2380,9 @@ init (xlator_t *this)
INIT_LIST_HEAD (&conf->saved_fds);
/* Initialize parameters for lock self healing*/
- conf->lk_version = 1;
- conf->grace_timer = NULL;
- conf->grace_timer_flag = _gf_true;
+ conf->lk_version = 1;
+ conf->grace_timer = NULL;
+ conf->grace_timer_needed = _gf_true;
ret = client_init_grace_timer (this, this->options, conf);
if (ret)
@@ -2644,10 +2661,12 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_BOOL
},
{ .key = {"lk-heal"},
- .type = GF_OPTION_TYPE_STR
+ .type = GF_OPTION_TYPE_BOOL,
},
{ .key = {"grace-timeout"},
- .type = GF_OPTION_TYPE_INT
+ .type = GF_OPTION_TYPE_INT,
+ .min = 10,
+ .max = 1800
},
{.key = {"tcp-window-size"},
.type = GF_OPTION_TYPE_SIZET,
diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
index e1647d9fa..352d2b371 100644
--- a/xlators/protocol/client/src/client.h
+++ b/xlators/protocol/client/src/client.h
@@ -106,12 +106,12 @@ typedef struct clnt_conf {
performing lock healing */
struct timeval grace_tv;
gf_timer_t *grace_timer;
- gf_boolean_t grace_timer_flag; /* The state of this flag will
- be used to decide whether
- a new grace-timer must be
- registered or not. False
- means dont register, true
- means register */
+ gf_boolean_t grace_timer_needed; /* The state of this flag will
+ be used to decide whether
+ a new grace-timer must be
+ registered or not. False
+ means dont register, true
+ means register */
char parent_down;
} clnt_conf_t;
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
index 519d378ac..d22852404 100644
--- a/xlators/protocol/server/src/server.c
+++ b/xlators/protocol/server/src/server.c
@@ -128,7 +128,8 @@ server_submit_reply (call_frame_t *frame, rpcsvc_request_t *req, void *arg,
struct iovec rsp = {0,};
server_state_t *state = NULL;
char new_iobref = 0;
- server_connection_t *conn = NULL;
+ server_connection_t *conn = NULL;
+ gf_boolean_t lk_heal = _gf_false;
GF_VALIDATE_OR_GOTO ("server", req, ret);
@@ -138,6 +139,9 @@ server_submit_reply (call_frame_t *frame, rpcsvc_request_t *req, void *arg,
conn = SERVER_CONNECTION(frame);
}
+ if (conn)
+ lk_heal = ((server_conf_t *) conn->this->private)->lk_heal;
+
if (!iobref) {
iobref = iobref_new ();
if (!iobref) {
@@ -170,9 +174,14 @@ server_submit_reply (call_frame_t *frame, rpcsvc_request_t *req, void *arg,
iobuf_unref (iob);
if (ret == -1) {
gf_log_callingfn ("", GF_LOG_ERROR, "Reply submission failed");
- if (frame && conn)
+ if (frame && conn && !lk_heal) {
server_connection_cleanup (frame->this, conn,
INTERNAL_LOCKS | POSIX_LOCKS);
+ } else {
+ /* TODO: Failure of open(dir), create, inodelk, entrylk
+ or lk fops send failure must be handled specially. */
+ ;
+ }
goto ret;
}
@@ -614,6 +623,7 @@ int
server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
void *data)
{
+ gf_boolean_t detached = _gf_false;
xlator_t *this = NULL;
rpc_transport_t *xprt = NULL;
server_connection_t *conn = NULL;
@@ -655,32 +665,44 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
if (!conn)
break;
- put_server_conn_state (this, xprt);
gf_log (this->name, GF_LOG_INFO, "disconnecting connection"
"from %s", conn->id);
- server_connection_cleanup (this, conn, INTERNAL_LOCKS);
- pthread_mutex_lock (&conf->mutex);
- {
- list_del (&xprt->list);
- }
- pthread_mutex_unlock (&conf->mutex);
- pthread_mutex_lock (&conn->lock);
- {
- if (conn->timer)
- goto unlock;
+ /* If lock self heal is off, then destroy the
+ conn object, else register a grace timer event */
+ if (!conf->lk_heal) {
+ server_conn_ref (conn);
+ server_connection_put (this, conn, &detached);
+ if (detached)
+ server_connection_cleanup (this, conn,
+ INTERNAL_LOCKS |
+ POSIX_LOCKS);
+ server_conn_unref (conn);
+ } else {
+ put_server_conn_state (this, xprt);
+ server_connection_cleanup (this, conn, INTERNAL_LOCKS);
+ pthread_mutex_lock (&conf->mutex);
+ {
+ list_del (&xprt->list);
+ }
+ pthread_mutex_unlock (&conf->mutex);
- gf_log (this->name, GF_LOG_INFO, "starting a grace "
- "timer for %s", conn->id);
+ pthread_mutex_lock (&conn->lock);
+ {
+ if (conn->timer)
+ goto unlock;
- conn->timer = gf_timer_call_after (this->ctx,
- conf->grace_tv,
- grace_time_handler,
- conn);
- }
- unlock:
- pthread_mutex_unlock (&conn->lock);
+ gf_log (this->name, GF_LOG_INFO, "starting a grace "
+ "timer for %s", xprt->name);
+ conn->timer = gf_timer_call_after (this->ctx,
+ conf->grace_tv,
+ grace_time_handler,
+ conn);
+ }
+ unlock:
+ pthread_mutex_unlock (&conn->lock);
+ }
break;
case RPCSVC_EVENT_TRANSPORT_DESTROY:
/*- conn obj has been disassociated from xprt on first
@@ -758,17 +780,30 @@ server_init_grace_timer (xlator_t *this, dict_t *options,
{
int32_t ret = -1;
int32_t grace_timeout = -1;
+ char *lk_heal = NULL;
GF_VALIDATE_OR_GOTO ("server", this, out);
GF_VALIDATE_OR_GOTO (this->name, options, out);
GF_VALIDATE_OR_GOTO (this->name, conf, out);
+ conf->lk_heal = _gf_true;
+
+ ret = dict_get_str (options, "lk-heal", &lk_heal);
+ if (!ret)
+ gf_string2boolean (lk_heal, &conf->lk_heal);
+
+ gf_log (this->name, GF_LOG_DEBUG, "lk-heal = %s",
+ (conf->lk_heal) ? "on" : "off");
+
ret = dict_get_int32 (options, "grace-timeout", &grace_timeout);
if (!ret)
conf->grace_tv.tv_sec = grace_timeout;
else
conf->grace_tv.tv_sec = 10;
+ gf_log (this->name, GF_LOG_DEBUG, "Server grace timeout "
+ "value = %"PRIu64, conf->grace_tv.tv_sec);
+
conf->grace_tv.tv_usec = 0;
ret = 0;
@@ -1146,8 +1181,13 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_PATH,
.default_value = "/tmp"
},
+ { .key = {"lk-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ },
{.key = {"grace-timeout"},
.type = GF_OPTION_TYPE_INT,
+ .min = 10,
+ .max = 1800,
},
{.key = {"tcp-window-size"},
.type = GF_OPTION_TYPE_SIZET,
diff --git a/xlators/protocol/server/src/server.h b/xlators/protocol/server/src/server.h
index 4213faa17..d000c8d36 100644
--- a/xlators/protocol/server/src/server.h
+++ b/xlators/protocol/server/src/server.h
@@ -108,6 +108,8 @@ struct server_conf {
int inode_lru_limit;
gf_boolean_t verify_volfile;
gf_boolean_t trace;
+ gf_boolean_t lk_heal; /* If true means lock self
+ heal is on else off. */
char *conf_dir;
struct _volfile_ctx *volfile;
struct timeval grace_tv;