summaryrefslogtreecommitdiffstats
path: root/xlators/features
diff options
context:
space:
mode:
authorAshish Pandey <aspandey@redhat.com>2016-02-17 15:57:02 +0530
committerPranith Kumar Karampuri <pkarampu@redhat.com>2016-03-30 01:51:11 -0700
commit7bbcd6fb692dffc628b723eace8cfcfa466e606f (patch)
treebc94cffe889a20851d813de0abb7f54bb1db1b6b /xlators/features
parent207289621f6c5b75bdb80aa14ddaf72efd5eb9b1 (diff)
cluster/ec: Rebalance hangs during rename
Problem: During the rename of a particular file (ec is holding blocking inodelk on the parent directory), if the rename of another file under the same directory comes. EC does not release the lock and goes ahead and renames the "new" file with the "already held lock". That causes rebalance process to be blocked on a lock which has been acquired by rename. Solution: While rename fop comes, ec takes blocking inodelk on old and new parent of the file. Before releasing, every lock held by ec, it waits for some "time" to see if that lock can be reused by the next fop. If within this "time" some other request comes, it releases this lock based on condition "lock count > 1" To get this "lock count" for rename fop, we have implemented "pl_rename" in feature/lock. Also, on ec side, changed the condition to release the lock based on the type of fop and old and new parent directories. Change-Id: I979dbab1185df962e8f305a6074ae1186ffe7db0 Bug: 1304988 Signed-off-by: Ashish Pandey <aspandey@redhat.com> Reviewed-on: http://review.gluster.org/13460 Smoke: Gluster Build System <jenkins@build.gluster.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
Diffstat (limited to 'xlators/features')
-rw-r--r--xlators/features/locks/src/common.h2
-rw-r--r--xlators/features/locks/src/entrylk.c4
-rw-r--r--xlators/features/locks/src/locks.h3
-rw-r--r--xlators/features/locks/src/pl-messages.h64
-rw-r--r--xlators/features/locks/src/posix.c154
5 files changed, 177 insertions, 50 deletions
diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
index b3026cb8c83..be13d29362b 100644
--- a/xlators/features/locks/src/common.h
+++ b/xlators/features/locks/src/common.h
@@ -149,7 +149,7 @@ pl_verify_reservelk (xlator_t *this, pl_inode_t *pl_inode,
int
pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *reqlock);
-uint32_t
+int32_t
check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename);
void __pl_inodelk_unref (pl_inode_lock_t *lock);
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
index 31553c12be2..783c57e6381 100644
--- a/xlators/features/locks/src/entrylk.c
+++ b/xlators/features/locks/src/entrylk.c
@@ -432,10 +432,10 @@ __unlock_entrylk (pl_dom_list_t *dom, pl_entry_lock_t *lock)
return ret_lock;
}
-uint32_t
+int32_t
check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename)
{
- uint32_t entrylk = 0;
+ int32_t entrylk = 0;
pl_inode_t *pinode = 0;
pl_dom_list_t *dom = NULL;
pl_entry_lock_t *conf = NULL;
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index 8c24eb8cb9a..df42cf560fd 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -164,8 +164,7 @@ typedef struct {
data_t *inodelk_dom_count_req;
dict_t *xdata;
- /* used by {f,}truncate */
- loc_t loc;
+ loc_t loc[2];
fd_t *fd;
off_t offset;
enum {TRUNCATE, FTRUNCATE} op;
diff --git a/xlators/features/locks/src/pl-messages.h b/xlators/features/locks/src/pl-messages.h
new file mode 100644
index 00000000000..45c8873ecb4
--- /dev/null
+++ b/xlators/features/locks/src/pl-messages.h
@@ -0,0 +1,64 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _PL_MESSAGES_H_
+#define _PL_MESSAGES_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glfs-message-id.h"
+
+/*! \file pl-messages.h
+ * \brief Locks log-message IDs and their descriptions
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for redability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check acorss the code if the message ID macro in question is reused
+ * anywhere. If reused then then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_PL_COMP_BASE GLFS_MSGID_COMP_PL
+#define GLFS_NUM_MESSAGES 1
+#define GLFS_MSGID_END (GLFS_PL_COMP_BASE + GLFS_NUM_MESSAGES + 1)
+/* Messaged with message IDs */
+#define glfs_msg_start_x GLFS_PL_COMP_BASE, "Invalid: Start of messages"
+/*------------*/
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define PL_MSG_LOCK_NUMBER (GLFS_PL_COMP_BASE + 1)
+
+/*------------*/
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+
+#endif /* !_PL_MESSAGES_H_ */
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index b81a0738a60..1bebdc568a8 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -25,6 +25,7 @@
#include "clear.h"
#include "defaults.h"
#include "syncop.h"
+#include "pl-messages.h"
#ifndef LLONG_MAX
#define LLONG_MAX LONG_LONG_MAX /* compat with old gcc */
@@ -46,7 +47,7 @@ static int fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);
inode_t *__inode = NULL; \
char *__name = NULL; \
dict_t *__unref = NULL; \
- \
+ int __i = 0 ; \
__local = frame->local; \
if (op_ret >= 0 && pl_needs_xdata_response (frame->local)) {\
if (xdata) \
@@ -55,12 +56,17 @@ static int fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);
xdata = dict_new(); \
if (xdata) { \
__unref = xdata; \
- pl_get_xdata_rsp_args (__local, \
- #fop, &__parent, &__inode, \
- &__name); \
- pl_set_xdata_response (frame->this, \
- __local, __parent, __inode, __name, \
- xdata); \
+ while (__local->fd || __local->loc[__i].inode) { \
+ pl_get_xdata_rsp_args (__local, \
+ #fop, &__parent, &__inode, \
+ &__name, __i); \
+ pl_set_xdata_response (frame->this, \
+ __local, __parent, __inode, __name, \
+ xdata, __i > 0); \
+ if (__local->fd || __i == 1) \
+ break; \
+ __i++; \
+ } \
} \
} \
frame->local = NULL; \
@@ -68,7 +74,8 @@ static int fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);
if (__local) { \
if (__local->inodelk_dom_count_req) \
data_unref (__local->inodelk_dom_count_req);\
- loc_wipe (&__local->loc); \
+ loc_wipe (&__local->loc[0]); \
+ loc_wipe (&__local->loc[1]); \
if (__local->fd) \
fd_unref (__local->fd); \
mem_put (__local); \
@@ -77,16 +84,22 @@ static int fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);
dict_unref (__unref); \
} while (0)
-#define PL_LOCAL_GET_REQUESTS(frame, this, xdata, __fd, __loc) \
+#define PL_LOCAL_GET_REQUESTS(frame, this, xdata, __fd, __loc, __newloc)\
do { \
if (pl_has_xdata_requests (xdata)) { \
frame->local = mem_get0 (this->local_pool); \
pl_local_t *__local = frame->local; \
if (__local) { \
- if (__fd) \
+ if (__fd) { \
__local->fd = fd_ref (__fd); \
- else \
- loc_copy (&__local->loc, __loc);\
+ } else { \
+ if (__loc) \
+ loc_copy (&__local->loc[0],\
+ __loc); \
+ if (__newloc) \
+ loc_copy (&__local->loc[1],\
+ __newloc); \
+ } \
pl_get_xdata_requests (__local, xdata); \
} \
} \
@@ -167,17 +180,17 @@ pl_needs_xdata_response (pl_local_t *local)
void
pl_get_xdata_rsp_args (pl_local_t *local, char *fop, inode_t **parent,
- inode_t **inode, char **name)
+ inode_t **inode, char **name, int i)
{
if (strcmp (fop, "lookup") == 0) {
- *parent = local->loc.parent;
- *inode = local->loc.inode;
- *name = (char *)local->loc.name;
+ *parent = local->loc[0].parent;
+ *inode = local->loc[0].inode;
+ *name = (char *)local->loc[0].name;
} else {
if (local->fd) {
*inode = local->fd->inode;
} else {
- *inode = local->loc.parent;
+ *inode = local->loc[i].parent;
}
}
}
@@ -223,16 +236,22 @@ out:
void
pl_parent_entrylk_xattr_fill (xlator_t *this, inode_t *parent,
- char *basename, dict_t *dict)
+ char *basename, dict_t *dict, gf_boolean_t keep_max)
{
- uint32_t entrylk = 0;
- int ret = -1;
+ int32_t entrylk = 0;
+ int32_t maxcount = -1;
+ int ret = -1;
if (!parent || !basename || !strlen (basename))
goto out;
+ if (keep_max) {
+ ret = dict_get_int32 (dict, GLUSTERFS_PARENT_ENTRYLK, &maxcount);
+ }
entrylk = check_entrylk_on_basename (this, parent, basename);
+ if (maxcount >= entrylk)
+ return;
out:
- ret = dict_set_uint32 (dict, GLUSTERFS_PARENT_ENTRYLK, entrylk);
+ ret = dict_set_int32 (dict, GLUSTERFS_PARENT_ENTRYLK, entrylk);
if (ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
" dict_set failed on key %s", GLUSTERFS_PARENT_ENTRYLK);
@@ -241,12 +260,19 @@ out:
void
pl_entrylk_xattr_fill (xlator_t *this, inode_t *inode,
- dict_t *dict)
+ dict_t *dict, gf_boolean_t keep_max)
{
int32_t count = 0;
+ int32_t maxcount = -1;
int ret = -1;
+ if (keep_max) {
+ ret = dict_get_int32 (dict, GLUSTERFS_ENTRYLK_COUNT, &maxcount);
+ }
count = get_entrylk_count (this, inode);
+ if (maxcount >= count)
+ return;
+
ret = dict_set_int32 (dict, GLUSTERFS_ENTRYLK_COUNT, count);
if (ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -257,13 +283,18 @@ pl_entrylk_xattr_fill (xlator_t *this, inode_t *inode,
void
pl_inodelk_xattr_fill (xlator_t *this, inode_t *inode, dict_t *dict,
- char *domname)
+ char *domname, gf_boolean_t keep_max)
{
int32_t count = 0;
+ int32_t maxcount = -1;
int ret = -1;
-
+ if (keep_max) {
+ ret = dict_get_int32 (dict, GLUSTERFS_INODELK_COUNT, &maxcount);
+ }
count = get_inodelk_count (this, inode, domname);
+ if (maxcount >= count)
+ return;
ret = dict_set_int32 (dict, GLUSTERFS_INODELK_COUNT, count);
if (ret < 0) {
@@ -276,12 +307,19 @@ pl_inodelk_xattr_fill (xlator_t *this, inode_t *inode, dict_t *dict,
void
pl_posixlk_xattr_fill (xlator_t *this, inode_t *inode,
- dict_t *dict)
+ dict_t *dict, gf_boolean_t keep_max)
{
int32_t count = 0;
+ int32_t maxcount = -1;
int ret = -1;
+ if (keep_max) {
+ ret = dict_get_int32 (dict, GLUSTERFS_POSIXLK_COUNT, &maxcount);
+ }
count = get_posixlk_count (this, inode);
+ if (maxcount >= count)
+ return;
+
ret = dict_set_int32 (dict, GLUSTERFS_POSIXLK_COUNT, count);
if (ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -292,26 +330,26 @@ pl_posixlk_xattr_fill (xlator_t *this, inode_t *inode,
void
pl_set_xdata_response (xlator_t *this, pl_local_t *local, inode_t *parent,
- inode_t *inode, char *name, dict_t *xdata)
+ inode_t *inode, char *name, dict_t *xdata, gf_boolean_t max_lock)
{
if (!xdata || !local)
return;
if (local->parent_entrylk_req && parent && name && strlen (name))
- pl_parent_entrylk_xattr_fill (this, parent, name, xdata);
+ pl_parent_entrylk_xattr_fill (this, parent, name, xdata, max_lock);
if (local->entrylk_count_req && inode)
- pl_entrylk_xattr_fill (this, inode, xdata);
+ pl_entrylk_xattr_fill (this, inode, xdata, max_lock);
if (local->inodelk_dom_count_req && inode)
pl_inodelk_xattr_fill (this, inode, xdata,
- data_to_str (local->inodelk_dom_count_req));
+ data_to_str (local->inodelk_dom_count_req), max_lock);
if (local->inodelk_count_req && inode)
- pl_inodelk_xattr_fill (this, inode, xdata, NULL);
+ pl_inodelk_xattr_fill (this, inode, xdata, NULL, max_lock);
if (local->posixlk_count_req && inode)
- pl_posixlk_xattr_fill (this, inode, xdata);
+ pl_posixlk_xattr_fill (this, inode, xdata, max_lock);
}
static pl_fdctx_t *
@@ -374,7 +412,7 @@ pl_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
if (local->op == TRUNCATE)
- loc_wipe (&local->loc);
+ loc_wipe (&local->loc[0]);
if (local->xdata)
dict_unref (local->xdata);
@@ -443,7 +481,7 @@ truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
if (local->op == TRUNCATE)
- inode = local->loc.inode;
+ inode = local->loc[0].inode;
else
inode = local->fd->inode;
@@ -468,7 +506,7 @@ truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
case TRUNCATE:
STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
FIRST_CHILD (this)->fops->truncate,
- &local->loc, local->offset, local->xdata);
+ &local->loc[0], local->offset, local->xdata);
break;
case FTRUNCATE:
STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
@@ -483,7 +521,7 @@ unwind:
gf_log (this->name, GF_LOG_ERROR, "truncate failed with ret: %d, "
"error: %s", op_ret, strerror (op_errno));
if (local->op == TRUNCATE)
- loc_wipe (&local->loc);
+ loc_wipe (&local->loc[0]);
if (local->xdata)
dict_unref (local->xdata);
if (local->fd)
@@ -505,7 +543,7 @@ pl_truncate (call_frame_t *frame, xlator_t *this,
local->op = TRUNCATE;
local->offset = offset;
- loc_copy (&local->loc, loc);
+ loc_copy (&local->loc[0], loc);
if (xdata)
local->xdata = dict_ref (xdata);
@@ -1200,7 +1238,7 @@ int32_t
pl_opendir (call_frame_t *frame, xlator_t *this,
loc_t *loc, fd_t *fd, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL);
+ PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL, NULL);
STACK_WIND (frame, pl_opendir_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
return 0;
@@ -1326,7 +1364,7 @@ pl_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS (frame, this, xdata, NULL, loc);
+ PL_LOCAL_GET_REQUESTS (frame, this, xdata, NULL, loc, NULL);
STACK_WIND (frame, pl_create_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->create,
loc, flags, mode, umask, fd, xdata);
@@ -1347,7 +1385,7 @@ int32_t
pl_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS (frame, this, xdata, NULL, loc);
+ PL_LOCAL_GET_REQUESTS (frame, this, xdata, NULL, loc, NULL);
STACK_WIND (frame, pl_unlink_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
return 0;
@@ -1455,7 +1493,7 @@ pl_readv (call_frame_t *frame, xlator_t *this,
priv = this->private;
pl_inode = pl_inode_get (this, fd->inode);
- PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL);
+ PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL, NULL);
if (priv->mandatory && pl_inode->mandatory) {
region.fl_start = offset;
@@ -1551,7 +1589,7 @@ pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
pl_inode = pl_inode_get (this, fd->inode);
- PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL);
+ PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL, NULL);
if (priv->mandatory && pl_inode->mandatory) {
region.fl_start = offset;
@@ -2178,7 +2216,7 @@ pl_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t
pl_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS (frame, this, xdata, NULL, loc);
+ PL_LOCAL_GET_REQUESTS (frame, this, xdata, NULL, loc, NULL);
STACK_WIND (frame, pl_lookup_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup, loc, xdata);
return 0;
@@ -2195,7 +2233,7 @@ pl_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t
pl_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL);
+ PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL, NULL);
STACK_WIND (frame, pl_fstat_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat, fd, xdata);
return 0;
@@ -2218,7 +2256,7 @@ pl_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
list_for_each_entry (entry, &entries->list, list) {
pl_set_xdata_response (this, local, local->fd->inode,
entry->inode, entry->d_name,
- entry->dict);
+ entry->dict, 0);
}
unwind:
@@ -2232,7 +2270,7 @@ int
pl_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t offset, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL);
+ PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL, NULL);
STACK_WIND (frame, pl_readdirp_cbk,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
fd, size, offset, xdata);
@@ -2785,6 +2823,31 @@ pl_fentrylk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, const char *basename,
entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
+int32_t
+pl_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ PL_STACK_UNWIND (rename, xdata, frame, op_ret, op_errno,
+ buf, preoldparent, postoldparent, prenewparent,
+ postnewparent, xdata);
+ return 0;
+}
+
+int32_t
+pl_rename (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS (frame, this, xdata, NULL, oldloc, newloc);
+
+ STACK_WIND (frame, pl_rename_cbk, FIRST_CHILD (this),
+ FIRST_CHILD(this)->fops->rename, oldloc,
+ newloc, xdata);
+ return 0;
+}
+
struct xlator_fops fops = {
.lookup = pl_lookup,
.create = pl_create,
@@ -2805,6 +2868,7 @@ struct xlator_fops fops = {
.getxattr = pl_getxattr,
.fgetxattr = pl_fgetxattr,
.fsetxattr = pl_fsetxattr,
+ .rename = pl_rename,
};
struct xlator_dumpops dumpops = {