diff options
author | Krutika Dhananjay <kdhananj@redhat.com> | 2018-12-28 18:53:15 +0530 |
---|---|---|
committer | Xavi Hernandez <xhernandez@redhat.com> | 2019-01-11 08:35:55 +0000 |
commit | c0c2022e7d7097e96270a74f37813eda0c4e6339 (patch) | |
tree | 006c15bfcedcf94e573fbb5abb07efdda775390e | |
parent | 18bbb0f5b5bf6c96f764b7949df8a0a136b931ce (diff) |
features/shard: Fix launch of multiple synctasks for background deletion
PROBLEM:
When multiple sharded files are deleted in quick succession, multiple
issues were observed:
1. misleading logs for a sharded file: one log message said the shards
corresponding to the file were deleted successfully, but it was
followed by multiple logs suggesting the very same operation failed.
This was because multiple synctasks were attempting to clean up shards
of the same file, with only one of them succeeding (the one that gets
ENTRYLK successfully) and the rest of them logging failure.
2. multiple synctasks to do background deletion would be launched, one
for each deleted file, but all of them could readdir entries from
.remove_me at the same time and could potentially contend for ENTRYLK on
.shard for each of the entry names. This is undesirable and wasteful.
FIX:
Background deletion will now follow a state machine. In the event that
there are multiple attempts to launch a synctask for background deletion,
one for each file deleted, only the first task is launched. If, while
this task is doing the cleanup, more attempts are made to delete other
files, the state of the synctask is adjusted so that it restarts the
crawl even after reaching end-of-directory, to pick up any files it may
have missed in the previous iteration.
This patch also fixes the uninitialized lk-owner used during
syncop_entrylk(), which was leading to multiple background deletion
synctasks entering the critical section at the same time. This in turn
led to illegal memory access of the base inode in the second synctask
after it was destroyed post shard deletion by the first synctask.
Change-Id: Ib33773d27fb4be463c7a8a5a6a4b63689705324e
updates: bz#1662368
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
-rw-r--r-- | xlators/features/shard/src/shard.c | 188 | ||||
-rw-r--r-- | xlators/features/shard/src/shard.h | 11 |
2 files changed, 128 insertions, 71 deletions
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c index eea30b6065b..abea8dc64a3 100644 --- a/xlators/features/shard/src/shard.c +++ b/xlators/features/shard/src/shard.c @@ -1461,16 +1461,45 @@ int shard_start_background_deletion(xlator_t *this) { int ret = 0; + gf_boolean_t i_cleanup = _gf_true; + shard_priv_t *priv = NULL; call_frame_t *cleanup_frame = NULL; + priv = this->private; + + LOCK(&priv->lock); + { + switch (priv->bg_del_state) { + case SHARD_BG_DELETION_NONE: + i_cleanup = _gf_true; + priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; + break; + case SHARD_BG_DELETION_LAUNCHING: + i_cleanup = _gf_false; + break; + case SHARD_BG_DELETION_IN_PROGRESS: + priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; + i_cleanup = _gf_false; + break; + default: + break; + } + } + UNLOCK(&priv->lock); + if (!i_cleanup) + return 0; + cleanup_frame = create_frame(this, this->ctx->pool); if (!cleanup_frame) { gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, "Failed to create " "new frame to delete shards"); - return -ENOMEM; + ret = -ENOMEM; + goto err; } + set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root); + ret = synctask_new(this->ctx->env, shard_delete_shards, shard_delete_shards_cbk, cleanup_frame, cleanup_frame); if (ret < 0) { @@ -1479,7 +1508,16 @@ shard_start_background_deletion(xlator_t *this) "failed to create task to do background " "cleanup of shards"); STACK_DESTROY(cleanup_frame->root); + goto err; } + return 0; + +err: + LOCK(&priv->lock); + { + priv->bg_del_state = SHARD_BG_DELETION_NONE; + } + UNLOCK(&priv->lock); return ret; } @@ -1488,7 +1526,7 @@ shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata, struct iatt *postparent) { - int ret = 0; + int ret = -1; shard_priv_t *priv = NULL; gf_boolean_t i_start_cleanup = _gf_false; @@ -1521,23 +1559,25 @@ shard_lookup_cbk(call_frame_t 
*frame, void *cookie, xlator_t *this, LOCK(&priv->lock); { - if (priv->first_lookup == SHARD_FIRST_LOOKUP_PENDING) { - priv->first_lookup = SHARD_FIRST_LOOKUP_IN_PROGRESS; + if (priv->first_lookup_done == _gf_false) { + priv->first_lookup_done = _gf_true; i_start_cleanup = _gf_true; } } UNLOCK(&priv->lock); - if (i_start_cleanup) { - ret = shard_start_background_deletion(this); - if (ret) { - LOCK(&priv->lock); - { - priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING; - } - UNLOCK(&priv->lock); + if (!i_start_cleanup) + goto unwind; + + ret = shard_start_background_deletion(this); + if (ret < 0) { + LOCK(&priv->lock); + { + priv->first_lookup_done = _gf_false; } + UNLOCK(&priv->lock); } + unwind: SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata, postparent); @@ -2924,10 +2964,10 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num) if (ctx->fsync_needed) { unref_base_inode++; list_del_init(&ctx->to_fsync_list); - if (base_inode) + if (base_inode) { __shard_inode_ctx_get(base_inode, this, &base_ictx); - if (base_ictx) base_ictx->fsync_count--; + } } } UNLOCK(&inode->lock); @@ -3339,9 +3379,13 @@ shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, loc.inode = inode_ref(priv->dot_shard_rm_inode); ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, - ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL, NULL); - if (ret) + ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL); + if (ret < 0) { + if (ret == -EAGAIN) { + ret = 0; + } goto out; + } { ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode); } @@ -3355,20 +3399,6 @@ out: int shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data) { - xlator_t *this = NULL; - shard_priv_t *priv = NULL; - - this = frame->this; - priv = this->private; - - if (ret < 0) { - gf_msg(this->name, GF_LOG_WARNING, -ret, - SHARD_MSG_SHARDS_DELETION_FAILED, - "Background deletion of shards failed"); - priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING; - } else { - 
priv->first_lookup = SHARD_FIRST_LOOKUP_DONE; - } SHARD_STACK_DESTROY(frame); return 0; } @@ -3490,6 +3520,7 @@ shard_delete_shards(void *opaque) gf_dirent_t entries; gf_dirent_t *entry = NULL; call_frame_t *cleanup_frame = NULL; + gf_boolean_t done = _gf_false; this = THIS; priv = this->private; @@ -3544,51 +3575,76 @@ shard_delete_shards(void *opaque) goto err; } - while ((ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset, - &entries, local->xattr_req, NULL))) { - if (ret > 0) - ret = 0; - list_for_each_entry(entry, &entries.list, list) + for (;;) { + offset = 0; + LOCK(&priv->lock); { - offset = entry->d_off; - - if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) - continue; + if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) { + priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS; + } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) { + priv->bg_del_state = SHARD_BG_DELETION_NONE; + done = _gf_true; + } + } + UNLOCK(&priv->lock); + if (done) + break; + while ( + (ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset, + &entries, local->xattr_req, NULL))) { + if (ret > 0) + ret = 0; + list_for_each_entry(entry, &entries.list, list) + { + offset = entry->d_off; - if (!entry->inode) { - ret = shard_lookup_marker_entry(this, local, entry); - if (ret < 0) + if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) continue; - } - link_inode = inode_link(entry->inode, local->fd->inode, - entry->d_name, &entry->d_stat); - gf_msg_debug(this->name, 0, - "Initiating deletion of " - "shards of gfid %s", - entry->d_name); - ret = shard_delete_shards_of_entry(cleanup_frame, this, entry, - link_inode); - inode_unlink(link_inode, local->fd->inode, entry->d_name); - inode_unref(link_inode); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, -ret, - SHARD_MSG_SHARDS_DELETION_FAILED, - "Failed to clean up shards of gfid %s", entry->d_name); - continue; + if (!entry->inode) { + ret = shard_lookup_marker_entry(this, 
local, entry); + if (ret < 0) + continue; + } + link_inode = inode_link(entry->inode, local->fd->inode, + entry->d_name, &entry->d_stat); + + gf_msg_debug(this->name, 0, + "Initiating deletion of " + "shards of gfid %s", + entry->d_name); + ret = shard_delete_shards_of_entry(cleanup_frame, this, entry, + link_inode); + inode_unlink(link_inode, local->fd->inode, entry->d_name); + inode_unref(link_inode); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, + SHARD_MSG_SHARDS_DELETION_FAILED, + "Failed to clean up shards of gfid %s", + entry->d_name); + continue; + } + gf_msg(this->name, GF_LOG_INFO, 0, + SHARD_MSG_SHARD_DELETION_COMPLETED, + "Deleted " + "shards of gfid=%s from backend", + entry->d_name); } - gf_msg(this->name, GF_LOG_INFO, 0, - SHARD_MSG_SHARD_DELETION_COMPLETED, - "Deleted " - "shards of gfid=%s from backend", - entry->d_name); + gf_dirent_free(&entries); + if (ret) + break; } - gf_dirent_free(&entries); - if (ret) - break; } ret = 0; + loc_wipe(&loc); + return ret; + err: + LOCK(&priv->lock); + { + priv->bg_del_state = SHARD_BG_DELETION_NONE; + } + UNLOCK(&priv->lock); loc_wipe(&loc); return ret; } diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h index 0dbb9399656..570fe4685f5 100644 --- a/xlators/features/shard/src/shard.h +++ b/xlators/features/shard/src/shard.h @@ -200,10 +200,10 @@ shard_unlock_entrylk(call_frame_t *frame, xlator_t *this); } while (0) typedef enum { - SHARD_FIRST_LOOKUP_PENDING = 0, - SHARD_FIRST_LOOKUP_IN_PROGRESS, - SHARD_FIRST_LOOKUP_DONE, -} shard_first_lookup_state_t; + SHARD_BG_DELETION_NONE = 0, + SHARD_BG_DELETION_LAUNCHING, + SHARD_BG_DELETION_IN_PROGRESS, +} shard_bg_deletion_state_t; /* rm = "remove me" */ @@ -217,7 +217,8 @@ typedef struct shard_priv { int inode_count; struct list_head ilist_head; uint32_t deletion_rate; - shard_first_lookup_state_t first_lookup; + shard_bg_deletion_state_t bg_del_state; + gf_boolean_t first_lookup_done; uint64_t lru_limit; } shard_priv_t; |