diff options
-rw-r--r-- | libglusterfs/src/syncop.c | 59 | ||||
-rw-r--r-- | libglusterfs/src/syncop.h | 3 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 19 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-mem-types.h | 2 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 22 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.h | 3 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 4 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.c | 551 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.h | 14 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.c | 29 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 6 | ||||
-rw-r--r-- | xlators/cluster/afr/src/pump.c | 21 |
12 files changed, 529 insertions, 204 deletions
diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c index 096c29efe..4acac5f8f 100644 --- a/libglusterfs/src/syncop.c +++ b/libglusterfs/src/syncop.c @@ -548,6 +548,65 @@ syncop_readdirp (xlator_t *subvol, } int32_t +syncop_readdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + gf_dirent_t *entries) +{ + struct syncargs *args = NULL; + gf_dirent_t *entry = NULL; + gf_dirent_t *tmp = NULL; + + int count = 0; + + args = cookie; + + INIT_LIST_HEAD (&args->entries.list); + + args->op_ret = op_ret; + args->op_errno = op_errno; + + if (op_ret >= 0) { + list_for_each_entry (entry, &entries->list, list) { + tmp = entry_copy (entry); + gf_log (this->name, GF_LOG_TRACE, + "adding entry=%s, count=%d", + tmp->d_name, count); + list_add_tail (&tmp->list, &(args->entries.list)); + count++; + } + } + + __wake (args); + + return 0; + +} + +int +syncop_readdir (xlator_t *subvol, + fd_t *fd, + size_t size, + off_t off, + gf_dirent_t *entries) +{ + struct syncargs args = {0, }; + + SYNCOP (subvol, (&args), syncop_readdir_cbk, subvol->fops->readdir, + fd, size, off); + + if (entries) + list_splice_init (&args.entries.list, &entries->list); + /* TODO: need to free all the 'args.entries' in 'else' case */ + + errno = args.op_errno; + return args.op_ret; + +} + +int32_t syncop_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, diff --git a/libglusterfs/src/syncop.h b/libglusterfs/src/syncop.h index 9554edb72..1bea189a7 100644 --- a/libglusterfs/src/syncop.h +++ b/libglusterfs/src/syncop.h @@ -182,6 +182,9 @@ int syncop_readdirp (xlator_t *subvol, fd_t *fd, size_t size, off_t off, /* out */ gf_dirent_t *entries); +int syncop_readdir (xlator_t *subvol, fd_t *fd, size_t size, off_t off, + gf_dirent_t *entries); + int syncop_opendir (xlator_t *subvol, loc_t *loc, fd_t *fd); int syncop_setattr (xlator_t *subvol, loc_t *loc, struct iatt *iatt, int valid, diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index a5dee65f6..1895150cd 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -3448,6 +3448,8 @@ afr_notify (xlator_t *this, int32_t event, priv->child_up[idx] = 1; priv->up_count++; + call_psh = 1; + up_child = idx; for (i = 0; i < priv->child_count; i++) if (priv->child_up[i] == 1) up_children++; @@ -3457,12 +3459,6 @@ afr_notify (xlator_t *this, int32_t event, "going online.", ((xlator_t *)data)->name); } else { event = GF_EVENT_CHILD_MODIFIED; - gf_log (this->name, GF_LOG_INFO, "subvol %d came up, " - "start crawl", idx); - if (had_heard_from_all) { - call_psh = 1; - up_child = idx; - } } priv->last_event[idx] = event; @@ -3551,18 +3547,15 @@ afr_notify (xlator_t *this, int32_t event, } } UNLOCK (&priv->lock); - if (up_children > 1) { - gf_log (this->name, GF_LOG_INFO, "All subvolumes came " - "up, start crawl"); - call_psh = 1; - } } ret = 0; if (propagate) ret = default_notify (this, event, data); - if (call_psh) - afr_proactive_self_heal (this, up_child); + if (call_psh) { + gf_log (this->name, GF_LOG_DEBUG, "start crawl: %d", up_child); + afr_do_poll_self_heal ((void*) (long) up_child); + } out: return ret; diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h index ebe189c35..228139408 100644 --- a/xlators/cluster/afr/src/afr-mem-types.h +++ b/xlators/cluster/afr/src/afr-mem-types.h @@ -46,6 +46,8 @@ enum gf_afr_mem_types_ { gf_afr_fd_paused_call_t, gf_afr_mt_afr_crawl_data_t, gf_afr_mt_afr_brick_pos_t, + gf_afr_mt_afr_shd_bool_t, + gf_afr_mt_afr_shd_timer_t, gf_afr_mt_end }; #endif diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 8fbea8c9d..36a1e04c9 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -2224,13 +2224,21 @@ afr_self_heal_type_for_transaction (afr_transaction_type type) } int -afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name, uuid_t gfid) +afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name) { int ret = -1; + uuid_t pargfid = {0}; - if (!child) { + if (!child) + goto out; + + if (!uuid_is_null (parent->inode->gfid)) + uuid_copy (pargfid, parent->inode->gfid); + else if (!uuid_is_null (parent->gfid)) + uuid_copy (pargfid, parent->gfid); + + if (uuid_is_null (pargfid)) goto out; - } if (strcmp (parent->path, "/") == 0) ret = gf_asprintf ((char **)&child->path, "/%s", name); @@ -2243,26 +2251,22 @@ afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name, uu "asprintf failed while setting child path"); } - if (!child->path) { - goto out; - } - child->name = strrchr (child->path, '/'); if (child->name) child->name++; child->parent = inode_ref (parent->inode); child->inode = inode_new (parent->inode->table); + uuid_copy (child->pargfid, pargfid); if (!child->inode) { ret = -1; goto out; } - uuid_copy (child->gfid, gfid); ret = 0; out: - if (ret == -1) + if ((ret == -1) && child) loc_wipe (child); return ret; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h index 114c17777..715ed3dc1 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.h +++ b/xlators/cluster/afr/src/afr-self-heal-common.h @@ -120,8 +120,7 @@ typedef int xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xattr); int -afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name, - uuid_t gfid); +afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name); int afr_impunge_frame_create (call_frame_t *frame, xlator_t *this, int active_source, call_frame_t **impunge_frame); diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 570c7080f..6531615df 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -733,7 +733,7 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this, expunge_sh->entrybuf = entry->d_stat; ret = afr_build_child_loc (this, &expunge_local->loc, &local->loc, - name, entry->d_stat.ia_gfid); + name); if (ret != 0) { op_errno = EINVAL; goto out; @@ -1819,7 +1819,7 @@ afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this, impunge_local = impunge_frame->local; impunge_sh = &impunge_local->self_heal; ret = afr_build_child_loc (this, &impunge_local->loc, &local->loc, - entry->d_name, entry->d_stat.ia_gfid); + entry->d_name); loc_copy (&impunge_sh->parent_loc, &local->loc); if (ret != 0) { op_errno = ENOMEM; diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index 186d7dd26..1f071b871 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -26,8 +26,53 @@ #include "afr-self-heald.h" #include "afr-self-heal-common.h" +#define AFR_POLL_TIMEOUT 600 + +void +afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl); + +void +afr_do_poll_self_heal (void *data) +{ + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + struct timeval timeout = {0}; + xlator_t *this = NULL; + long child = (long)data; + int i = 0; + + this = THIS; + priv = this->private; + shd = &priv->shd; + + if (child == AFR_ALL_CHILDREN) { //done by command + for (i = 0; i < priv->child_count; i++) + afr_start_crawl (this, i, INDEX); + goto out; + } else { + afr_start_crawl (this, child, INDEX); + if (shd->pos[child] == AFR_POS_REMOTE) + goto out; + } + timeout.tv_sec = AFR_POLL_TIMEOUT; + timeout.tv_usec = 0; + if (shd->timer[child]) + gf_timer_call_cancel (this->ctx, shd->timer[child]); + shd->timer[child] = gf_timer_call_after (this->ctx, timeout, + afr_do_poll_self_heal, data); + + if (shd->timer[child] == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "Cannot create pending self-heal polling timer for %s", + priv->children[child]->name); + } +out: + return; +} + static int -_crawl_directory (loc_t *loc, pid_t pid); +_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data, + xlator_t *readdir_xl); static int get_pathinfo_host (char *pathinfo, char *hostname, size_t size) { @@ -84,30 +129,215 @@ out: return ret; } -inline void -afr_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent) + +int +afr_crawl_build_start_loc (xlator_t *this, afr_crawl_data_t *crawl_data, + loc_t *dirloc, xlator_t *readdir_xl) +{ + afr_private_t *priv = NULL; + dict_t *xattr = NULL; + void *index_gfid = NULL; + loc_t rootloc = {0}; + struct iatt iatt = {0}; + struct iatt parent = {0}; + int ret = 0; + + priv = this->private; + if (crawl_data->crawl == FULL) { + afr_build_root_loc (this, dirloc); + } else { + afr_build_root_loc (this, &rootloc); + ret = syncop_getxattr (readdir_xl, &rootloc, &xattr, + GF_XATTROP_INDEX_GFID); + if (ret < 0) + goto out; + ret = dict_get_ptr (xattr, GF_XATTROP_INDEX_GFID, &index_gfid); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "failed to get index " + "dir gfid on %s", readdir_xl->name); + goto out; + } + if (!index_gfid) { + gf_log (this->name, GF_LOG_ERROR, "index gfid empty " + "on %s", readdir_xl->name); + ret = -1; + goto out; + } + uuid_copy (dirloc->gfid, index_gfid); + dirloc->path = ""; + dirloc->inode = inode_new (priv->root_inode->table); + ret = syncop_lookup (readdir_xl, dirloc, NULL, + &iatt, NULL, &parent); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "lookup failed on " + "index dir on %s", readdir_xl->name); + goto out; + } + inode_link (dirloc->inode, NULL, NULL, &iatt); + } + ret = 0; +out: + if (xattr) + dict_unref (xattr); + loc_wipe (&rootloc); + return ret; +} + +int +afr_crawl_opendir (xlator_t *this, afr_crawl_data_t *crawl_data, fd_t **dirfd, + loc_t *dirloc, xlator_t *readdir_xl) +{ + fd_t *fd = NULL; + int ret = 0; + + if (crawl_data->crawl == FULL) { + fd = fd_create (dirloc->inode, crawl_data->pid); + if (!fd) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create fd for %s", dirloc->path); + ret = -1; + goto out; + } + + ret = syncop_opendir (readdir_xl, dirloc, fd); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "opendir failed on %s", dirloc->path); + goto out; + } + } else { + fd = fd_anonymous (dirloc->inode); + } + ret = 0; +out: + if (!ret) + *dirfd = fd; + return ret; +} + +xlator_t* +afr_crawl_readdir_xl_get (xlator_t *this, afr_crawl_data_t *crawl_data) { - afr_update_loc_gfids (loc, iatt, parent); - uuid_copy (loc->inode->gfid, iatt->ia_gfid); + afr_private_t *priv = this->private; + + if (crawl_data->crawl == FULL) { + return this; + } else { + return priv->children[crawl_data->child]; + } + return NULL; +} + +int +afr_crawl_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, + gf_dirent_t *entry, afr_crawl_data_t *crawl_data) +{ + int ret = 0; + afr_private_t *priv = NULL; + + priv = this->private; + if (crawl_data->crawl == FULL) { + ret = afr_build_child_loc (this, child, parent, entry->d_name); + } else { + child->path = ""; + child->inode = inode_new (priv->root_inode->table); + uuid_parse (entry->d_name, child->gfid); + } + return ret; +} + +gf_boolean_t +_crawl_proceed (xlator_t *this, int child) +{ + afr_private_t *priv = this->private; + gf_boolean_t proceed = _gf_false; + + if (!priv->child_up[child]) { + gf_log (this->name, GF_LOG_ERROR, "Stopping crawl for %s " + ", subvol went down", priv->children[child]->name); + goto out; + } + + if (afr_up_children_count (priv->child_up, + priv->child_count) < 2) { + gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as " + "< 2 children are up"); + goto out; + } + proceed = _gf_true; +out: + return proceed; +} + +static int +_build_index_loc (xlator_t *this, loc_t *loc, char *name, loc_t *parent) +{ + int ret = 0; + + uuid_copy (loc->pargfid, parent->inode->gfid); + loc->path = ""; + loc->name = name; + loc->parent = inode_ref (parent->inode); + if (!loc->parent) { + loc->path = NULL; + loc_wipe (loc); + ret = -1; + } + return ret; +} + +void +_index_crawl_post_lookup_fop (xlator_t *this, loc_t *parentloc, + gf_dirent_t *entry, int op_ret, int op_errno, + xlator_t *readdir_xl) +{ + loc_t index_loc = {0}; + int ret = 0; + + if (op_ret && (op_errno == ENOENT)) { + ret = _build_index_loc (this, &index_loc, entry->d_name, + parentloc); + if (ret) + goto out; + gf_log (this->name, GF_LOG_INFO, "Removing stale index " + "for %s on %s", index_loc.name, readdir_xl->name); + ret = syncop_unlink (readdir_xl, &index_loc); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s: Failed to remove" + " index on %s - %s", index_loc.name, + readdir_xl->name, strerror (errno)); + } + index_loc.path = NULL; + loc_wipe (&index_loc); + } +out: + return; } static int _perform_self_heal (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries, - off_t *offset, pid_t pid) + off_t *offset, afr_crawl_data_t *crawl_data, + xlator_t *readdir_xl) { gf_dirent_t *entry = NULL; gf_dirent_t *tmp = NULL; struct iatt iatt = {0}; - struct iatt parent = {0};; + struct iatt parent = {0}; int ret = 0; loc_t entry_loc = {0}; + fd_t *fd = NULL; list_for_each_entry_safe (entry, tmp, &entries->list, list) { + if (!_crawl_proceed (this, crawl_data->child)) { + ret = -1; + goto out; + } *offset = entry->d_off; if (IS_ENTRY_CWD (entry->d_name) || IS_ENTRY_PARENT (entry->d_name)) continue; - if (uuid_is_null (entry->d_stat.ia_gfid)) { + if ((crawl_data->crawl == FULL) && + uuid_is_null (entry->d_stat.ia_gfid)) { gf_log (this->name, GF_LOG_WARNING, "%s/%s: No " "gfid present skipping", parentloc->path, entry->d_name); @@ -115,21 +345,44 @@ _perform_self_heal (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries, } loc_wipe (&entry_loc); - ret = afr_build_child_loc (this, &entry_loc, parentloc, - entry->d_name, entry->d_stat.ia_gfid); + ret = afr_crawl_build_child_loc (this, &entry_loc, parentloc, + entry, crawl_data); if (ret) goto out; - gf_log (this->name, GF_LOG_DEBUG, "lookup %s", entry_loc.path); + if (uuid_is_null (entry_loc.gfid)) { + gf_log (this->name, GF_LOG_WARNING, "failed to build " + "location for %s", entry->d_name); + continue; + } + if (entry_loc.path) + gf_log (this->name, GF_LOG_DEBUG, "lookup %s", + entry_loc.path); + else + gf_log (this->name, GF_LOG_DEBUG, "lookup %s", + uuid_utoa (entry_loc.gfid)); ret = syncop_lookup (this, &entry_loc, NULL, &iatt, NULL, &parent); + if (crawl_data->crawl == INDEX) { + _index_crawl_post_lookup_fop (this, parentloc, entry, + ret, errno, readdir_xl); + entry_loc.path = NULL; + loc_wipe (&entry_loc); + continue; + } + //Don't fail the crawl if lookup fails as it //could be because of split-brain if (ret || (!IA_ISDIR (iatt.ia_type))) continue; - afr_fill_loc_info (&entry_loc, &iatt, &parent); - ret = _crawl_directory (&entry_loc, pid); + inode_link (entry_loc.inode, parentloc->inode, NULL, &iatt); + ret = afr_crawl_opendir (this, crawl_data, &fd, &entry_loc, + readdir_xl); + if (ret) + continue; + ret = _crawl_directory (fd, &entry_loc, crawl_data, readdir_xl); + fd_unref (fd); } ret = 0; out: @@ -139,64 +392,50 @@ out: } static int -_crawl_directory (loc_t *loc, pid_t pid) +_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data, + xlator_t *readdir_xl) { xlator_t *this = NULL; - afr_private_t *priv = NULL; - fd_t *fd = NULL; off_t offset = 0; gf_dirent_t entries; - struct iatt iatt = {0}; - struct iatt parent = {0};; int ret = 0; gf_boolean_t free_entries = _gf_false; INIT_LIST_HEAD (&entries.list); this = THIS; - priv = this->private; GF_ASSERT (loc->inode); - gf_log (this->name, GF_LOG_DEBUG, "crawling %s", loc->path); - fd = fd_create (loc->inode, pid); - if (!fd) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to create fd for %s", loc->path); - goto out; - } - - if (!loc->parent) { - ret = syncop_lookup (this, loc, NULL, - &iatt, NULL, &parent); - } - - ret = syncop_opendir (this, loc, fd); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, - "opendir failed on %s", loc->path); - goto out; - } + if (loc->path) + gf_log (this->name, GF_LOG_DEBUG, "crawling %s", loc->path); + else + gf_log (this->name, GF_LOG_DEBUG, "crawling %s", + uuid_utoa (loc->gfid)); - while (syncop_readdirp (this, fd, 131072, offset, NULL, &entries)) { + while (1) { + if (crawl_data->crawl == FULL) + ret = syncop_readdirp (readdir_xl, fd, 131072, offset, + NULL, &entries); + else + ret = syncop_readdir (readdir_xl, fd, 131072, offset, + &entries); + if (ret <= 0) + break; ret = 0; free_entries = _gf_true; - if (afr_up_children_count (priv->child_up, - priv->child_count) < 2) { - gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as " - "< 2 children are up"); + + if (!_crawl_proceed (this, crawl_data->child)) { ret = -1; goto out; } - if (list_empty (&entries.list)) goto out; - ret = _perform_self_heal (this, loc, &entries, &offset, pid); + ret = _perform_self_heal (this, loc, &entries, &offset, + crawl_data, readdir_xl); gf_dirent_free (&entries); free_entries = _gf_false; } - if (fd) - fd_unref (fd); ret = 0; out: if (free_entries) @@ -204,6 +443,20 @@ out: return ret; } +static char* +position_str_get (afr_child_pos_t pos) +{ + switch (pos) { + case AFR_POS_UNKNOWN: + return "unknown"; + case AFR_POS_LOCAL: + return "local"; + case AFR_POS_REMOTE: + return "remote"; + } + return NULL; +} + int afr_find_child_position (xlator_t *this, int child) { @@ -214,35 +467,21 @@ afr_find_child_position (xlator_t *this, int child) gf_boolean_t local = _gf_false; char *pathinfo = NULL; afr_child_pos_t *pos = NULL; - inode_table_t *itable = NULL; priv = this->private; pos = &priv->shd.pos[child]; - if (*pos != AFR_POS_UNKNOWN) { - goto out; - } - - //TODO: Hack to make the root_loc hack work - LOCK (&priv->lock); - { - if (!priv->root_inode) { - itable = inode_table_new (0, this); - if (!itable) - goto unlock; - priv->root_inode = inode_new (itable); + if (!priv->root_inode) { + LOCK (&priv->lock); + { if (!priv->root_inode) - goto unlock; + priv->root_inode = inode_ref + (this->itable->root); } + UNLOCK (&priv->lock); } -unlock: - UNLOCK (&priv->lock); - if (!priv->root_inode) { - ret = -1; - goto out; - } - afr_build_root_loc (priv->root_inode, &loc); + afr_build_root_loc (this, &loc); ret = syncop_getxattr (priv->children[child], &loc, &xattr_rsp, GF_XATTR_PATHINFO_KEY); @@ -267,8 +506,12 @@ unlock: else *pos = AFR_POS_REMOTE; - gf_log (this->name, GF_LOG_INFO, "child %d is %d", child, *pos); + gf_log (this->name, GF_LOG_INFO, "child %s is %s", + priv->children[child]->name, position_str_get (*pos)); out: + if (ret) + *pos = AFR_POS_UNKNOWN; + loc_wipe (&loc); return ret; } @@ -280,93 +523,34 @@ afr_crawl_done (int ret, call_frame_t *sync_frame, void *data) return 0; } -static int -afr_find_all_children_postions (xlator_t *this) -{ - int ret = -1; - int i = 0; - gf_boolean_t succeeded = _gf_false; - afr_private_t *priv = NULL; - - priv = this->private; - for (i = 0; i < priv->child_count; i++) { - if (priv->child_up[i] != 1) - continue; - ret = afr_find_child_position (this, i); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to determine if the " - "child %s is local.", - priv->children[i]->name); - continue; - } - succeeded = _gf_true; - } - if (succeeded) - ret = 0; - return ret; -} - -static gf_boolean_t -afr_local_child_exists (afr_child_pos_t *pos, unsigned int child_count) -{ - int i = 0; - gf_boolean_t local = _gf_false; - - for (i = 0; i < child_count; i++, pos++) { - if (*pos == AFR_POS_LOCAL) { - local = _gf_true; - break; - } - } - return local; -} - -int -afr_init_child_position (xlator_t *this, int child) -{ - int ret = 0; - - if (child == AFR_ALL_CHILDREN) { - ret = afr_find_all_children_postions (this); - } else { - ret = afr_find_child_position (this, child); - } - return ret; -} - -int +static inline int afr_is_local_child (afr_self_heald_t *shd, int child, unsigned int child_count) { - gf_boolean_t local = _gf_false; - - if (child == AFR_ALL_CHILDREN) - local = afr_local_child_exists (shd->pos, child_count); - else - local = (shd->pos[child] == AFR_POS_LOCAL); - - return local; + return (shd->pos[child] == AFR_POS_LOCAL); } static int -afr_crawl_directory (xlator_t *this, pid_t pid) +afr_crawl_directory (xlator_t *this, afr_crawl_data_t *crawl_data) { afr_private_t *priv = NULL; afr_self_heald_t *shd = NULL; - loc_t loc = {0}; + loc_t dirloc = {0}; gf_boolean_t crawl = _gf_false; - int ret = 0; + int ret = 0; + xlator_t *readdir_xl = NULL; + fd_t *fd = NULL; + int child = -1; priv = this->private; shd = &priv->shd; - + child = crawl_data->child; LOCK (&priv->lock); { - if (shd->inprogress) { - shd->pending = _gf_true; + if (shd->inprogress[child]) { + shd->pending[child] = _gf_true; } else { - shd->inprogress = _gf_true; + shd->inprogress[child] = _gf_true; crawl = _gf_true; } } @@ -377,28 +561,56 @@ afr_crawl_directory (xlator_t *this, pid_t pid) goto out; } - if (!crawl) + if (!crawl) { + gf_log (this->name, GF_LOG_INFO, "Another crawl is in progress " + "for %s", priv->children[child]->name); goto out; + } - afr_build_root_loc (priv->root_inode, &loc); - while (crawl) { - ret = _crawl_directory (&loc, pid); + do { + readdir_xl = afr_crawl_readdir_xl_get (this, crawl_data); + if (!readdir_xl) + goto done; + ret = afr_crawl_build_start_loc (this, crawl_data, &dirloc, + readdir_xl); + if (ret) + goto done; + ret = afr_crawl_opendir (this, crawl_data, &fd, &dirloc, + readdir_xl); + if (ret) + goto done; + ret = _crawl_directory (fd, &dirloc, crawl_data, readdir_xl); if (ret) - gf_log (this->name, GF_LOG_ERROR, "Crawl failed"); + gf_log (this->name, GF_LOG_ERROR, "Crawl failed on %s", + readdir_xl->name); else - gf_log (this->name, GF_LOG_INFO, "Crawl completed"); + gf_log (this->name, GF_LOG_INFO, "Crawl completed " + "on %s", readdir_xl->name); + fd_unref (fd); + fd = NULL; +done: LOCK (&priv->lock); { - if (shd->pending) { - shd->pending = _gf_false; + if (shd->pending[child]) { + shd->pending[child] = _gf_false; } else { - shd->inprogress = _gf_false; + shd->inprogress[child] = _gf_false; crawl = _gf_false; } } UNLOCK (&priv->lock); - } + if (crawl_data->crawl == INDEX) { + dirloc.path = NULL; + loc_wipe (&dirloc); + } + } while (crawl); out: + if (fd) + fd_unref (fd); + if (crawl_data->crawl == INDEX) { + dirloc.path = NULL; + loc_wipe (&dirloc); + } return ret; } @@ -415,20 +627,22 @@ afr_crawl (void *data) priv = this->private; shd = &priv->shd; - ret = afr_init_child_position (this, crawl_data->child); + if (!_crawl_proceed (this, crawl_data->child)) + goto out; + ret = afr_find_child_position (this, crawl_data->child); if (ret) goto out; if (!afr_is_local_child (shd, crawl_data->child, priv->child_count)) goto out; - ret = afr_crawl_directory (this, crawl_data->pid); + ret = afr_crawl_directory (this, crawl_data); out: return ret; } void -afr_proactive_self_heal (xlator_t *this, int idx) +afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl) { afr_private_t *priv = NULL; afr_self_heald_t *shd = NULL; @@ -441,10 +655,6 @@ afr_proactive_self_heal (xlator_t *this, int idx) if (!shd->enabled) goto out; - if ((idx != AFR_ALL_CHILDREN) && - (shd->pos[idx] == AFR_POS_REMOTE)) - goto out; - frame = create_frame (this, this->ctx->pool); if (!frame) goto out; @@ -457,7 +667,9 @@ afr_proactive_self_heal (xlator_t *this, int idx) goto out; crawl_data->child = idx; crawl_data->pid = frame->root->pid; - gf_log (this->name, GF_LOG_INFO, "starting crawl for %d", idx); + crawl_data->crawl = crawl; + gf_log (this->name, GF_LOG_INFO, "starting crawl for %s", + priv->children[idx]->name); ret = synctask_new (this->ctx->env, afr_crawl, afr_crawl_done, frame, crawl_data); if (ret) @@ -467,16 +679,25 @@ out: return; } -//TODO: This is a hack +//void +//afr_full_self_heal (xlator_t *this) +//{ +// int i = 0; +// afr_private_t *priv = this->private; +// +// for (i = 0; i < priv->child_count; i++) +// afr_start_crawl (this, i, FULL); +//} + void -afr_build_root_loc (inode_t *inode, loc_t *loc) +afr_build_root_loc (xlator_t *this, loc_t *loc) { - loc->path = "/"; + afr_private_t *priv = NULL; + + priv = this->private; + loc->path = gf_strdup ("/"); loc->name = ""; - loc->inode = inode; - loc->inode->ia_type = IA_IFDIR; - memset (loc->inode->gfid, 0, 16); - loc->inode->gfid[15] = 1; + loc->inode = inode_ref (priv->root_inode); uuid_copy (loc->gfid, loc->inode->gfid); } diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h index 6eb119b07..eb1021995 100644 --- a/xlators/cluster/afr/src/afr-self-heald.h +++ b/xlators/cluster/afr/src/afr-self-heald.h @@ -26,18 +26,26 @@ #define IS_ENTRY_PARENT(entry) (!strcmp (entry, "..")) #define AFR_ALL_CHILDREN -1 +typedef enum { + INDEX, + FULL, +} afr_crawl_type_t; typedef struct afr_crawl_data_ { - int child; - pid_t pid; + int child; + pid_t pid; + afr_crawl_type_t crawl; + xlator_t *readdir_xl; } afr_crawl_data_t; void afr_proactive_self_heal (xlator_t *this, int idx); -void afr_build_root_loc (inode_t *inode, loc_t *loc); +void afr_build_root_loc (xlator_t *this, loc_t *loc); int afr_set_root_gfid (dict_t *dict); inline void afr_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent); +void +afr_do_poll_self_heal (void *data); #endif /* __AFR_SELF_HEALD_H__ */ diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 18cd030f1..abc6aa3e5 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -30,6 +30,8 @@ #endif #include "afr-common.c" +#define SHD_INODE_LRU_LIMIT 100 + struct volume_options options[]; int32_t @@ -347,6 +349,33 @@ init (xlator_t *this) goto out; } + priv->shd.pending = GF_CALLOC (sizeof (*priv->shd.pending), child_count, + gf_afr_mt_afr_shd_bool_t); + if (!priv->shd.pending) { + ret = -ENOMEM; + goto out; + } + + priv->shd.inprogress = GF_CALLOC (sizeof (*priv->shd.inprogress), + child_count, + gf_afr_mt_afr_shd_bool_t); + if (!priv->shd.inprogress) { + ret = -ENOMEM; + goto out; + } + priv->shd.timer = GF_CALLOC (sizeof (*priv->shd.timer), child_count, + gf_afr_mt_afr_shd_timer_t); + if (!priv->shd.timer) { + ret = -ENOMEM; + goto out; + } + if (priv->shd.enabled) { + this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this); + if (!this->itable) { + ret = -ENOMEM; + goto out; + } + } priv->first_lookup = 1; priv->root_inode = NULL; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 003d666e0..f3d372de5 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -32,6 +32,7 @@ #include "afr-self-heal-algorithm.h" #include "libxlator.h" +#include "timer.h" #define AFR_XATTR_PREFIX "trusted.afr" #define AFR_PATHINFO_HEADER "REPLICATE:" @@ -89,9 +90,10 @@ typedef struct afr_inode_ctx_ { typedef struct afr_self_heald_ { gf_boolean_t enabled; - gf_boolean_t pending; - gf_boolean_t inprogress; + gf_boolean_t *pending; + gf_boolean_t *inprogress; afr_child_pos_t *pos; + gf_timer_t **timer; } afr_self_heald_t; typedef struct _afr_private { diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c index df0b31166..281bfd722 100644 --- a/xlators/cluster/afr/src/pump.c +++ b/xlators/cluster/afr/src/pump.c @@ -167,7 +167,7 @@ pump_save_path (xlator_t *this, const char *path) GF_ASSERT (priv->root_inode); - afr_build_root_loc (priv->root_inode, &loc); + afr_build_root_loc (this, &loc); dict = dict_new (); dict_ret = dict_set_str (dict, PUMP_PATH, (char *)path); @@ -187,6 +187,7 @@ pump_save_path (xlator_t *this, const char *path) dict_unref (dict); + loc_wipe (&loc); return 0; } @@ -384,8 +385,7 @@ gf_pump_traverse_directory (loc_t *loc) } loc_wipe (&entry_loc); ret = afr_build_child_loc (this, &entry_loc, loc, - entry->d_name, - entry->d_stat.ia_gfid); + entry->d_name); if (ret) goto out; @@ -491,7 +491,7 @@ pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this, priv = this->private; - afr_build_root_loc (priv->root_inode, &loc); + afr_build_root_loc (this, &loc); ret = syncop_removexattr (priv->children[source], &loc, PUMP_PATH); @@ -507,6 +507,7 @@ pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this, "failed with %s", strerror (errno)); } + loc_wipe (&loc); return pump_command_reply (frame, this); } @@ -526,7 +527,7 @@ pump_complete_migration (xlator_t *this) GF_ASSERT (priv->root_inode); - afr_build_root_loc (priv->root_inode, &loc); + afr_build_root_loc (this, &loc); dict = dict_new (); @@ -568,6 +569,7 @@ pump_complete_migration (xlator_t *this) call_resume (pump_priv->cleaner); } + loc_wipe (&loc); return 0; } @@ -623,7 +625,7 @@ pump_task (void *data) GF_ASSERT (priv->root_inode); - afr_build_root_loc (priv->root_inode, &loc); + afr_build_root_loc (this, &loc); xattr_req = dict_new (); if (!xattr_req) { gf_log (this->name, GF_LOG_DEBUG, @@ -661,6 +663,7 @@ out: if (xattr_req) dict_unref (xattr_req); + loc_wipe (&loc); return 0; } @@ -795,7 +798,7 @@ pump_initiate_sink_connect (call_frame_t *frame, xlator_t *this) GF_ASSERT (priv->root_inode); - afr_build_root_loc (priv->root_inode, &loc); + afr_build_root_loc (this, &loc); data = data_ref (dict_get (local->dict, RB_PUMP_CMD_START)); if (!data) { @@ -850,6 +853,7 @@ out: if (ret && clnt_cmd) GF_FREE (clnt_cmd); + loc_wipe (&loc); return ret; } @@ -1033,7 +1037,7 @@ pump_execute_start (call_frame_t *frame, xlator_t *this) GF_ASSERT (priv->root_inode); - afr_build_root_loc (priv->root_inode, &loc); + afr_build_root_loc (this, &loc); STACK_WIND (frame, pump_cmd_start_getxattr_cbk, @@ -1050,6 +1054,7 @@ out: pump_command_reply (frame, this); } + loc_wipe (&loc); return 0; } |