diff options
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 46 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-dir-read.c | 12 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 182 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 205 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-name.c | 33 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal.h | 5 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.c | 178 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.h | 2 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.c | 41 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 11 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 39 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 5 |
12 files changed, 677 insertions, 82 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 4c8fa31b679..1a4341a66d9 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -45,6 +45,41 @@ afr_quorum_errno(afr_private_t *priv) return ENOTCONN; } +gf_boolean_t +afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name, + pid_t pid) +{ + if (!__is_root_gfid(pargfid)) { + return _gf_false; + } + + if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) { + /*For backward compatibility /.landfill is private*/ + return _gf_true; + } + + if (pid == GF_CLIENT_PID_GSYNCD) { + /*geo-rep needs to create/sync private directory on slave because + * it appears in changelog*/ + return _gf_false; + } + + if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) { + if (strcmp(name, priv->anon_inode_name) == 0) { + /* anonymous-inode dir is private*/ + return _gf_true; + } + } else { + if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) == + 0) { + /* anonymous-inode dir prefix is private for geo-rep to work*/ + return _gf_true; + } + } + + return _gf_false; +} + void afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, unsigned char *replies) @@ -3978,11 +4013,10 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) return 0; } - if (__is_root_gfid(loc->parent->gfid)) { - if (!strcmp(loc->name, GF_REPLICATE_TRASH_DIR)) { - op_errno = EPERM; - goto out; - } + if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name, + frame->root->pid)) { + op_errno = EPERM; + goto out; } local = AFR_FRAME_INIT(frame, op_errno); @@ -5660,6 +5694,7 @@ afr_priv_dump(xlator_t *this) priv->background_self_heal_count); gf_proc_dump_write("healers", "%d", priv->healers); gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode); + gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode); if (priv->quorum_count == AFR_QUORUM_AUTO) { gf_proc_dump_write("quorum-type", "auto"); } else if (priv->quorum_count == 0) { @@ -6653,6 +6688,7 @@ afr_priv_destroy(afr_private_t *priv) GF_FREE(priv->local); GF_FREE(priv->pending_key); GF_FREE(priv->children); + GF_FREE(priv->anon_inode); GF_FREE(priv->child_up); GF_FREE(priv->halo_child_up); GF_FREE(priv->child_latency); diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index f69013f3e0a..f8bf8340dab 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -164,8 +164,8 @@ afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol) } static void -afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol, - gf_dirent_t *entries, fd_t *fd) +afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries, + int subvol, gf_dirent_t *entries, fd_t *fd) { int ret = -1; gf_dirent_t *entry = NULL; @@ -183,8 +183,8 @@ afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol, list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list) { - if (__is_root_gfid(fd->inode->gfid) && - !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) { + if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name, + frame->root->pid)) { continue; } @@ -228,8 +228,8 @@ afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, } if (op_ret >= 0) - afr_readdir_transform_entries(subvol_entries, (long)cookie, &entries, - local->fd); + afr_readdir_transform_entries(frame, subvol_entries, (long)cookie, + &entries, local->fd); AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata); diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index f35c41df274..a580a1584cc 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -2750,3 +2750,185 @@ afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources, out: return source; } + +static int +afr_anon_inode_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + afr_local_t *local = frame->local; + int i = (long)cookie; + + local->replies[i].valid = 1; + local->replies[i].op_ret = op_ret; + local->replies[i].op_errno = op_errno; + if (op_ret == 0) { + local->op_ret = 0; + local->replies[i].poststat = *buf; + local->replies[i].preparent = *preparent; + local->replies[i].postparent = *postparent; + } + if (xdata) { + local->replies[i].xdata = dict_ref(xdata); + } + + syncbarrier_wake(&local->barrier); + return 0; +} + +int +afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode) +{ + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = this->private; + unsigned char *mkdir_on = alloca0(priv->child_count); + unsigned char *lookup_on = alloca0(priv->child_count); + loc_t loc = {0}; + int32_t op_errno = 0; + int32_t child_op_errno = 0; + struct iatt iatt = {0}; + dict_t *xdata = NULL; + uuid_t anon_inode_gfid = {0}; + int mkdir_count = 0; + int i = 0; + + /*Try to mkdir everywhere and return success if the dir exists on 'child' + */ + + if (!priv->use_anon_inode) { + op_errno = EINVAL; + goto out; + } + + frame = afr_frame_create(this, &op_errno); + if (op_errno) { + goto out; + } + local = frame->local; + if (!local->child_up[child]) { + /*Other bricks may need mkdir so don't error out yet*/ + child_op_errno = ENOTCONN; + } + gf_uuid_parse(priv->anon_gfid_str, anon_inode_gfid); + for (i = 0; i < priv->child_count; i++) { + if (!local->child_up[i]) + continue; + + if (priv->anon_inode[i]) { + mkdir_on[i] = 0; + } else { + mkdir_on[i] = 1; + mkdir_count++; + } + } + + if (mkdir_count == 0) { + *linked_inode = inode_find(this->itable, anon_inode_gfid); + if (*linked_inode) { + op_errno = 0; + goto out; + } + } + + loc.parent = inode_ref(this->itable->root); + loc.name = priv->anon_inode_name; + loc.inode = inode_new(this->itable); + if (!loc.inode) { + op_errno = ENOMEM; + goto out; + } + + xdata = dict_new(); + if (!xdata) { + op_errno = ENOMEM; + goto out; + } + + op_errno = -dict_set_gfuuid(xdata, "gfid-req", anon_inode_gfid, _gf_true); + if (op_errno) { + goto out; + } + + if (mkdir_count == 0) { + memcpy(lookup_on, local->child_up, priv->child_count); + goto lookup; + } + + AFR_ONLIST(mkdir_on, frame, afr_anon_inode_mkdir_cbk, mkdir, &loc, 0755, 0, + xdata); + + for (i = 0; i < priv->child_count; i++) { + if (!mkdir_on[i]) { + continue; + } + + if (local->replies[i].op_ret == 0) { + priv->anon_inode[i] = 1; + iatt = local->replies[i].poststat; + } else if (local->replies[i].op_ret < 0 && + local->replies[i].op_errno == EEXIST) { + lookup_on[i] = 1; + } else if (i == child) { + child_op_errno = local->replies[i].op_errno; + } + } + + if (AFR_COUNT(lookup_on, priv->child_count) == 0) { + goto link; + } + +lookup: + AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc, + xdata); + for (i = 0; i < priv->child_count; i++) { + if (!lookup_on[i]) { + continue; + } + + if (local->replies[i].op_ret == 0) { + if (gf_uuid_compare(anon_inode_gfid, + local->replies[i].poststat.ia_gfid) == 0) { + priv->anon_inode[i] = 1; + iatt = local->replies[i].poststat; + } else { + if (i == child) + child_op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_DATA, + "%s has gfid: %s", priv->anon_inode_name, + uuid_utoa(local->replies[i].poststat.ia_gfid)); + } + } else if (i == child) { + child_op_errno = local->replies[i].op_errno; + } + } +link: + if (!gf_uuid_is_null(iatt.ia_gfid)) { + *linked_inode = inode_link(loc.inode, loc.parent, loc.name, &iatt); + if (*linked_inode) { + op_errno = 0; + inode_lookup(*linked_inode); + } else { + op_errno = ENOMEM; + } + goto out; + } + +out: + if (xdata) + dict_unref(xdata); + loc_wipe(&loc); + /*child_op_errno takes precedence*/ + if (child_op_errno == 0) { + child_op_errno = op_errno; + } + + if (child_op_errno && *linked_inode) { + inode_unref(*linked_inode); + *linked_inode = NULL; + } + if (frame) + AFR_STACK_DESTROY(frame); + return -child_op_errno; +} diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index ac31751997f..64893f441e3 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -16,54 +16,170 @@ #include <glusterfs/syncop-utils.h> #include <glusterfs/events.h> -static int -afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, - inode_t *inode, int child, struct afr_reply *replies) +int +afr_selfheal_entry_anon_inode(xlator_t *this, inode_t *dir, const char *name, + inode_t *inode, int child, + struct afr_reply *replies, + gf_boolean_t *anon_inode) { afr_private_t *priv = NULL; + afr_local_t *local = NULL; xlator_t *subvol = NULL; int ret = 0; + int i = 0; + char g[64] = {0}; + unsigned char *lookup_success = NULL; + call_frame_t *frame = NULL; + loc_t loc2 = { + 0, + }; loc_t loc = { 0, }; - char g[64]; priv = this->private; - subvol = priv->children[child]; + lookup_success = alloca0(priv->child_count); + uuid_utoa_r(replies[child].poststat.ia_gfid, g); + loc.inode = inode_new(inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + + if (replies[child].poststat.ia_type == IA_IFDIR) { + /* This directory may have sub-directory hierarchy which may need to + * be preserved for subsequent heals. So unconditionally move the + * directory to anonymous-inode directory*/ + *anon_inode = _gf_true; + goto anon_inode; + } + + frame = afr_frame_create(this, &ret); + if (!frame) { + ret = -ret; + goto out; + } + local = frame->local; + gf_uuid_copy(loc.gfid, replies[child].poststat.ia_gfid); + AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc, + NULL); + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].op_ret == 0) { + lookup_success[i] = 1; + } else if (local->replies[i].op_errno != ENOENT && + local->replies[i].op_errno != ESTALE) { + ret = -local->replies[i].op_errno; + } + } + + if (priv->quorum_count) { + if (afr_has_quorum(lookup_success, this, NULL)) { + *anon_inode = _gf_true; + } + } else if (AFR_COUNT(lookup_success, priv->child_count) > 1) { + *anon_inode = _gf_true; + } else if (ret) { + goto out; + } + +anon_inode: + if (!*anon_inode) { + ret = 0; + goto out; + } loc.parent = inode_ref(dir); gf_uuid_copy(loc.pargfid, dir->gfid); loc.name = name; - loc.inode = inode_ref(inode); - if (replies[child].valid && replies[child].op_ret == 0) { - switch (replies[child].poststat.ia_type) { - case IA_IFDIR: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), - name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), - subvol->name); - ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL); - break; - default: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid), - name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), - subvol->name); - ret = syncop_unlink(subvol, &loc, NULL, NULL); - break; - } + ret = afr_anon_inode_create(this, child, &loc2.parent); + if (ret < 0) + goto out; + + loc2.name = g; + ret = syncop_rename(subvol, &loc, &loc2, NULL, NULL); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "Rename to %s dir %s/%s (%s) on %s failed", + priv->anon_inode_name, uuid_utoa(dir->gfid), name, g, + subvol->name); + } else { + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "Rename to %s dir %s/%s (%s) on %s successful", + priv->anon_inode_name, uuid_utoa(dir->gfid), name, g, + subvol->name); } +out: loc_wipe(&loc); + loc_wipe(&loc2); + if (frame) { + AFR_STACK_DESTROY(frame); + } return ret; } int +afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, + inode_t *inode, int child, struct afr_reply *replies) +{ + char g[64] = {0}; + afr_private_t *priv = NULL; + xlator_t *subvol = NULL; + int ret = 0; + loc_t loc = { + 0, + }; + gf_boolean_t anon_inode = _gf_false; + + priv = this->private; + subvol = priv->children[child]; + + if ((!replies[child].valid) || (replies[child].op_ret < 0)) { + /*Nothing to do*/ + ret = 0; + goto out; + } + + if (priv->use_anon_inode) { + ret = afr_selfheal_entry_anon_inode(this, dir, name, inode, child, + replies, &anon_inode); + if (ret < 0 || anon_inode) + goto out; + } + + loc.parent = inode_ref(dir); + loc.inode = inode_new(inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + loc.name = name; + switch (replies[child].poststat.ia_type) { + case IA_IFDIR: + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), name, + uuid_utoa_r(replies[child].poststat.ia_gfid, g), + subvol->name); + ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL); + break; + default: + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid), + name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), + subvol->name); + ret = syncop_unlink(subvol, &loc, NULL, NULL); + break; + } + +out: + loc_wipe(&loc); + return ret; +} + +int afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, unsigned char *sources, inode_t *dir, const char *name, inode_t *inode, @@ -76,6 +192,9 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, loc_t srcloc = { 0, }; + loc_t anonloc = { + 0, + }; xlator_t *this = frame->this; afr_private_t *priv = NULL; dict_t *xdata = NULL; @@ -86,15 +205,17 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, 0, }; unsigned char *newentry = NULL; - char dir_uuid_str[64] = {0}, iatt_uuid_str[64] = {0}; + char iatt_uuid_str[64] = {0}; + char dir_uuid_str[64] = {0}; priv = this->private; iatt = &replies[source].poststat; + uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str); if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) { gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED, "Invalid ia_type (%d) or gfid(%s). source brick=%d, " "pargfid=%s, name=%s", - iatt->ia_type, uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str), source, + iatt->ia_type, iatt_uuid_str, source, uuid_utoa_r(dir->gfid, dir_uuid_str), name); ret = -EINVAL; goto out; @@ -120,14 +241,24 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, srcloc.inode = inode_ref(inode); gf_uuid_copy(srcloc.gfid, iatt->ia_gfid); - if (iatt->ia_type != IA_IFDIR) - ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0); - if (iatt->ia_type == IA_IFDIR || ret == -ENOENT || ret == -ESTALE) { + ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0); + if (ret == -ENOENT || ret == -ESTALE) { newentry[dst] = 1; ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies, sources, newentry); if (ret) goto out; + } else if (ret == 0 && iatt->ia_type == IA_IFDIR && priv->use_anon_inode) { + // Try rename from hidden directory + ret = afr_anon_inode_create(this, dst, &anonloc.parent); + if (ret < 0) + goto out; + anonloc.inode = inode_ref(inode); + anonloc.name = iatt_uuid_str; + ret = syncop_rename(priv->children[dst], &anonloc, &loc, NULL, NULL); + if (ret == -ENOENT || ret == -ESTALE) + ret = -1; /*This sets 'mismatch' to true*/ + goto out; } mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type); @@ -166,6 +297,7 @@ out: GF_FREE(linkname); loc_wipe(&loc); loc_wipe(&srcloc); + loc_wipe(&anonloc); return ret; } @@ -578,6 +710,11 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, priv = this->private; + if (afr_is_private_directory(priv, fd->inode->gfid, name, + GF_CLIENT_PID_SELF_HEALD)) { + return 0; + } + xattr = dict_new(); if (!xattr) return -ENOMEM; @@ -626,7 +763,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, replies); if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) { - ret = afr_shd_index_purge(subvol, parent_idx_inode, name, + ret = afr_shd_entry_purge(subvol, parent_idx_inode, name, inode->ia_type); /* Why is ret force-set to 0? We do not care about * index purge failing for full heal as it is quite @@ -756,10 +893,6 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd, if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) continue; - if (__is_root_gfid(fd->inode->gfid) && - !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) - continue; - ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name, loc.inode, subvol, local->need_full_crawl); @@ -822,7 +955,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry, /* The name indices under the pgfid index dir are guaranteed * to be regular files. Hence the hardcoding. */ - afr_shd_index_purge(subvol, parent->inode, entry->d_name, IA_IFREG); + afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG); ret = 0; goto out; } diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c index dd40c57ab12..834aac86d48 100644 --- a/xlators/cluster/afr/src/afr-self-heal-name.c +++ b/xlators/cluster/afr/src/afr-self-heal-name.c @@ -98,21 +98,12 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid, const char *bname, inode_t *inode, struct afr_reply *replies) { - loc_t loc = { - 0, - }; int i = 0; afr_private_t *priv = NULL; - char g[64]; int ret = 0; priv = this->private; - loc.parent = inode_ref(parent); - gf_uuid_copy(loc.pargfid, pargfid); - loc.name = bname; - loc.inode = inode_ref(inode); - for (i = 0; i < priv->child_count; i++) { if (!replies[i].valid) continue; @@ -120,30 +111,10 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid, if (replies[i].op_ret) continue; - switch (replies[i].poststat.ia_type) { - case IA_IFDIR: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging dir %s/%s (%s) on %s", uuid_utoa(pargfid), - bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g), - priv->children[i]->name); - - ret |= syncop_rmdir(priv->children[i], &loc, 1, NULL, NULL); - break; - default: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging file %s/%s (%s) on %s", uuid_utoa(pargfid), - bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g), - priv->children[i]->name); - - ret |= syncop_unlink(priv->children[i], &loc, NULL, NULL); - break; - } + ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i, + replies); } - loc_wipe(&loc); - return ret; } diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 7a038fa7fe3..48e6dbcfb18 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -369,4 +369,9 @@ gf_boolean_t afr_is_file_empty_on_all_children(afr_private_t *priv, struct afr_reply *replies); +int +afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, + inode_t *inode, int child, struct afr_reply *replies); +int +afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode); #endif /* !_AFR_SELFHEAL_H */ diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index f2e08908c24..109fd4b7421 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -222,7 +222,7 @@ out: } int -afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name, +afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name, ia_type_t type) { int ret = 0; @@ -424,7 +424,7 @@ afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, ret = afr_shd_selfheal(healer, healer->subvol, gfid); if (ret == -ENOENT || ret == -ESTALE) - afr_shd_index_purge(subvol, parent->inode, entry->d_name, val); + afr_shd_entry_purge(subvol, parent->inode, entry->d_name, val); if (ret == 2) /* If bricks crashed in pre-op after creating indices/xattrop @@ -843,6 +843,176 @@ out: return need_heal; } +static int +afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + void *data) +{ + struct subvol_healer *healer = data; + afr_private_t *priv = healer->this->private; + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + int ret = 0; + loc_t loc = {0}; + int count = 0; + int i = 0; + int op_errno = 0; + struct iatt *iatt = NULL; + gf_boolean_t multiple_links = _gf_false; + unsigned char *gfid_present = alloca0(priv->child_count); + unsigned char *entry_present = alloca0(priv->child_count); + char *type = "file"; + + frame = afr_frame_create(healer->this, &ret); + if (!frame) { + ret = -ret; + goto out; + } + local = frame->local; + if (AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) { + gf_msg_debug(healer->this->name, 0, + "Not all bricks are up. Skipping " + "cleanup of %s on %s", + entry->d_name, subvol->name); + ret = 0; + goto out; + } + + loc.inode = inode_new(parent->inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + ret = gf_uuid_parse(entry->d_name, loc.gfid); + if (ret) { + ret = 0; + goto out; + } + AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc, + NULL); + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].op_ret == 0) { + count++; + gfid_present[i] = 1; + iatt = &local->replies[i].poststat; + if (iatt->ia_type == IA_IFDIR) { + type = "dir"; + } + + if (i == healer->subvol) { + if (local->replies[i].poststat.ia_nlink > 1) { + multiple_links = _gf_true; + } + } + } else if (local->replies[i].op_errno != ENOENT && + local->replies[i].op_errno != ESTALE) { + /*We don't have complete view. Skip the entry*/ + gf_msg_debug(healer->this->name, local->replies[i].op_errno, + "Skipping cleanup of %s on %s", entry->d_name, + subvol->name); + ret = 0; + goto out; + } + } + + /*Inode is deleted from subvol*/ + if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) { + gf_msg(healer->this->name, GF_LOG_WARNING, 0, + AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type, + priv->anon_inode_name, entry->d_name, subvol->name); + ret = afr_shd_entry_purge(subvol, parent->inode, entry->d_name, + iatt->ia_type); + if (ret == -ENOENT || ret == -ESTALE) + ret = 0; + } else if (count > 1) { + loc_wipe(&loc); + loc.parent = inode_ref(parent->inode); + loc.name = entry->d_name; + loc.inode = inode_new(parent->inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, + &loc, NULL); + count = 0; + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].op_ret == 0) { + count++; + entry_present[i] = 1; + iatt = &local->replies[i].poststat; + } else if (local->replies[i].op_errno != ENOENT && + local->replies[i].op_errno != ESTALE) { + /*We don't have complete view. Skip the entry*/ + gf_msg_debug(healer->this->name, local->replies[i].op_errno, + "Skipping cleanup of %s on %s", entry->d_name, + subvol->name); + ret = 0; + goto out; + } + } + for (i = 0; i < priv->child_count; i++) { + if (gfid_present[i] && !entry_present[i]) { + /*Entry is not anonymous on at least one subvol*/ + gf_msg_debug(healer->this->name, 0, + "Valid entry present on %s " + "Skipping cleanup of %s on %s", + priv->children[i]->name, entry->d_name, + subvol->name); + ret = 0; + goto out; + } + } + + gf_msg(healer->this->name, GF_LOG_WARNING, 0, + AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging %s %s/%s on all subvols", type, priv->anon_inode_name, + entry->d_name); + ret = 0; + for (i = 0; i < priv->child_count; i++) { + op_errno = -afr_shd_entry_purge(priv->children[i], loc.parent, + entry->d_name, iatt->ia_type); + if (op_errno != ENOENT && op_errno != ESTALE) { + ret |= -op_errno; + } + } + } + +out: + if (frame) + AFR_STACK_DESTROY(frame); + loc_wipe(&loc); + return ret; +} + +static void +afr_cleanup_anon_inode_dir(struct subvol_healer *healer) +{ + int ret = 0; + call_frame_t *frame = NULL; + afr_private_t *priv = healer->this->private; + loc_t loc = {0}; + + ret = afr_anon_inode_create(healer->this, healer->subvol, &loc.inode); + if (ret) + goto out; + + frame = afr_frame_create(healer->this, &ret); + if (!frame) { + ret = -ret; + goto out; + } + + ret = syncop_mt_dir_scan(frame, priv->children[healer->subvol], &loc, + GF_CLIENT_PID_SELF_HEALD, healer, + afr_shd_anon_inode_cleaner, NULL, + priv->shd.max_threads, priv->shd.wait_qlength); +out: + if (frame) + AFR_STACK_DESTROY(frame); + loc_wipe(&loc); + return; +} + void * afr_shd_index_healer(void *data) { @@ -900,6 +1070,10 @@ afr_shd_index_healer(void *data) sleep(1); } while (ret > 0); + if (ret == 0) { + afr_cleanup_anon_inode_dir(healer); + } + if (ret == 0 && pre_crawl_xdata && !healer->crawl_event.heal_failed_count) { afr_shd_ta_check_and_unset_xattrs(this, &loc, healer, diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h index 687c28e6472..18db728ea7b 100644 --- a/xlators/cluster/afr/src/afr-self-heald.h +++ b/xlators/cluster/afr/src/afr-self-heald.h @@ -70,6 +70,6 @@ afr_shd_gfid_to_path(xlator_t *this, xlator_t *subvol, uuid_t gfid, char **path_p); int -afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name, +afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name, ia_type_t type); #endif /* !_AFR_SELF_HEALD_H */ diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 67e0a4d10be..df7366f0a65 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -135,6 +135,27 @@ set_data_self_heal_algorithm(afr_private_t *priv, char *algo) } } +void +afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options) +{ + char *volfile_id_str = NULL; + uuid_t anon_inode_gfid = {0}; + + /*If volume id is not present don't enable anything*/ + if (dict_get_str(options, "volume-id", &volfile_id_str)) + return; + GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + strlen(volfile_id_str) <= NAME_MAX); + /*anon_inode_name is not supposed to change once assigned*/ + if (!priv->anon_inode_name[0]) { + snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s", + AFR_ANON_DIR_PREFIX, volfile_id_str); + gf_uuid_parse(volfile_id_str, anon_inode_gfid); + /*Flip a bit to make sure volfile-id and anon-gfid are not same*/ + anon_inode_gfid[0] ^= 1; + uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str); + } +} + int reconfigure(xlator_t *this, dict_t *options) { @@ -290,6 +311,10 @@ reconfigure(xlator_t *this, dict_t *options) consistent_io = _gf_false; priv->consistent_io = consistent_io; + afr_handle_anon_inode_options(priv, options); + + GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool, + out); if (priv->shd.enabled) { if ((priv->shd.enabled != enabled_old) || (timeout_old != priv->shd.timeout)) @@ -541,7 +566,9 @@ init(xlator_t *this) GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out); GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out); + afr_handle_anon_inode_options(priv, this->options); + GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out); if (priv->quorum_count != 0) priv->consistent_io = _gf_false; @@ -553,6 +580,9 @@ init(xlator_t *this) goto out; } + priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count, + gf_afr_mt_char); + priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count, gf_afr_mt_char); @@ -561,7 +591,8 @@ init(xlator_t *this) priv->halo_child_up = GF_CALLOC(sizeof(unsigned char), child_count, gf_afr_mt_char); - if (!priv->child_up || !priv->child_latency || !priv->halo_child_up) { + if (!priv->child_up || !priv->child_latency || !priv->halo_child_up || + !priv->anon_inode) { ret = -ENOMEM; goto out; } @@ -1286,6 +1317,14 @@ struct volume_options options[] = { .tags = {"replicate"}, .description = "This option exists only for backward compatibility " "and configuring it doesn't have any effect"}, + {.key = {"use-anonymous-inode"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "no", + .op_version = {GD_OP_VERSION_8_0}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, + .tags = {"replicate"}, + .description = "Setting this option heals directory renames efficiently"}, + {.key = {NULL}}, }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 2e60708accf..d62f9a9caf2 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -42,6 +42,7 @@ #define AFR_LK_HEAL_DOM "afr.lock-heal.domain" #define AFR_HALO_MAX_LATENCY 99999 +#define AFR_ANON_DIR_PREFIX ".glusterfs-anonymous-inode" #define PFLAG_PENDING (1 << 0) #define PFLAG_SBRAIN (1 << 1) @@ -190,6 +191,7 @@ typedef struct _afr_private { struct list_head ta_waitq; struct list_head ta_onwireq; + unsigned char *anon_inode; unsigned char *child_up; unsigned char *halo_child_up; int64_t *child_latency; @@ -275,10 +277,15 @@ typedef struct _afr_private { gf_boolean_t esh_granular; gf_boolean_t consistent_io; gf_boolean_t data_self_heal; /* on/off */ + gf_boolean_t use_anon_inode; /*For lock healing.*/ struct list_head saved_locks; struct list_head lk_healq; + + /*For anon-inode handling */ + char anon_inode_name[NAME_MAX + 1]; + char anon_gfid_str[UUID_SIZE + 1]; } afr_private_t; typedef enum { @@ -1409,4 +1416,8 @@ afr_dom_lock_release(call_frame_t *frame); void afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, unsigned char *replies); + +gf_boolean_t +afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name, + pid_t pid); #endif /* __AFR_H__ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 4e5712e6447..8d6fb5e0fac 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -3810,6 +3810,38 @@ out: } static int +set_volfile_id_option(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + int clusters) +{ + xlator_t *xlator = NULL; + int i = 0; + int ret = -1; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + if (conf->op_version < GD_OP_VERSION_9_0) + return 0; + xlator = first_of(graph); + + for (i = 0; i < clusters; i++) { + ret = xlator_set_fixed_option(xlator, "volume-id", + uuid_utoa(volinfo->volume_id)); + if (ret) + goto out; + + xlator = xlator->next; + } + +out: + return ret; +} + +static int volgen_graph_build_afr_clusters(volgen_graph_t *graph, glusterd_volinfo_t *volinfo) { @@ -3851,6 +3883,13 @@ volgen_graph_build_afr_clusters(volgen_graph_t *graph, clusters = -1; goto out; } + + ret = set_volfile_id_option(graph, volinfo, clusters); + if (ret) { + clusters = -1; + goto out; + } + if (!volinfo->arbiter_count && !volinfo->thin_arbiter_count) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index f74876eec9f..398b4d76f52 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -3138,4 +3138,9 @@ struct volopt_map_entry glusterd_volopt_map[] = { .type = NO_DOC, }, + {.key = "cluster.use-anonymous-inode", + .voltype = "cluster/replicate", + .op_version = GD_OP_VERSION_9_0, + .value = "yes", + .flags = VOLOPT_FLAG_CLIENT_OPT}, {.key = NULL}}; |