diff options
Diffstat (limited to 'xlators/storage/posix/src/posix-metadata.c')
-rw-r--r-- | xlators/storage/posix/src/posix-metadata.c | 1063 |
1 files changed, 523 insertions, 540 deletions
diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c index 7488e59ae70..82b05ebffc2 100644 --- a/xlators/storage/posix/src/posix-metadata.c +++ b/xlators/storage/posix/src/posix-metadata.c @@ -23,648 +23,631 @@ static int gf_posix_xattr_enotsup_log; * save it on disk in machine independent format */ static inline void -posix_mdata_to_disk (posix_mdata_disk_t *out, posix_mdata_t *in) +posix_mdata_to_disk(posix_mdata_disk_t *out, posix_mdata_t *in) { - out->version = in->version; - out->flags = htobe64(in->flags); + out->version = in->version; + out->flags = htobe64(in->flags); - out->ctime.tv_sec = htobe64(in->ctime.tv_sec); - out->ctime.tv_nsec = htobe64(in->ctime.tv_nsec); + out->ctime.tv_sec = htobe64(in->ctime.tv_sec); + out->ctime.tv_nsec = htobe64(in->ctime.tv_nsec); - out->mtime.tv_sec = htobe64(in->mtime.tv_sec); - out->mtime.tv_nsec = htobe64(in->mtime.tv_nsec); + out->mtime.tv_sec = htobe64(in->mtime.tv_sec); + out->mtime.tv_nsec = htobe64(in->mtime.tv_nsec); - out->atime.tv_sec = htobe64(in->atime.tv_sec); - out->atime.tv_nsec = htobe64(in->atime.tv_nsec); + out->atime.tv_sec = htobe64(in->atime.tv_sec); + out->atime.tv_nsec = htobe64(in->atime.tv_nsec); } /* posix_mdata_from_disk converts posix_mdata_disk_t into host byte order */ static inline void -posix_mdata_from_disk (posix_mdata_t *out, posix_mdata_disk_t *in) +posix_mdata_from_disk(posix_mdata_t *out, posix_mdata_disk_t *in) { - out->version = in->version; - out->flags = be64toh(in->flags); + out->version = in->version; + out->flags = be64toh(in->flags); - out->ctime.tv_sec = be64toh(in->ctime.tv_sec); - out->ctime.tv_nsec = be64toh(in->ctime.tv_nsec); + out->ctime.tv_sec = be64toh(in->ctime.tv_sec); + out->ctime.tv_nsec = be64toh(in->ctime.tv_nsec); - out->mtime.tv_sec = be64toh(in->mtime.tv_sec); - out->mtime.tv_nsec = be64toh(in->mtime.tv_nsec); + out->mtime.tv_sec = be64toh(in->mtime.tv_sec); + out->mtime.tv_nsec = be64toh(in->mtime.tv_nsec); - out->atime.tv_sec = be64toh(in->atime.tv_sec); - out->atime.tv_nsec = be64toh(in->atime.tv_nsec); + out->atime.tv_sec = be64toh(in->atime.tv_sec); + out->atime.tv_nsec = be64toh(in->atime.tv_nsec); } /* posix_fetch_mdata_xattr fetches the posix_mdata_t from disk */ static int -posix_fetch_mdata_xattr (xlator_t *this, const char *real_path_arg, int _fd, - inode_t *inode, posix_mdata_t *metadata, int *op_errno) +posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd, + inode_t *inode, posix_mdata_t *metadata, int *op_errno) { - size_t size = -1; - int op_ret = -1; - char *value = NULL; - gf_boolean_t fd_based_fop = _gf_false; - char gfid_str[64] = {0}; - char *real_path = NULL; - - char *key = GF_XATTR_MDATA_KEY; - - if (!metadata) { - op_ret = -1; - goto out; - } - - if (_fd != -1) { - fd_based_fop = _gf_true; - } - if (!(fd_based_fop || real_path_arg)) { - MAKE_HANDLE_PATH (real_path, this, inode->gfid, NULL); - if (!real_path) { - uuid_utoa_r (inode->gfid, gfid_str); - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_LSTAT_FAILED, "lstat on gfid %s failed", - gfid_str); - op_ret = -1; - *op_errno = errno; - goto out; - } - } - - if (fd_based_fop) { - size = sys_fgetxattr (_fd, key, NULL, 0); - } else if (real_path_arg) { - size = sys_lgetxattr (real_path_arg, key, NULL, 0); - } else if (real_path) { - size = sys_lgetxattr (real_path, key, NULL, 0); - } - - if (size == -1) { - *op_errno = errno; - if ((*op_errno == ENOTSUP) || (*op_errno == ENOSYS)) { - GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, - this->name, GF_LOG_WARNING, - "Extended attributes not " - "supported (try remounting" - " brick with 'user_xattr' " - "flag)"); - } else if (*op_errno == ENOATTR || - *op_errno == ENODATA) { - gf_msg_debug (this->name, 0, - "No such attribute:%s for file %s " - "gfid: %s", - key, real_path ? real_path : (real_path_arg ? real_path_arg : "null"), - uuid_utoa(inode->gfid)); - } else { - gf_msg (this->name, GF_LOG_DEBUG, *op_errno, - P_MSG_XATTR_FAILED, "getxattr failed" - " on %s gfid: %s key: %s ", - real_path ? real_path : (real_path_arg ? real_path_arg : "null"), - uuid_utoa(inode->gfid), key); - } - op_ret = -1; - goto out; - } - - value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char); - if (!value) { - op_ret = -1; - *op_errno = ENOMEM; - goto out; + size_t size = -1; + int op_ret = -1; + char *value = NULL; + gf_boolean_t fd_based_fop = _gf_false; + char gfid_str[64] = {0}; + char *real_path = NULL; + + char *key = GF_XATTR_MDATA_KEY; + + if (!metadata) { + op_ret = -1; + goto out; + } + + if (_fd != -1) { + fd_based_fop = _gf_true; + } + if (!(fd_based_fop || real_path_arg)) { + MAKE_HANDLE_PATH(real_path, this, inode->gfid, NULL); + if (!real_path) { + uuid_utoa_r(inode->gfid, gfid_str); + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_LSTAT_FAILED, + "lstat on gfid %s failed", gfid_str); + op_ret = -1; + *op_errno = errno; + goto out; } - - if (fd_based_fop) { - size = sys_fgetxattr (_fd, key, value, size); - } else if (real_path_arg) { - size = sys_lgetxattr (real_path_arg, key, value, size); - } else if (real_path) { - size = sys_lgetxattr (real_path, key, value, size); - } - if (size == -1) { - op_ret = -1; - *op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_XATTR_FAILED, "getxattr failed on " - " on %s gfid: %s key: %s ", - real_path ? real_path : (real_path_arg ? real_path_arg : "null"), - uuid_utoa(inode->gfid), key); - goto out; + } + + if (fd_based_fop) { + size = sys_fgetxattr(_fd, key, NULL, 0); + } else if (real_path_arg) { + size = sys_lgetxattr(real_path_arg, key, NULL, 0); + } else if (real_path) { + size = sys_lgetxattr(real_path, key, NULL, 0); + } + + if (size == -1) { + *op_errno = errno; + if ((*op_errno == ENOTSUP) || (*op_errno == ENOSYS)) { + GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name, + GF_LOG_WARNING, + "Extended attributes not " + "supported (try remounting" + " brick with 'user_xattr' " + "flag)"); + } else if (*op_errno == ENOATTR || *op_errno == ENODATA) { + gf_msg_debug(this->name, 0, + "No such attribute:%s for file %s " + "gfid: %s", + key, + real_path ? real_path + : (real_path_arg ? real_path_arg : "null"), + uuid_utoa(inode->gfid)); + } else { + gf_msg(this->name, GF_LOG_DEBUG, *op_errno, P_MSG_XATTR_FAILED, + "getxattr failed" + " on %s gfid: %s key: %s ", + real_path ? real_path + : (real_path_arg ? real_path_arg : "null"), + uuid_utoa(inode->gfid), key); } - - posix_mdata_from_disk (metadata, (posix_mdata_disk_t*)value); - - op_ret = 0; + op_ret = -1; + goto out; + } + + value = GF_CALLOC(size + 1, sizeof(char), gf_posix_mt_char); + if (!value) { + op_ret = -1; + *op_errno = ENOMEM; + goto out; + } + + if (fd_based_fop) { + size = sys_fgetxattr(_fd, key, value, size); + } else if (real_path_arg) { + size = sys_lgetxattr(real_path_arg, key, value, size); + } else if (real_path) { + size = sys_lgetxattr(real_path, key, value, size); + } + if (size == -1) { + op_ret = -1; + *op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "getxattr failed on " + " on %s gfid: %s key: %s ", + real_path ? real_path : (real_path_arg ? real_path_arg : "null"), + uuid_utoa(inode->gfid), key); + goto out; + } + + posix_mdata_from_disk(metadata, (posix_mdata_disk_t *)value); + + op_ret = 0; out: - GF_FREE (value); - return op_ret; + GF_FREE(value); + return op_ret; } /* posix_store_mdata_xattr stores the posix_mdata_t on disk */ static int -posix_store_mdata_xattr (xlator_t *this, const char *real_path_arg, int fd, - inode_t *inode, posix_mdata_t *metadata) +posix_store_mdata_xattr(xlator_t *this, const char *real_path_arg, int fd, + inode_t *inode, posix_mdata_t *metadata) { - char *real_path = NULL; - int op_ret = 0; - gf_boolean_t fd_based_fop = _gf_false; - char *key = GF_XATTR_MDATA_KEY; - char gfid_str[64] = {0}; - posix_mdata_disk_t disk_metadata; - - if (!metadata) { - op_ret = -1; - goto out; - } - - if (fd != -1) { - fd_based_fop = _gf_true; - } - if (!(fd_based_fop || real_path_arg)) { - MAKE_HANDLE_PATH (real_path, this, inode->gfid, NULL); - if (!real_path) { - uuid_utoa_r (inode->gfid, gfid_str); - gf_msg (this->name, GF_LOG_DEBUG, errno, - P_MSG_LSTAT_FAILED, "lstat on gfid %s failed", - gfid_str); - op_ret = -1; - goto out; - } - } - - /* Set default version as 1 */ - posix_mdata_to_disk (&disk_metadata, metadata); - - if (fd_based_fop) { - op_ret = sys_fsetxattr (fd, key, - (void *) &disk_metadata, - sizeof (posix_mdata_disk_t), 0); - } else if (real_path_arg) { - op_ret = sys_lsetxattr (real_path_arg, key, - (void *) &disk_metadata, - sizeof (posix_mdata_disk_t), 0); - } else if (real_path) { - op_ret = sys_lsetxattr (real_path, key, - (void *) &disk_metadata, - sizeof (posix_mdata_disk_t), 0); + char *real_path = NULL; + int op_ret = 0; + gf_boolean_t fd_based_fop = _gf_false; + char *key = GF_XATTR_MDATA_KEY; + char gfid_str[64] = {0}; + posix_mdata_disk_t disk_metadata; + + if (!metadata) { + op_ret = -1; + goto out; + } + + if (fd != -1) { + fd_based_fop = _gf_true; + } + if (!(fd_based_fop || real_path_arg)) { + MAKE_HANDLE_PATH(real_path, this, inode->gfid, NULL); + if (!real_path) { + uuid_utoa_r(inode->gfid, gfid_str); + gf_msg(this->name, GF_LOG_DEBUG, errno, P_MSG_LSTAT_FAILED, + "lstat on gfid %s failed", gfid_str); + op_ret = -1; + goto out; } + } + + /* Set default version as 1 */ + posix_mdata_to_disk(&disk_metadata, metadata); + + if (fd_based_fop) { + op_ret = sys_fsetxattr(fd, key, (void *)&disk_metadata, + sizeof(posix_mdata_disk_t), 0); + } else if (real_path_arg) { + op_ret = sys_lsetxattr(real_path_arg, key, (void *)&disk_metadata, + sizeof(posix_mdata_disk_t), 0); + } else if (real_path) { + op_ret = sys_lsetxattr(real_path, key, (void *)&disk_metadata, + sizeof(posix_mdata_disk_t), 0); + } #ifdef GF_DARWIN_HOST_OS - if (real_path_arg) { - posix_dump_buffer(this, real_path_arg, key, value, 0); - } else if (real_path) { - posix_dump_buffer(this, real_path, key, value, 0); - } + if (real_path_arg) { + posix_dump_buffer(this, real_path_arg, key, value, 0); + } else if (real_path) { + posix_dump_buffer(this, real_path, key, value, 0); + } #endif out: - if (op_ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, - "file: %s: gfid: %s key:%s ", - real_path ? real_path : (real_path_arg ? real_path_arg : "null"), - uuid_utoa(inode->gfid), key); - } - return op_ret; + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "file: %s: gfid: %s key:%s ", + real_path ? real_path : (real_path_arg ? real_path_arg : "null"), + uuid_utoa(inode->gfid), key); + } + return op_ret; } /* _posix_get_mdata_xattr gets posix_mdata_t from inode context. If it fails * to get it from inode context, gets it from disk. This is with out inode lock. */ int -__posix_get_mdata_xattr (xlator_t *this, const char *real_path, int _fd, - inode_t *inode, struct iatt *stbuf) +__posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, + inode_t *inode, struct iatt *stbuf) { - posix_mdata_t *mdata = NULL; - int ret = -1; - int op_errno = 0; + posix_mdata_t *mdata = NULL; + int ret = -1; + int op_errno = 0; + + GF_VALIDATE_OR_GOTO(this->name, inode, out); + + ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata); + if (ret == -1 || !mdata) { + mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr); + if (!mdata) { + ret = -1; + goto out; + } - GF_VALIDATE_OR_GOTO (this->name, inode, out); + ret = posix_fetch_mdata_xattr(this, real_path, _fd, inode, mdata, + &op_errno); - ret = __inode_ctx_get1 (inode, this, - (uint64_t *)&mdata); - if (ret == -1 || !mdata) { - mdata = GF_CALLOC (1, sizeof (posix_mdata_t), - gf_posix_mt_mdata_attr); - if (!mdata) { - ret = -1; - goto out; - } - - ret = posix_fetch_mdata_xattr (this, real_path, _fd, inode, - mdata, &op_errno); - - if (ret == 0) { - /* Got mdata from disk, set it in inode ctx. This case - * is hit when in-memory status is lost due to brick - * down scenario - */ - __inode_ctx_set1 (inode, this, (uint64_t *)&mdata); - } else { - /* Failed to get mdata from disk, xattr missing. - * This happens on two cases. - * 1. File is created before ctime is enabled. - * 2. On new file creation. - * - * Do nothing, just return success. It is as - * good as ctime feature is not enabled for this - * file. For files created before ctime is enabled, - * time attributes gets updated into ctime structure - * once the metadata modification fop happens and - * time attributes become consistent eventually. - * For new files, it would obviously get updated - * before the fop completion. - */ - if (stbuf && op_errno != ENOENT) { - ret = 0; - goto out; - } else { - /* This case should not be hit. If it hits, - * don't fail, log warning, free mdata and move - * on - */ - gf_msg (this->name, GF_LOG_WARNING, op_errno, - P_MSG_FETCHMDATA_FAILED, - "file: %s: gfid: %s key:%s ", - real_path ? real_path : "null", - uuid_utoa(inode->gfid), - GF_XATTR_MDATA_KEY); - GF_FREE (mdata); - ret = 0; - goto out; - } - } + if (ret == 0) { + /* Got mdata from disk, set it in inode ctx. This case + * is hit when in-memory status is lost due to brick + * down scenario + */ + __inode_ctx_set1(inode, this, (uint64_t *)&mdata); + } else { + /* Failed to get mdata from disk, xattr missing. + * This happens on two cases. + * 1. File is created before ctime is enabled. + * 2. On new file creation. + * + * Do nothing, just return success. It is as + * good as ctime feature is not enabled for this + * file. For files created before ctime is enabled, + * time attributes gets updated into ctime structure + * once the metadata modification fop happens and + * time attributes become consistent eventually. + * For new files, it would obviously get updated + * before the fop completion. + */ + if (stbuf && op_errno != ENOENT) { + ret = 0; + goto out; + } else { + /* This case should not be hit. If it hits, + * don't fail, log warning, free mdata and move + * on + */ + gf_msg(this->name, GF_LOG_WARNING, op_errno, + P_MSG_FETCHMDATA_FAILED, "file: %s: gfid: %s key:%s ", + real_path ? real_path : "null", uuid_utoa(inode->gfid), + GF_XATTR_MDATA_KEY); + GF_FREE(mdata); + ret = 0; + goto out; + } } + } - ret = 0; + ret = 0; - if (ret == 0 && stbuf) { - stbuf->ia_ctime = mdata->ctime.tv_sec; - stbuf->ia_ctime_nsec = mdata->ctime.tv_nsec; - stbuf->ia_mtime = mdata->mtime.tv_sec; - stbuf->ia_mtime_nsec = mdata->mtime.tv_nsec; - stbuf->ia_atime = mdata->atime.tv_sec; - stbuf->ia_atime_nsec = mdata->atime.tv_nsec; - } + if (ret == 0 && stbuf) { + stbuf->ia_ctime = mdata->ctime.tv_sec; + stbuf->ia_ctime_nsec = mdata->ctime.tv_nsec; + stbuf->ia_mtime = mdata->mtime.tv_sec; + stbuf->ia_mtime_nsec = mdata->mtime.tv_nsec; + stbuf->ia_atime = mdata->atime.tv_sec; + stbuf->ia_atime_nsec = mdata->atime.tv_nsec; + } out: - return ret; + return ret; } /* posix_get_mdata_xattr gets posix_mdata_t from inode context. If it fails * to get it from inode context, gets it from disk. This is with inode lock. */ int -posix_get_mdata_xattr (xlator_t *this, const char *real_path, int _fd, - inode_t *inode, struct iatt *stbuf) +posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, + inode_t *inode, struct iatt *stbuf) { - int ret = -1; + int ret = -1; - GF_VALIDATE_OR_GOTO (this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); - LOCK (&inode->lock); - { - ret = __posix_get_mdata_xattr (this, real_path, _fd, inode, stbuf); - } - UNLOCK (&inode->lock); + LOCK(&inode->lock); + { + ret = __posix_get_mdata_xattr(this, real_path, _fd, inode, stbuf); + } + UNLOCK(&inode->lock); out: - return ret; + return ret; } static int -posix_compare_timespec (struct timespec *first, struct timespec *second) +posix_compare_timespec(struct timespec *first, struct timespec *second) { - if (first->tv_sec == second->tv_sec) - return first->tv_nsec - second->tv_nsec; - else - return first->tv_sec - second->tv_sec; + if (first->tv_sec == second->tv_sec) + return first->tv_nsec - second->tv_nsec; + else + return first->tv_sec - second->tv_sec; } - /* posix_set_mdata_xattr updates the posix_mdata_t based on the flag * in inode context and stores it on disk */ static int -posix_set_mdata_xattr (xlator_t *this, const char *real_path, int fd, - inode_t *inode, struct timespec *time, - struct iatt *stbuf, posix_mdata_flag_t *flag, - gf_boolean_t update_utime) +posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, + inode_t *inode, struct timespec *time, struct iatt *stbuf, + posix_mdata_flag_t *flag, gf_boolean_t update_utime) { - posix_mdata_t *mdata = NULL; - int ret = -1; - int op_errno = 0; - - GF_VALIDATE_OR_GOTO ("posix", this, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); - - LOCK (&inode->lock); - { - ret = __inode_ctx_get1 (inode, this, - (uint64_t *)&mdata); - if (ret == -1 || !mdata) { - /* - * Do we need to fetch the data from xattr - * If we does we can compare the value and store - * the largest data in inode ctx. - */ - mdata = GF_CALLOC (1, sizeof (posix_mdata_t), - gf_posix_mt_mdata_attr); - if (!mdata) { - ret = -1; - goto unlock; - } - - ret = posix_fetch_mdata_xattr (this, real_path, fd, - inode, - (void *)mdata, &op_errno); - if (ret == 0) { - /* Got mdata from disk, set it in inode ctx. This case - * is hit when in-memory status is lost due to brick - * down scenario - */ - __inode_ctx_set1 (inode, this, - (uint64_t *)&mdata); - } else if (ret && time) { - /* - * This is the first time creating the time - * attr. This happens when you activate this - * feature, and the legacy file will not have - * any xattr set. - * - * New files will create extended attributes. - */ - - /* - * TODO: This is wrong approach, because before - * creating fresh xattr, we should consult - * to all replica and/or distribution set. - * - * We should contact the time management - * xlators, and ask them to create an xattr. - */ - /* We should not be relying on backend file's - * time attributes to load the initial ctime - * time attribute structure. This is incorrect - * as each replica set would have witnessed the - * file creation at different times. - * - * For new file creation, ctime, atime and mtime - * should be same, hence initiate the ctime - * structure with the time from the frame. But - * for the files which were created before ctime - * feature is enabled, this is not accurate but - * still fine as the times would get eventually - * accurate. - */ - mdata->version = 1; - mdata->flags = 0; - mdata->ctime.tv_sec = time->tv_sec; - mdata->ctime.tv_nsec = time->tv_nsec; - mdata->atime.tv_sec = time->tv_sec; - mdata->atime.tv_nsec = time->tv_nsec; - mdata->mtime.tv_sec = time->tv_sec; - mdata->mtime.tv_nsec = time->tv_nsec; - - __inode_ctx_set1 (inode, this, - (uint64_t *)&mdata); - } - } - - /* Earlier, mdata was updated only if the existing time is less - * than the time to be updated. This would fail the scenarios - * where mtime can be set to any time using the syscall. Hence - * just updating without comparison. But the ctime is not - * allowed to changed to older date. - */ + posix_mdata_t *mdata = NULL; + int ret = -1; + int op_errno = 0; + + GF_VALIDATE_OR_GOTO("posix", this, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); - if (flag->ctime && - posix_compare_timespec (time, &mdata->ctime) > 0) { - mdata->ctime = *time; - } - - /* In distributed systems, there could be races with fops - * updating mtime/atime which could result in different - * mtime/atime for same file. So this makes sure, only the - * highest time is retained. If the mtime/atime update comes - * from the explicit utime syscall, it is allowed to set to - * previous time + LOCK(&inode->lock); + { + ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata); + if (ret == -1 || !mdata) { + /* + * Do we need to fetch the data from xattr + * If we does we can compare the value and store + * the largest data in inode ctx. + */ + mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr); + if (!mdata) { + ret = -1; + goto unlock; + } + + ret = posix_fetch_mdata_xattr(this, real_path, fd, inode, + (void *)mdata, &op_errno); + if (ret == 0) { + /* Got mdata from disk, set it in inode ctx. This case + * is hit when in-memory status is lost due to brick + * down scenario */ - if (update_utime) { - if (flag->mtime) { - mdata->mtime = *time; - } - if (flag->atime) { - mdata->atime = *time; - } - } else { - if (flag->mtime && - posix_compare_timespec (time, &mdata->mtime) > 0) { - mdata->mtime = *time; - } - if (flag->atime && - posix_compare_timespec (time, &mdata->atime) > 0) { - mdata->atime = *time; - } - } - - if (inode->ia_type == IA_INVAL) { - /* - * TODO: This is non-linked inode. So we have to sync the - * data into backend. Because inode_link may return - * a different inode. - */ - /* ret = posix_store_mdata_xattr (this, loc, fd, - * mdata); */ - } + __inode_ctx_set1(inode, this, (uint64_t *)&mdata); + } else if (ret && time) { /* - * With this patch set, we are setting the xattr for each update - * We should evaluate the performance, and based on that we can - * decide on asynchronous updation. + * This is the first time creating the time + * attr. This happens when you activate this + * feature, and the legacy file will not have + * any xattr set. + * + * New files will create extended attributes. */ - ret = posix_store_mdata_xattr (this, real_path, fd, inode, - mdata); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_STOREMDATA_FAILED, - "file: %s: gfid: %s key:%s ", - real_path ? real_path : "null", - uuid_utoa(inode->gfid), GF_XATTR_MDATA_KEY); - goto unlock; - } + + /* + * TODO: This is wrong approach, because before + * creating fresh xattr, we should consult + * to all replica and/or distribution set. + * + * We should contact the time management + * xlators, and ask them to create an xattr. + */ + /* We should not be relying on backend file's + * time attributes to load the initial ctime + * time attribute structure. This is incorrect + * as each replica set would have witnessed the + * file creation at different times. + * + * For new file creation, ctime, atime and mtime + * should be same, hence initiate the ctime + * structure with the time from the frame. But + * for the files which were created before ctime + * feature is enabled, this is not accurate but + * still fine as the times would get eventually + * accurate. + */ + mdata->version = 1; + mdata->flags = 0; + mdata->ctime.tv_sec = time->tv_sec; + mdata->ctime.tv_nsec = time->tv_nsec; + mdata->atime.tv_sec = time->tv_sec; + mdata->atime.tv_nsec = time->tv_nsec; + mdata->mtime.tv_sec = time->tv_sec; + mdata->mtime.tv_nsec = time->tv_nsec; + + __inode_ctx_set1(inode, this, (uint64_t *)&mdata); + } } -unlock: - UNLOCK (&inode->lock); -out: - if (ret == 0 && stbuf) { - stbuf->ia_ctime = mdata->ctime.tv_sec; - stbuf->ia_ctime_nsec = mdata->ctime.tv_nsec; - stbuf->ia_mtime = mdata->mtime.tv_sec; - stbuf->ia_mtime_nsec = mdata->mtime.tv_nsec; - stbuf->ia_atime = mdata->atime.tv_sec; - stbuf->ia_atime_nsec = mdata->atime.tv_nsec; + + /* Earlier, mdata was updated only if the existing time is less + * than the time to be updated. This would fail the scenarios + * where mtime can be set to any time using the syscall. Hence + * just updating without comparison. But the ctime is not + * allowed to changed to older date. + */ + + if (flag->ctime && posix_compare_timespec(time, &mdata->ctime) > 0) { + mdata->ctime = *time; + } + + /* In distributed systems, there could be races with fops + * updating mtime/atime which could result in different + * mtime/atime for same file. So this makes sure, only the + * highest time is retained. If the mtime/atime update comes + * from the explicit utime syscall, it is allowed to set to + * previous time + */ + if (update_utime) { + if (flag->mtime) { + mdata->mtime = *time; + } + if (flag->atime) { + mdata->atime = *time; + } + } else { + if (flag->mtime && + posix_compare_timespec(time, &mdata->mtime) > 0) { + mdata->mtime = *time; + } + if (flag->atime && + posix_compare_timespec(time, &mdata->atime) > 0) { + mdata->atime = *time; + } } - return ret; + if (inode->ia_type == IA_INVAL) { + /* + * TODO: This is non-linked inode. So we have to sync the + * data into backend. Because inode_link may return + * a different inode. + */ + /* ret = posix_store_mdata_xattr (this, loc, fd, + * mdata); */ + } + /* + * With this patch set, we are setting the xattr for each update + * We should evaluate the performance, and based on that we can + * decide on asynchronous updation. + */ + ret = posix_store_mdata_xattr(this, real_path, fd, inode, mdata); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STOREMDATA_FAILED, + "file: %s: gfid: %s key:%s ", real_path ? real_path : "null", + uuid_utoa(inode->gfid), GF_XATTR_MDATA_KEY); + goto unlock; + } + } +unlock: + UNLOCK(&inode->lock); +out: + if (ret == 0 && stbuf) { + stbuf->ia_ctime = mdata->ctime.tv_sec; + stbuf->ia_ctime_nsec = mdata->ctime.tv_nsec; + stbuf->ia_mtime = mdata->mtime.tv_sec; + stbuf->ia_mtime_nsec = mdata->mtime.tv_nsec; + stbuf->ia_atime = mdata->atime.tv_sec; + stbuf->ia_atime_nsec = mdata->atime.tv_nsec; + } + + return ret; } /* posix_update_utime_in_mdata updates the posix_mdata_t when mtime/atime * is modified using syscall */ void -posix_update_utime_in_mdata (xlator_t *this, const char *real_path, int fd, - inode_t *inode, - struct iatt *stbuf, int valid) +posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd, + inode_t *inode, struct iatt *stbuf, int valid) { - int32_t ret = 0; + int32_t ret = 0; #if defined(HAVE_UTIMENSAT) - struct timespec tv = {0, }; + struct timespec tv = { + 0, + }; #else - struct timeval tv = {0, }; + struct timeval tv = { + 0, + }; #endif - posix_mdata_flag_t flag = {0, }; - - struct posix_private *priv = NULL; - - priv = this->private; - - if (inode && priv->ctime) { - if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) { - tv.tv_sec = stbuf->ia_atime; - SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, stbuf->ia_atime_nsec); - - flag.ctime = 0; - flag.mtime = 0; - flag.atime = 1; - ret = posix_set_mdata_xattr (this, real_path, -1, inode, &tv, NULL, - &flag, _gf_true); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_SETMDATA_FAILED, - "posix set mdata atime failed on file:" - " %s gfid:%s", - real_path, uuid_utoa (inode->gfid)); - } - } - - if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) { - tv.tv_sec = stbuf->ia_mtime; - SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, stbuf->ia_mtime_nsec); - - flag.ctime = 1; - flag.mtime = 1; - flag.atime = 0; - - ret = posix_set_mdata_xattr (this, real_path, -1, inode, &tv, NULL, - &flag, _gf_true); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_SETMDATA_FAILED, - "posix set mdata mtime failed on file:" - " %s gfid:%s", - real_path, uuid_utoa (inode->gfid)); - } - } + posix_mdata_flag_t flag = { + 0, + }; + + struct posix_private *priv = NULL; + + priv = this->private; + + if (inode && priv->ctime) { + if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) { + tv.tv_sec = stbuf->ia_atime; + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, stbuf->ia_atime_nsec); + + flag.ctime = 0; + flag.mtime = 0; + flag.atime = 1; + ret = posix_set_mdata_xattr(this, real_path, -1, inode, &tv, NULL, + &flag, _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata atime failed on file:" + " %s gfid:%s", + real_path, uuid_utoa(inode->gfid)); + } + } - } else { - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_SETMDATA_FAILED, - "posix utime set mdata failed on file"); + if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) { + tv.tv_sec = stbuf->ia_mtime; + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, stbuf->ia_mtime_nsec); + + flag.ctime = 1; + flag.mtime = 1; + flag.atime = 0; + + ret = posix_set_mdata_xattr(this, real_path, -1, inode, &tv, NULL, + &flag, _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata mtime failed on file:" + " %s gfid:%s", + real_path, uuid_utoa(inode->gfid)); + } } - return; + + } else { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix utime set mdata failed on file"); + } + return; } static void -posix_get_mdata_flag (uint64_t flags, posix_mdata_flag_t *flag) +posix_get_mdata_flag(uint64_t flags, posix_mdata_flag_t *flag) { - if (!flag) - return; - - flag->ctime = 0; - flag->atime = 0; - flag->mtime = 0; - - if (flags & MDATA_CTIME) - flag->ctime = 1; - if (flags & MDATA_MTIME) - flag->mtime = 1; - if (flags & MDATA_ATIME) - flag->atime = 1; + if (!flag) + return; + + flag->ctime = 0; + flag->atime = 0; + flag->mtime = 0; + + if (flags & MDATA_CTIME) + flag->ctime = 1; + if (flags & MDATA_MTIME) + flag->mtime = 1; + if (flags & MDATA_ATIME) + flag->atime = 1; } static void -posix_get_parent_mdata_flag (uint64_t flags, posix_mdata_flag_t *flag) +posix_get_parent_mdata_flag(uint64_t flags, posix_mdata_flag_t *flag) { - if (!flag) - return; - - flag->ctime = 0; - flag->atime = 0; - flag->mtime = 0; - - if (flags & MDATA_PAR_CTIME) - flag->ctime = 1; - if (flags & MDATA_PAR_MTIME) - flag->mtime = 1; - if (flags & MDATA_PAR_ATIME) - flag->atime = 1; + if (!flag) + return; + + flag->ctime = 0; + flag->atime = 0; + flag->mtime = 0; + + if (flags & MDATA_PAR_CTIME) + flag->ctime = 1; + if (flags & MDATA_PAR_MTIME) + flag->mtime = 1; + if (flags & MDATA_PAR_ATIME) + flag->atime = 1; } void -posix_set_ctime (call_frame_t *frame, xlator_t *this, const char* real_path, - int fd, inode_t *inode, struct iatt *stbuf) +posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path, + int fd, inode_t *inode, struct iatt *stbuf) { - posix_mdata_flag_t flag = {0,}; - int ret = 0; - struct posix_private *priv = NULL; - - priv = this->private; - - if (priv->ctime) { - (void) posix_get_mdata_flag (frame->root->flags, &flag); - if (frame->root->ctime.tv_sec == 0) { - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_SETMDATA_FAILED, - "posix set mdata failed, No ctime : %s gfid:%s", - real_path, - inode ? uuid_utoa (inode->gfid) : "No inode"); - goto out; - } - - ret = posix_set_mdata_xattr (this, real_path, fd, inode, - &frame->root->ctime, stbuf, &flag, - _gf_false); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_SETMDATA_FAILED, - "posix set mdata failed on file: %s gfid:%s", - real_path, - inode ? uuid_utoa (inode->gfid) : "No inode"); - } + posix_mdata_flag_t flag = { + 0, + }; + int ret = 0; + struct posix_private *priv = NULL; + + priv = this->private; + + if (priv->ctime) { + (void)posix_get_mdata_flag(frame->root->flags, &flag); + if (frame->root->ctime.tv_sec == 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed, No ctime : %s gfid:%s", real_path, + inode ? uuid_utoa(inode->gfid) : "No inode"); + goto out; } - out: - return; + + ret = posix_set_mdata_xattr(this, real_path, fd, inode, + &frame->root->ctime, stbuf, &flag, + _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed on file: %s gfid:%s", real_path, + inode ? uuid_utoa(inode->gfid) : "No inode"); + } + } +out: + return; } void -posix_set_parent_ctime (call_frame_t *frame, xlator_t *this, - const char* real_path, int fd, inode_t *inode, - struct iatt *stbuf) +posix_set_parent_ctime(call_frame_t *frame, xlator_t *this, + const char *real_path, int fd, inode_t *inode, + struct iatt *stbuf) { - posix_mdata_flag_t flag = {0,}; - int ret = 0; - struct posix_private *priv = NULL; - - priv = this->private; - - if (inode && priv->ctime) { - (void) posix_get_parent_mdata_flag (frame->root->flags, &flag); - ret = posix_set_mdata_xattr (this, real_path, fd, inode, - &frame->root->ctime, stbuf, &flag, - _gf_false); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_SETMDATA_FAILED, - "posix set mdata failed on file: %s gfid:%s", - real_path, uuid_utoa (inode->gfid)); - } + posix_mdata_flag_t flag = { + 0, + }; + int ret = 0; + struct posix_private *priv = NULL; + + priv = this->private; + + if (inode && priv->ctime) { + (void)posix_get_parent_mdata_flag(frame->root->flags, &flag); + ret = posix_set_mdata_xattr(this, real_path, fd, inode, + &frame->root->ctime, stbuf, &flag, + _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed on file: %s gfid:%s", real_path, + uuid_utoa(inode->gfid)); } - return; + } + return; } |