diff options
Diffstat (limited to 'xlators/features/bit-rot')
-rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot.c | 150 | ||||
-rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot.h | 3 | ||||
-rw-r--r-- | xlators/features/bit-rot/src/stub/bit-rot-common.h | 5 | ||||
-rw-r--r-- | xlators/features/bit-rot/src/stub/bit-rot-stub.c | 700 | ||||
-rw-r--r-- | xlators/features/bit-rot/src/stub/bit-rot-stub.h | 3 |
5 files changed, 435 insertions, 426 deletions
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c index b9adbd6647c..2652f02b4ea 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot.c @@ -485,94 +485,72 @@ br_log_object_path (xlator_t *this, char *op, } static void -br_send_dummy_write (xlator_t *this, fd_t *fd, br_child_t *child, - dict_t *xdata) +br_trigger_sign (xlator_t *this, br_child_t *child, + inode_t *linked_inode, loc_t *loc, gf_boolean_t need_reopen) { - struct iovec iov = {0, }; - struct iobref *iobref = NULL; - struct iobuf *iobuf = NULL; - char *msg = NULL; - size_t size = 0; - int32_t ret = -1; - - GF_VALIDATE_OR_GOTO ("bit-rot", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, child, out); - - msg = gf_strdup ("GLUSTERFS"); - if (!msg) - goto out; + fd_t *fd = NULL; + int32_t ret = -1; + uint32_t val = 0; + dict_t *dict = NULL; + pid_t pid = GF_CLIENT_PID_BITD; - size = strlen (msg); + syncopctx_setfspid (&pid); - iov.iov_base = msg; - iov.iov_len = size; + val = (need_reopen == _gf_true) ? BR_OBJECT_REOPEN : BR_OBJECT_RESIGN; - iobref = iobref_new (); - if (!iobref) - goto free_msg; + dict = dict_new (); + if (!dict) + goto out; - iobuf = iobuf_get2 (this->ctx->iobuf_pool, size); - if (!iobuf) - goto free_iobref; + ret = dict_set_uint32 (dict, BR_REOPEN_SIGN_HINT_KEY, val); + if (ret) + goto cleanup_dict; - iobref_add (iobref, iobuf); + ret = -1; + fd = fd_create (linked_inode, 0); + if (!fd) { + gf_log (this->name, GF_LOG_ERROR, "Failed to create fd " + "[GFID %s]", uuid_utoa (linked_inode->gfid)); + goto cleanup_dict; + } - iov_unload (iobuf_ptr (iobuf), &iov, 1); /* FIXME!!! */ + ret = syncop_open (child->xl, loc, O_RDWR, fd, NULL, NULL); + if (ret) { + br_log_object (this, "open", linked_inode->gfid, -ret); + goto unref_fd; + } - iov.iov_base = iobuf_ptr (iobuf); - iov.iov_len = size; + fd_bind (fd); - ret = syncop_writev (child->xl, fd, &iov, 1, 0, iobref, 0, xdata, NULL); - if (ret <= 0) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "dummy write failed (%s)", strerror (errno)); - goto free_iobuf; - } + ret = syncop_fsetxattr (child->xl, fd, dict, 0, NULL, NULL); + if (ret) + br_log_object (this, "fsetxattr", linked_inode->gfid, -ret); - /* iobref_unbref() takes care of iobuf unref */ - ret = 0; + /* passthough: fd_unref() */ - free_iobuf: - iobuf_unref (iobuf); - free_iobref: - iobref_unref (iobref); - free_msg: - GF_FREE (msg); + unref_fd: + fd_unref (fd); + cleanup_dict: + dict_unref (dict); out: - return; + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Could not trigger signingd for %s (reopen hint: %d)", + uuid_utoa (linked_inode->gfid), val); + } } static void -br_object_handle_reopen (xlator_t *this, - br_object_t *object, inode_t *linked_inode) +br_object_resign (xlator_t *this, + br_object_t *object, inode_t *linked_inode) { - int32_t ret = -1; - dict_t *dict = NULL; - loc_t loc = {0, }; - - /** - * Here dict is purposefully not checked for NULL, because at any cost - * sending a re-open should not be missed. This re-open is an indication - * for the stub to properly mark inode's status. - */ - dict = dict_new (); - if (dict) { - /* TODO: Make it a #define */ - ret = dict_set_int32 (dict, "br-fd-reopen", 1); - if (ret) - gf_log (this->name, GF_LOG_WARNING, - "Object reopen would trigger versioning."); - } + loc_t loc = {0, }; loc.inode = inode_ref (linked_inode); gf_uuid_copy (loc.gfid, linked_inode->gfid); - br_trigger_sign (this, object->child, linked_inode, &loc, dict); + br_trigger_sign (this, object->child, linked_inode, &loc, _gf_false); - if (dict) - dict_unref (dict); loc_wipe (&loc); } @@ -618,7 +596,7 @@ static inline int32_t br_sign_object (br_object_t *object) * actual signing of the object. */ if (sign_info == BR_SIGN_REOPEN_WAIT) { - br_object_handle_reopen (this, object, linked_inode); + br_object_resign (this, object, linked_inode); goto unref_inode; } @@ -903,41 +881,7 @@ out: return need_sign; } -void -br_trigger_sign (xlator_t *this, br_child_t *child, inode_t *linked_inode, - loc_t *loc, dict_t *xdata) -{ - fd_t *fd = NULL; - int32_t ret = -1; - pid_t pid = GF_CLIENT_PID_BITD; - - syncopctx_setfspid (&pid); - - fd = fd_create (linked_inode, 0); - if (!fd) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to create fd [GFID %s]", - uuid_utoa (linked_inode->gfid)); - goto out; - } - - ret = syncop_open (child->xl, loc, O_RDWR, fd, NULL, NULL); - if (ret) { - br_log_object (this, "open", linked_inode->gfid, -ret); - fd_unref (fd); - fd = NULL; - } else { - fd_bind (fd); - } - - if (fd) { - br_send_dummy_write (this, fd, child, xdata); - syncop_close (fd); - } -out: - return; -} int32_t br_prepare_loc (xlator_t *this, br_child_t *child, loc_t *parent, @@ -1076,7 +1020,7 @@ bitd_oneshot_crawl (xlator_t *subvol, gf_log (this->name, GF_LOG_INFO, "Triggering signing for %s [GFID: %s | Brick: %s]", loc.path, uuid_utoa (linked_inode->gfid), child->brick_path); - br_trigger_sign (this, child, linked_inode, &loc, NULL); + br_trigger_sign (this, child, linked_inode, &loc, _gf_true); ret = 0; diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h index 1705f715f0c..6543be763d6 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot.h @@ -182,8 +182,5 @@ br_prepare_loc (xlator_t *, br_child_t *, loc_t *, gf_dirent_t *, loc_t *); gf_boolean_t bitd_is_bad_file (xlator_t *, br_child_t *, loc_t *, fd_t *); -void -br_trigger_sign (xlator_t *this, br_child_t *child, inode_t *linked_inode, - loc_t *loc, dict_t *xdata); #endif /* __BIT_ROT_H__ */ diff --git a/xlators/features/bit-rot/src/stub/bit-rot-common.h b/xlators/features/bit-rot/src/stub/bit-rot-common.h index 7fd584e5970..a8285d2b560 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-common.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-common.h @@ -119,6 +119,11 @@ typedef enum { /* BitRot stub start time (virtual xattr) */ #define GLUSTERFS_GET_BR_STUB_INIT_TIME "trusted.glusterfs.bit-rot.stub-init" +/* signing/reopen hint */ +#define BR_OBJECT_RESIGN 0 +#define BR_OBJECT_REOPEN 1 +#define BR_REOPEN_SIGN_HINT_KEY "trusted.glusterfs.bit-rot.reopen-hint" + static inline int br_is_signature_type_valid (int8_t signaturetype) { diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c index 93db072f671..524c235b549 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c @@ -255,7 +255,7 @@ br_stub_fill_local (br_stub_local_t *local, local->fopstub = stub; local->versioningtype = versioningtype; local->u.context.version = memversion; - if (fd && !local->u.context.fd) + if (fd) local->u.context.fd = fd_ref (fd); if (inode) local->u.context.inode = inode_ref (inode); @@ -279,6 +279,126 @@ br_stub_cleanup_local (br_stub_local_t *local) memset (local->u.context.gfid, '\0', sizeof (uuid_t)); } +static int +br_stub_need_versioning (xlator_t *this, + fd_t *fd, gf_boolean_t *versioning, + gf_boolean_t *modified, br_stub_inode_ctx_t **ctx) +{ + int32_t ret = -1; + uint64_t ctx_addr = 0; + br_stub_inode_ctx_t *c = NULL; + + *versioning = _gf_false; + *modified = _gf_false; + + ret = br_stub_get_inode_ctx (this, fd->inode, &ctx_addr); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "failed to get the inode " + "context for the inode %s", + uuid_utoa (fd->inode->gfid)); + goto error_return; + } + + c = (br_stub_inode_ctx_t *) (long) ctx_addr; + + LOCK (&fd->inode->lock); + { + if (__br_stub_is_inode_dirty (c)) + *versioning = _gf_true; + if (__br_stub_is_inode_modified (c)) + *modified = _gf_true; + } + UNLOCK (&fd->inode->lock); + + if (ctx) + *ctx = c; + return 0; + + error_return: + return -1; +} + +static int32_t +br_stub_anon_fd_ctx (xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx) +{ + int32_t ret = -1; + br_stub_fd_t *br_stub_fd = NULL; + + br_stub_fd = br_stub_fd_ctx_get (this, fd); + if (!br_stub_fd) { + ret = br_stub_add_fd_to_inode (this, fd, ctx); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "add fd to the inode (gfid: %s)", + uuid_utoa (fd->inode->gfid)); + goto out; + } + } + + ret = 0; + +out: + return ret; +} + +static int +br_stub_versioning_prep (call_frame_t *frame, + xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx) +{ + int32_t ret = -1; + br_stub_local_t *local = NULL; + + local = br_stub_alloc_local (this); + if (!local) { + gf_log (this->name, GF_LOG_ERROR, "local allocation failed " + "(gfid: %s)", uuid_utoa (fd->inode->gfid)); + goto error_return; + } + + if (fd_is_anonymous (fd)) { + ret = br_stub_anon_fd_ctx (this, fd, ctx); + if (ret) + goto free_local; + } + + frame->local = local; + + return 0; + + free_local: + br_stub_dealloc_local (local); + error_return: + return -1; +} + +static int +br_stub_mark_inode_modified (xlator_t *this, br_stub_local_t *local) +{ + fd_t *fd = NULL; + int32_t ret = 0; + uint64_t ctx_addr = 0; + br_stub_inode_ctx_t *ctx = NULL; + + fd = local->u.context.fd; + + ret = br_stub_get_inode_ctx (this, fd->inode, &ctx_addr); + if (ret < 0) + goto error_return; + + ctx = (br_stub_inode_ctx_t *) (long) ctx_addr; + + LOCK (&fd->inode->lock); + { + __br_stub_set_inode_modified (ctx); + } + UNLOCK (&fd->inode->lock); + + return 0; + + error_return: + return -1; +} + /** * callback for inode/fd versioning */ @@ -379,7 +499,6 @@ br_stub_fd_versioning (xlator_t *this, call_frame_t *frame, fd->inode, fd->inode->gfid, versioningtype, memversion); - frame->local = local; STACK_WIND (frame, callback, FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags, xdata); @@ -409,7 +528,6 @@ br_stub_perform_incversioning (xlator_t *this, writeback_version = __br_stub_writeback_version (ctx); - /* inode requires writeback to disk */ op_errno = ENOMEM; dict = dict_new (); if (!dict) @@ -518,54 +636,160 @@ br_stub_prepare_signature (xlator_t *this, dict_t *dict, return -1; } -int -br_stub_fsetxattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, dict_t *dict, int flags, dict_t *xdata) +static void +br_stub_handle_object_signature (call_frame_t *frame, + xlator_t *this, fd_t *fd, dict_t *dict, + br_isignature_t *sign, dict_t *xdata) { - int32_t ret = 0; - br_isignature_t *sign = NULL; - gf_boolean_t xref = _gf_false; + int32_t ret = -1; + gf_boolean_t xref = _gf_false; - if (!IA_ISREG (fd->inode->ia_type)) - goto wind; - ret = dict_get_bin (dict, GLUSTERFS_SET_OBJECT_SIGNATURE, - (void **) &sign); - if (ret < 0) - goto wind; if (frame->root->pid != GF_CLIENT_PID_BITD) - goto unwind; + goto dofop; ret = br_stub_prepare_signature (this, dict, fd->inode, sign); if (ret) - goto unwind; + goto dofop; dict_del (dict, GLUSTERFS_SET_OBJECT_SIGNATURE); + ret = -1; if (!xdata) { xdata = dict_new (); if (!xdata) - goto unwind; + goto dofop; } else { dict_ref (xdata); } xref = _gf_true; ret = dict_set_int32 (xdata, GLUSTERFS_DURABLE_OP, 0); + + dofop: + if (ret) + STACK_UNWIND_STRICT (fsetxattr, frame, -1, EINVAL, NULL); + else { + gf_log (this->name, GF_LOG_DEBUG, "SIGNED VERSION: %lu", + sign->signedversion); + + STACK_WIND (frame, default_setxattr_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->fsetxattr, fd, dict, 0, + xdata); + } + + if (xref) + dict_unref (xdata); +} + +int32_t +br_stub_fsetxattr_resume (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + int32_t ret = -1; + br_stub_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + + ret = br_stub_mark_inode_modified (this, local); + if (ret) { + op_ret = -1; + op_errno = EINVAL; + } + + STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata); + + br_stub_cleanup_local (local); + br_stub_dealloc_local (local); + + return 0; +} + +static void +br_stub_handle_object_reopen (call_frame_t *frame, + xlator_t *this, fd_t *fd, uint32_t val) +{ + int32_t ret = -1; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + call_stub_t *stub = NULL; + gf_boolean_t inc_version = _gf_false; + gf_boolean_t modified = _gf_false; + br_stub_inode_ctx_t *ctx = NULL; + br_stub_local_t *local = NULL; + + ret = br_stub_need_versioning (this, fd, &inc_version, &modified, &ctx); if (ret) goto unwind; - gf_log (this->name, GF_LOG_DEBUG, "SIGNED VERSION: %lu", - sign->signedversion); + LOCK (&fd->inode->lock); + { + (void) __br_stub_inode_sign_state (ctx, GF_FOP_FSETXATTR, fd); + } + UNLOCK (&fd->inode->lock); + + if ((val == BR_OBJECT_RESIGN) || !inc_version) { + op_ret = op_errno = 0; + goto unwind; + } + + ret = br_stub_versioning_prep (frame, this, fd, ctx); + if (ret) + goto unwind; + local = frame->local; + + stub = fop_fsetxattr_cbk_stub (frame, br_stub_fsetxattr_resume, + 0, 0, NULL); + if (!stub) { + gf_log (this->name, GF_LOG_ERROR, "failed to allocate stub for " + "fsetxattr fop (gfid: %s), unwinding", + uuid_utoa (fd->inode->gfid)); + goto cleanup_local; + } + + (void) br_stub_perform_incversioning (this, frame, stub, fd, ctx); + return; + + cleanup_local: + br_stub_cleanup_local (local); + br_stub_dealloc_local (local); + + unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL); +} + +int +br_stub_fsetxattr (call_frame_t *frame, xlator_t *this, + fd_t *fd, dict_t *dict, int flags, dict_t *xdata) +{ + int32_t ret = 0; + uint32_t val = 0; + br_isignature_t *sign = NULL; + + if (!IA_ISREG (fd->inode->ia_type)) + goto wind; + + /* object signature request */ + ret = dict_get_bin (dict, GLUSTERFS_SET_OBJECT_SIGNATURE, + (void **) &sign); + if (!ret) { + br_stub_handle_object_signature (frame, this, + fd, dict, sign, xdata); + goto done; + } + + /* object reopen request */ + ret = dict_get_uint32 (dict, BR_REOPEN_SIGN_HINT_KEY, &val); + if (!ret) { + br_stub_handle_object_reopen (frame, this, fd, val); + goto done; + } + wind: STACK_WIND (frame, default_setxattr_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags, xdata); - goto done; - - unwind: - STACK_UNWIND_STRICT (setxattr, frame, -1, EINVAL, NULL); done: - if (xref) - dict_unref (xdata); return 0; } @@ -812,110 +1036,29 @@ br_stub_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { - int32_t ret = 0; - uint64_t ctx_addr = 0; - br_stub_inode_ctx_t *ctx = NULL; - br_stub_local_t *local = NULL; + int32_t ret = 0; + br_stub_local_t *local = NULL; - if (frame->local) { - local = frame->local; - frame->local = NULL; - } + local = frame->local; + frame->local = NULL; if (op_ret < 0) goto unwind; - ret = br_stub_get_inode_ctx (this, local->u.context.fd->inode, - &ctx_addr); - if (ret < 0) - goto unwind; - - ctx = (br_stub_inode_ctx_t *) (long) ctx_addr; - - /* Mark the flag to indicate the inode has been modified */ - LOCK (&local->u.context.fd->inode->lock); - { - if (!__br_stub_is_inode_modified (ctx)) - __br_stub_set_inode_modified (ctx); + ret = br_stub_mark_inode_modified (this, local); + if (ret) { + op_ret = -1; + op_errno = EINVAL; } - UNLOCK (&local->u.context.fd->inode->lock); - unwind: - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, - xdata); + STACK_UNWIND_STRICT (writev, frame, + op_ret, op_errno, prebuf, postbuf, xdata); + br_stub_cleanup_local (local); br_stub_dealloc_local (local); - return 0; -} - -/** - * Ongoing version is increased only for the first modify operation. - * First modify version means the first write or truncate call coming on the - * first fd in the list of inodes. - * For anonymous fds open would not have come, so check if its the first write - * by doing both inode dirty check and ensuring list of fds is empty - */ -static inline gf_boolean_t -br_stub_inc_version (xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx) -{ - gf_boolean_t inc_version = _gf_false; - - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, ctx, out); - - LOCK (&fd->inode->lock); - { - if (__br_stub_is_inode_dirty (ctx)) - inc_version = _gf_true; - } - UNLOCK (&fd->inode->lock); - -out: - return inc_version; -} - -/** - * Since NFS does not do open, writes from NFS are sent over an anonymous - * fd. It means each write fop might come on a different anonymous fd and - * will lead to very large number of notifications being sent. It might - * affect the perfromance as, there will too many sign requests. - * To avoid that whenever the last fd released from an inode (logical release) - * is an anonymous fd the release notification is sent with a flag being set - * __br_stub_anon_release (ctx); - * BitD checks for the flag and if set, it will send a dummy write request - * (again on an anonymous fd) instead of triggering sign. - * Bit-rot-stub should identify such dummy writes and should send success to - * them instead of winding them downwards. - */ -gf_boolean_t -br_stub_dummy_write (call_frame_t *frame) -{ - return (frame->root->pid == GF_CLIENT_PID_BITD) - ? _gf_true : _gf_false; -} - -int32_t -br_stub_anon_fd_ctx (xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx) -{ - int32_t ret = -1; - br_stub_fd_t *br_stub_fd = NULL; - br_stub_fd = br_stub_fd_ctx_get (this, fd); - if (!br_stub_fd) { - ret = br_stub_add_fd_to_inode (this, fd, ctx); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "failed to " - "add fd to the inode (gfid: %s)", - uuid_utoa (fd->inode->gfid)); - goto out; - } - } - - ret = 0; - -out: - return ret; + return 0; } int32_t @@ -923,107 +1066,71 @@ br_stub_writev_resume (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, int32_t count, off_t offset, uint32_t flags, struct iobref *iobref, dict_t *xdata) { - if (frame->root->pid == GF_CLIENT_PID_BITD) - br_stub_writev_cbk (frame, NULL, this, vector->iov_len, 0, - NULL, NULL, NULL); - else - STACK_WIND (frame, br_stub_writev_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, fd, vector, count, - offset, flags, iobref, xdata); + STACK_WIND (frame, br_stub_writev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, + offset, flags, iobref, xdata); return 0; } /** - TODO: If possible add pictorial represention of below comment. - - Before sending writev on the ANONYMOUS FD, increase the ongoing - version first. This brings anonymous fd write closer to the regular - fd write by having the ongoing version increased before doing the - write (In regular fd, after open the ongoing version is incremented). - Do following steps to handle writes on anonymous fds: - 1) Increase the on-disk ongoing version - 2) Once versioning is successfully done send write operation. If versioning - fails, then fail the write fop. - 3) In writev_cbk do below things: - a) Increase in-memory version - b) set the fd context (so that br_stub_release is invoked) - c) add the fd to the list of fds maintained in the inode context of - bitrot-stub. - d) Mark inode as non dirty - e) Mard inode as modified (in the inode context) -**/ + * This is probably the most crucial part about the whole versioning thing. + * There's absolutely no differentiation as such between an anonymous fd + * and a regular fd except the fd context initialization. Object versioning + * is perfomed when the inode is dirty. Parallel write operations are no + * special with each write performing object versioning followed by marking + * the inode as non-dirty (synced). This is followed by the actual operation + * (writev() in this case) which on a success marks the inode as modified. + * This prevents signing of objects that have not been modified. + */ int32_t br_stub_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, int32_t count, off_t offset, uint32_t flags, struct iobref *iobref, dict_t *xdata) { - br_stub_local_t *local = NULL; call_stub_t *stub = NULL; int32_t op_ret = -1; int32_t op_errno = EINVAL; gf_boolean_t inc_version = _gf_false; + gf_boolean_t modified = _gf_false; br_stub_inode_ctx_t *ctx = NULL; - uint64_t ctx_addr = 0; int32_t ret = -1; + fop_writev_cbk_t cbk = default_writev_cbk; + br_stub_local_t *local = NULL; GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind); GF_VALIDATE_OR_GOTO (this->name, frame, unwind); GF_VALIDATE_OR_GOTO (this->name, fd, unwind); - local = br_stub_alloc_local (this); - if (!local) { - gf_log (this->name, GF_LOG_ERROR, "local allocation failed " - "(gfid: %s)", uuid_utoa (fd->inode->gfid)); - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - - local->u.context.fd = fd_ref (fd); - frame->local = local; - - ret = br_stub_get_inode_ctx (this, fd->inode, &ctx_addr); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "failed to get the inode " - "context for the inode %s", - uuid_utoa (fd->inode->gfid)); + ret = br_stub_need_versioning (this, fd, &inc_version, &modified, &ctx); + if (ret) goto unwind; - } - ctx = (br_stub_inode_ctx_t *) (long) ctx_addr; - if (fd_is_anonymous (fd)) { - ret = br_stub_anon_fd_ctx (this, fd, ctx); - if (ret) - goto unwind; - } - - /* TODO: Better to do a dummy fsetxattr instead of write. Keep write - simple */ - if (br_stub_dummy_write (frame)) { - LOCK (&fd->inode->lock); - { - (void) __br_stub_inode_sign_state - (ctx, GF_FOP_WRITE, fd); - } - UNLOCK (&fd->inode->lock); - - if (xdata && dict_get (xdata, "br-fd-reopen")) { - op_ret = vector->iov_len; - op_errno = 0; - goto unwind; - } - } + /** + * The inode is not dirty and also witnessed atleast one successful + * modification operation. Therefore, subsequent operations need not + * perform any special tracking. + */ + if (!inc_version && modified) + goto wind; /** - * Check whether this is the first write on this inode since the last - * sign notification has been sent. If so, do versioning. Otherwise - * go ahead with the fop. + * okay.. so, either the inode needs versioning or the modification + * needs to be tracked. ->cbk is set to the appropriate callback + * routine for this. + * NOTE: ->local needs to be deallocated on failures from here on. */ - inc_version = br_stub_inc_version (this, fd, ctx); - if (!inc_version) + ret = br_stub_versioning_prep (frame, this, fd, ctx); + if (ret) + goto unwind; + + local = frame->local; + if (!inc_version) { + br_stub_fill_local (local, NULL, fd, fd->inode, + fd->inode->gfid, BR_STUB_NO_VERSIONING, 0); + cbk = br_stub_writev_cbk; goto wind; + } - /* Create the stub for the write fop */ stub = fop_writev_stub (frame, br_stub_writev_resume, fd, vector, count, offset, flags, iobref, xdata); @@ -1031,24 +1138,27 @@ br_stub_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, gf_log (this->name, GF_LOG_ERROR, "failed to allocate stub for " "write fop (gfid: %s), unwinding", uuid_utoa (fd->inode->gfid)); - goto unwind; + goto cleanup_local; } /* Perform Versioning */ return br_stub_perform_incversioning (this, frame, stub, fd, ctx); -wind: - STACK_WIND (frame, br_stub_writev_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, - flags, iobref, xdata); + wind: + STACK_WIND (frame, cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, + fd, vector, count, offset, flags, iobref, xdata); return 0; -unwind: + cleanup_local: + br_stub_cleanup_local (local); + br_stub_dealloc_local (local); + + unwind: frame->local = NULL; STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, NULL, NULL, NULL); - br_stub_cleanup_local (local); - br_stub_dealloc_local (local); + return 0; } @@ -1057,40 +1167,28 @@ br_stub_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { - int32_t ret = 0; - uint64_t ctx_addr = 0; - br_stub_inode_ctx_t *ctx = NULL; - br_stub_local_t *local = NULL; + int32_t ret = -1; + br_stub_local_t *local = NULL; - if (frame->local) { - local = frame->local; - frame->local = NULL; - } + local = frame->local; + frame->local = NULL; if (op_ret < 0) goto unwind; - ret = br_stub_get_inode_ctx (this, local->u.context.fd->inode, - &ctx_addr); - if (ret < 0) - goto unwind; - - ctx = (br_stub_inode_ctx_t *) (long) ctx_addr; - - /* Mark the flag to indicate the inode has been modified */ - LOCK (&local->u.context.fd->inode->lock); - { - if (!__br_stub_is_inode_modified (ctx)) - __br_stub_set_inode_modified (ctx); + ret = br_stub_mark_inode_modified (this, local); + if (ret) { + op_ret = -1; + op_errno = EINVAL; } - UNLOCK (&local->u.context.fd->inode->lock); - unwind: - STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf, postbuf, - xdata); + STACK_UNWIND_STRICT (ftruncate, frame, + op_ret, op_errno, prebuf, postbuf, xdata); + br_stub_cleanup_local (local); br_stub_dealloc_local (local); + return 0; } @@ -1103,6 +1201,7 @@ br_stub_ftruncate_resume (call_frame_t *frame, xlator_t *this, fd_t *fd, return 0; } +/* c.f. br_stub_writev() for explanation */ int32_t br_stub_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata) @@ -1112,72 +1211,59 @@ br_stub_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t op_ret = -1; int32_t op_errno = EINVAL; gf_boolean_t inc_version = _gf_false; + gf_boolean_t modified = _gf_false; br_stub_inode_ctx_t *ctx = NULL; - uint64_t ctx_addr = 0; int32_t ret = -1; + fop_ftruncate_cbk_t cbk = default_ftruncate_cbk; GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind); GF_VALIDATE_OR_GOTO (this->name, frame, unwind); GF_VALIDATE_OR_GOTO (this->name, fd, unwind); - local = br_stub_alloc_local (this); - if (!local) { - gf_log (this->name, GF_LOG_ERROR, "local allocation failed " - "(gfid: %s)", uuid_utoa (fd->inode->gfid)); - op_ret = -1; - op_errno = ENOMEM; + ret = br_stub_need_versioning (this, fd, &inc_version, &modified, &ctx); + if (ret) goto unwind; - } - local->u.context.fd = fd_ref (fd); - frame->local = local; + if (!inc_version && modified) + goto wind; - ret = br_stub_get_inode_ctx (this, fd->inode, &ctx_addr); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "failed to get the inode " - "context for the inode %s", - uuid_utoa (fd->inode->gfid)); + ret = br_stub_versioning_prep (frame, this, fd, ctx); + if (ret) goto unwind; - } - ctx = (br_stub_inode_ctx_t *) (long) ctx_addr; - if (fd_is_anonymous (fd)) { - ret = br_stub_anon_fd_ctx (this, fd, ctx); - if (ret) - goto unwind; - } - - /** - * c.f. br_stub_writev() - */ - inc_version = br_stub_inc_version (this, fd, ctx); - if (!inc_version) + local = frame->local; + if (!inc_version) { + br_stub_fill_local (local, NULL, fd, fd->inode, + fd->inode->gfid, BR_STUB_NO_VERSIONING, 0); + cbk = br_stub_ftruncate_cbk; goto wind; + } - /* Create the stub for the ftruncate fop */ stub = fop_ftruncate_stub (frame, br_stub_ftruncate_resume, fd, offset, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "failed to allocate stub for " "ftruncate fop (gfid: %s), unwinding", uuid_utoa (fd->inode->gfid)); - goto unwind; + goto cleanup_local; } - /* Perform Versioning */ return br_stub_perform_incversioning (this, frame, stub, fd, ctx); -wind: - STACK_WIND (frame, br_stub_ftruncate_cbk, FIRST_CHILD(this), + wind: + STACK_WIND (frame, cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); return 0; -unwind: + cleanup_local: + br_stub_cleanup_local (local); + br_stub_dealloc_local (local); + + unwind: frame->local = NULL; STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, NULL, NULL, NULL); - br_stub_cleanup_local (local); - br_stub_dealloc_local (local); + return 0; } @@ -1186,34 +1272,20 @@ br_stub_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { - int32_t ret = 0; - uint64_t ctx_addr = 0; - br_stub_inode_ctx_t *ctx = NULL; - br_stub_local_t *local = NULL; + int32_t ret = 0; + br_stub_local_t *local = NULL; - if (frame->local) { - local = frame->local; - frame->local = NULL; - } + local = frame->local; + frame->local = NULL; if (op_ret < 0) goto unwind; - ret = br_stub_get_inode_ctx (this, local->u.context.fd->inode, - &ctx_addr); - if (ret < 0) - goto unwind; - - ctx = (br_stub_inode_ctx_t *) (long) ctx_addr; - - /* Mark the flag to indicate the inode has been modified */ - LOCK (&local->u.context.fd->inode->lock); - { - if (!__br_stub_is_inode_modified (ctx)) - __br_stub_set_inode_modified (ctx); + ret = br_stub_mark_inode_modified (this, local); + if (ret) { + op_ret = -1; + op_errno = EINVAL; } - UNLOCK (&local->u.context.fd->inode->lock); - unwind: STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, postbuf, @@ -1243,6 +1315,8 @@ br_stub_truncate_resume (call_frame_t *frame, xlator_t *this, loc_t *loc, * on an anonymous fd. The fd will be valid till the completion of the * truncate call. It guarantees that release on this anonymous fd will happen * after the truncate call and notification is sent after the truncate call. + * + * c.f. br_writev_cbk() for explanation */ int32_t br_stub_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, @@ -1253,10 +1327,11 @@ br_stub_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t op_ret = -1; int32_t op_errno = EINVAL; gf_boolean_t inc_version = _gf_false; + gf_boolean_t modified = _gf_false; br_stub_inode_ctx_t *ctx = NULL; - uint64_t ctx_addr = 0; int32_t ret = -1; fd_t *fd = NULL; + fop_truncate_cbk_t cbk = default_truncate_cbk; GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind); GF_VALIDATE_OR_GOTO (this->name, frame, unwind); @@ -1270,63 +1345,50 @@ br_stub_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, goto unwind; } - local = br_stub_alloc_local (this); - if (!local) { - gf_log (this->name, GF_LOG_ERROR, "local allocation failed " - "(gfid: %s)", uuid_utoa (loc->inode->gfid)); - op_ret = -1; - op_errno = ENOMEM; + ret = br_stub_need_versioning (this, fd, &inc_version, &modified, &ctx); + if (ret) goto unwind; - } - - local->u.context.fd = fd; - frame->local = local; - ret = br_stub_get_inode_ctx (this, loc->inode, &ctx_addr); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "failed to get the inode " - "context for the inode %s", - uuid_utoa (fd->inode->gfid)); - goto unwind; - } + if (!inc_version && modified) + goto wind; - ctx = (br_stub_inode_ctx_t *) (long) ctx_addr; - ret = br_stub_anon_fd_ctx (this, local->u.context.fd, ctx); + ret = br_stub_versioning_prep (frame, this, fd, ctx); if (ret) goto unwind; - /** - * c.f. br_stub_writev() - */ - inc_version = br_stub_inc_version (this, fd, ctx); - if (!inc_version) + local = frame->local; + if (!inc_version) { + br_stub_fill_local (local, NULL, fd, fd->inode, + fd->inode->gfid, BR_STUB_NO_VERSIONING, 0); + cbk = br_stub_truncate_cbk; goto wind; + } - /* Create the stub for the truncate fop */ stub = fop_truncate_stub (frame, br_stub_truncate_resume, loc, offset, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "failed to allocate stub for " "truncate fop (gfid: %s), unwinding", uuid_utoa (fd->inode->gfid)); - goto unwind; + goto cleanup_local; } - /* Perform Versioning */ - return br_stub_perform_incversioning (this, frame, stub, - local->u.context.fd, ctx); + return br_stub_perform_incversioning (this, frame, stub, fd, ctx); -wind: - STACK_WIND (frame, br_stub_truncate_cbk, FIRST_CHILD(this), + wind: + STACK_WIND (frame, cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); return 0; -unwind: + cleanup_local: + br_stub_cleanup_local (local); + br_stub_dealloc_local (local); + + unwind: frame->local = NULL; STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, NULL, NULL, NULL); - br_stub_cleanup_local (local); - br_stub_dealloc_local (local); + return 0; } @@ -1810,7 +1872,7 @@ __br_stub_inode_sign_state (br_stub_inode_ctx_t *ctx, switch (fop) { - case GF_FOP_WRITE: + case GF_FOP_FSETXATTR: sign_info = ctx->info_sign = BR_SIGN_QUICK; break; diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.h b/xlators/features/bit-rot/src/stub/bit-rot-stub.h index 69e212bb81f..e1e7b383f42 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.h @@ -66,7 +66,8 @@ typedef struct br_stub_local { } u; } br_stub_local_t; -#define BR_STUB_INCREMENTAL_VERSIONING (1<<1) +#define BR_STUB_NO_VERSIONING (1 << 0) +#define BR_STUB_INCREMENTAL_VERSIONING (1 << 1) typedef struct br_stub_private { gf_boolean_t go; |