diff options
Diffstat (limited to 'xlators/cluster/dht/src/dht-rename.c')
| -rw-r--r-- | xlators/cluster/dht/src/dht-rename.c | 2441 |
1 files changed, 1788 insertions, 653 deletions
diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c index f1a4364df20..d9dbf50492f 100644 --- a/xlators/cluster/dht/src/dht-rename.c +++ b/xlators/cluster/dht/src/dht-rename.c @@ -11,852 +11,1987 @@ /* TODO: link(oldpath, newpath) fails if newpath already exists. DHT should * delete the newpath if it gets EEXISTS from link() call. */ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "glusterfs.h" -#include "xlator.h" #include "dht-common.h" -#include "defaults.h" +#include "dht-lock.h" +#include <glusterfs/defaults.h> +int +dht_rename_unlock(call_frame_t *frame, xlator_t *this); +int32_t +dht_rename_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata); int -dht_rename_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *stbuf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata) -{ - dht_local_t *local = NULL; - int this_call_cnt = 0; - call_frame_t *prev = NULL; - - local = frame->local; - prev = cookie; - - if (op_ret == -1) { - /* TODO: undo the damage */ - - gf_log (this->name, GF_LOG_INFO, - "rename %s -> %s on %s failed (%s)", - local->loc.path, local->loc2.path, - prev->this->name, strerror (op_errno)); - - local->op_ret = op_ret; - local->op_errno = op_errno; - goto unwind; - } - /* TODO: construct proper stbuf for dir */ - /* - * FIXME: is this the correct way to build stbuf and - * parent bufs? - */ - dht_iatt_merge (this, &local->stbuf, stbuf, prev->this); - dht_iatt_merge (this, &local->preoldparent, preoldparent, - prev->this); - dht_iatt_merge (this, &local->postoldparent, postoldparent, - prev->this); - dht_iatt_merge (this, &local->preparent, prenewparent, - prev->this); - dht_iatt_merge (this, &local->postparent, postnewparent, - prev->this); +dht_rename_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + dht_local_t *local = NULL; + local = frame->local; -unwind: - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - WIPE (&local->preoldparent); - WIPE (&local->postoldparent); - WIPE (&local->preparent); - WIPE (&local->postparent); - - DHT_STRIP_PHASE1_FLAGS (&local->stbuf); - DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, - &local->stbuf, &local->preoldparent, - &local->postoldparent, - &local->preparent, &local->postparent, xdata); - } + dht_set_fixed_dir_stat(&local->preoldparent); + dht_set_fixed_dir_stat(&local->postoldparent); + dht_set_fixed_dir_stat(&local->preparent); + dht_set_fixed_dir_stat(&local->postparent); - return 0; + if (IA_ISREG(local->stbuf.ia_type)) + DHT_STRIP_PHASE1_FLAGS(&local->stbuf); + + DHT_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno, + &local->stbuf, &local->preoldparent, &local->postoldparent, + &local->preparent, &local->postparent, local->xattr); + return 0; } +static void +dht_rename_dir_unlock_src(call_frame_t *frame, xlator_t *this) +{ + dht_local_t *local = NULL; + + local = frame->local; + dht_unlock_namespace(frame, &local->lock[0]); + return; +} + +static void +dht_rename_dir_unlock_dst(call_frame_t *frame, xlator_t *this) +{ + dht_local_t *local = NULL; + int op_ret = -1; + char src_gfid[GF_UUID_BUF_SIZE] = {0}; + char dst_gfid[GF_UUID_BUF_SIZE] = {0}; + + local = frame->local; + + /* Unlock entrylk */ + dht_unlock_entrylk_wrapper(frame, &local->lock[1].ns.directory_ns); + + /* Unlock inodelk */ + op_ret = dht_unlock_inodelk(frame, local->lock[1].ns.parent_layout.locks, + local->lock[1].ns.parent_layout.lk_count, + dht_rename_unlock_cbk); + if (op_ret < 0) { + uuid_utoa_r(local->loc.inode->gfid, src_gfid); + + if (local->loc2.inode) + uuid_utoa_r(local->loc2.inode->gfid, dst_gfid); + + if (IA_ISREG(local->stbuf.ia_type)) + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED, + "winding unlock inodelk failed " + "rename (%s:%s:%s %s:%s:%s), " + "stale locks left on bricks", + local->loc.path, src_gfid, local->src_cached->name, + local->loc2.path, dst_gfid, + local->dst_cached ? local->dst_cached->name : NULL); + else + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED, + "winding unlock inodelk failed " + "rename (%s:%s %s:%s), " + "stale locks left on bricks", + local->loc.path, src_gfid, local->loc2.path, dst_gfid); + + dht_rename_unlock_cbk(frame, NULL, this, 0, 0, NULL); + } + + return; +} +static int +dht_rename_dir_unlock(call_frame_t *frame, xlator_t *this) +{ + dht_rename_dir_unlock_src(frame, this); + dht_rename_dir_unlock_dst(frame, this); + return 0; +} int -dht_rename_hashed_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *stbuf, - struct iatt *preoldparent, - struct iatt *postoldparent, - struct iatt *prenewparent, - struct iatt *postnewparent, dict_t *xdata) -{ - dht_conf_t *conf = NULL; - dht_local_t *local = NULL; - int call_cnt = 0; - call_frame_t *prev = NULL; - int i = 0; - - conf = this->private; - local = frame->local; - prev = cookie; - - if (op_ret == -1) { - /* TODO: undo the damage */ - - gf_log (this->name, GF_LOG_INFO, - "rename %s -> %s on %s failed (%s)", - local->loc.path, local->loc2.path, - prev->this->name, strerror (op_errno)); - - local->op_ret = op_ret; - local->op_errno = op_errno; - goto unwind; - } - /* TODO: construct proper stbuf for dir */ - /* - * FIXME: is this the correct way to build stbuf and - * parent bufs? +dht_rename_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *stbuf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) +{ + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + int this_call_cnt = 0; + xlator_t *prev = NULL; + int i = 0; + char gfid[GF_UUID_BUF_SIZE] = {0}; + int subvol_cnt = -1; + + conf = this->private; + local = frame->local; + prev = cookie; + subvol_cnt = dht_subvol_cnt(this, prev); + local->ret_cache[subvol_cnt] = op_ret; + + if (op_ret == -1) { + gf_uuid_unparse(local->loc.inode->gfid, gfid); + + gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_RENAME_FAILED, + "Rename %s -> %s on %s failed, (gfid = %s)", local->loc.path, + local->loc2.path, prev->name, gfid); + + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unwind; + } + /* TODO: construct proper stbuf for dir */ + /* + * FIXME: is this the correct way to build stbuf and + * parent bufs? + */ + dht_iatt_merge(this, &local->stbuf, stbuf); + dht_iatt_merge(this, &local->preoldparent, preoldparent); + dht_iatt_merge(this, &local->postoldparent, postoldparent); + dht_iatt_merge(this, &local->preparent, prenewparent); + dht_iatt_merge(this, &local->postparent, postnewparent); + +unwind: + this_call_cnt = dht_frame_return(frame); + if (is_last_call(this_call_cnt)) { + /* We get here with local->call_cnt == 0. Which means + * we are the only one executing this code, there is + * no contention. Therefore it's safe to manipulate or + * deref local->call_cnt directly (without locking). */ - dht_iatt_merge (this, &local->stbuf, stbuf, prev->this); - dht_iatt_merge (this, &local->preoldparent, preoldparent, - prev->this); - dht_iatt_merge (this, &local->postoldparent, postoldparent, - prev->this); - dht_iatt_merge (this, &local->preparent, prenewparent, - prev->this); - dht_iatt_merge (this, &local->postparent, postnewparent, - prev->this); - - call_cnt = local->call_cnt = conf->subvolume_cnt - 1; - - if (!local->call_cnt) - goto unwind; - - for (i = 0; i < conf->subvolume_cnt; i++) { - if (conf->subvolumes[i] == local->dst_hashed) + if (local->ret_cache[conf->subvolume_cnt] == 0) { + /* count errant subvols in last field of ret_cache */ + for (i = 0; i < conf->subvolume_cnt; i++) { + if (local->ret_cache[i] != 0) + ++local->ret_cache[conf->subvolume_cnt]; + } + if (local->ret_cache[conf->subvolume_cnt]) { + /* undoing the damage: + * for all subvolumes, where rename + * succeeded, we perform the reverse operation + */ + for (i = 0; i < conf->subvolume_cnt; i++) { + if (local->ret_cache[i] == 0) + ++local->call_cnt; + } + for (i = 0; i < conf->subvolume_cnt; i++) { + if (local->ret_cache[i]) continue; - STACK_WIND (frame, dht_rename_dir_cbk, - conf->subvolumes[i], - conf->subvolumes[i]->fops->rename, - &local->loc, &local->loc2, NULL); - if (!--call_cnt) - break; - } + STACK_WIND(frame, dht_rename_dir_cbk, conf->subvolumes[i], + conf->subvolumes[i]->fops->rename, &local->loc2, + &local->loc, NULL); + } - return 0; -unwind: - WIPE (&local->preoldparent); - WIPE (&local->postoldparent); - WIPE (&local->preparent); - WIPE (&local->postparent); + return 0; + } + } - DHT_STRIP_PHASE1_FLAGS (&local->stbuf); - DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, - &local->stbuf, &local->preoldparent, - &local->postoldparent, - &local->preparent, &local->postparent, NULL); + WIPE(&local->preoldparent); + WIPE(&local->postoldparent); + WIPE(&local->preparent); + WIPE(&local->postparent); - return 0; + dht_rename_dir_unlock(frame, this); + } + + return 0; } +int +dht_rename_hashed_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *stbuf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) +{ + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + int call_cnt = 0; + xlator_t *prev = NULL; + int i = 0; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + conf = this->private; + local = frame->local; + prev = cookie; + + if (op_ret == -1) { + gf_uuid_unparse(local->loc.inode->gfid, gfid); + + gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_RENAME_FAILED, + "rename %s -> %s on %s failed, (gfid = %s) ", local->loc.path, + local->loc2.path, prev->name, gfid); + + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unwind; + } + /* TODO: construct proper stbuf for dir */ + /* + * FIXME: is this the correct way to build stbuf and + * parent bufs? + */ + dht_iatt_merge(this, &local->stbuf, stbuf); + dht_iatt_merge(this, &local->preoldparent, preoldparent); + dht_iatt_merge(this, &local->postoldparent, postoldparent); + dht_iatt_merge(this, &local->preparent, prenewparent); + dht_iatt_merge(this, &local->postparent, postnewparent); + + call_cnt = local->call_cnt = conf->subvolume_cnt - 1; + + if (!local->call_cnt) + goto unwind; + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolumes[i] == local->dst_hashed) + continue; + STACK_WIND_COOKIE( + frame, dht_rename_dir_cbk, conf->subvolumes[i], conf->subvolumes[i], + conf->subvolumes[i]->fops->rename, &local->loc, &local->loc2, NULL); + if (!--call_cnt) + break; + } + + return 0; +unwind: + WIPE(&local->preoldparent); + WIPE(&local->postoldparent); + WIPE(&local->preparent); + WIPE(&local->postparent); + + dht_rename_dir_unlock(frame, this); + return 0; +} int -dht_rename_dir_do (call_frame_t *frame, xlator_t *this) +dht_rename_dir_do(call_frame_t *frame, xlator_t *this) { - dht_local_t *local = NULL; + dht_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local->op_ret == -1) - goto err; + if (local->op_ret == -1) + goto err; - local->op_ret = 0; + local->op_ret = 0; - STACK_WIND (frame, dht_rename_hashed_dir_cbk, - local->dst_hashed, - local->dst_hashed->fops->rename, - &local->loc, &local->loc2, NULL); - return 0; + STACK_WIND_COOKIE(frame, dht_rename_hashed_dir_cbk, local->dst_hashed, + local->dst_hashed, local->dst_hashed->fops->rename, + &local->loc, &local->loc2, NULL); + return 0; err: - DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, NULL, NULL, - NULL, NULL, NULL, NULL); - return 0; + dht_rename_dir_unlock(frame, this); + return 0; } - int -dht_rename_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, gf_dirent_t *entries, - dict_t *xdata) -{ - dht_local_t *local = NULL; - int this_call_cnt = -1; - call_frame_t *prev = NULL; - - local = frame->local; - prev = cookie; - - if (op_ret > 2) { - gf_log (this->name, GF_LOG_TRACE, - "readdir on %s for %s returned %d entries", - prev->this->name, local->loc.path, op_ret); - local->op_ret = -1; - local->op_errno = ENOTEMPTY; - } +dht_rename_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries, + dict_t *xdata) +{ + dht_local_t *local = NULL; + int this_call_cnt = -1; + xlator_t *prev = NULL; - this_call_cnt = dht_frame_return (frame); + local = frame->local; + prev = cookie; - if (is_last_call (this_call_cnt)) { - dht_rename_dir_do (frame, this); - } + if (op_ret > 2) { + gf_msg_trace(this->name, 0, "readdir on %s for %s returned %d entries", + prev->name, local->loc.path, op_ret); + local->op_ret = -1; + local->op_errno = ENOTEMPTY; + } - return 0; -} + this_call_cnt = dht_frame_return(frame); + if (is_last_call(this_call_cnt)) { + dht_rename_dir_do(frame, this); + } + + return 0; +} int -dht_rename_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, fd_t *fd, dict_t *xdata) +dht_rename_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, fd_t *fd, dict_t *xdata) { - dht_local_t *local = NULL; - int this_call_cnt = -1; - call_frame_t *prev = NULL; + dht_local_t *local = NULL; + int this_call_cnt = -1; + xlator_t *prev = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + local = frame->local; + prev = cookie; - local = frame->local; - prev = cookie; + if (op_ret == -1) { + gf_uuid_unparse(local->loc.inode->gfid, gfid); + gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_OPENDIR_FAILED, + "opendir on %s for %s failed,(gfid = %s) ", prev->name, + local->loc.path, gfid); + goto err; + } - if (op_ret == -1) { - gf_log (this->name, GF_LOG_INFO, - "opendir on %s for %s failed (%s)", - prev->this->name, local->loc.path, - strerror (op_errno)); - goto err; - } - - STACK_WIND (frame, dht_rename_readdir_cbk, - prev->this, prev->this->fops->readdir, - local->fd, 4096, 0, NULL); + fd_bind(fd); + STACK_WIND_COOKIE(frame, dht_rename_readdir_cbk, prev, prev, + prev->fops->readdir, local->fd, 4096, 0, NULL); - return 0; + return 0; err: - this_call_cnt = dht_frame_return (frame); + this_call_cnt = dht_frame_return(frame); - if (is_last_call (this_call_cnt)) { - dht_rename_dir_do (frame, this); - } + if (is_last_call(this_call_cnt)) { + dht_rename_dir_do(frame, this); + } - return 0; + return 0; } - int -dht_rename_dir (call_frame_t *frame, xlator_t *this) +dht_rename_dir_lock2_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_conf_t *conf = NULL; - dht_local_t *local = NULL; - int i = 0; - int op_errno = -1; + dht_local_t *local = NULL; + char src_gfid[GF_UUID_BUF_SIZE] = {0}; + char dst_gfid[GF_UUID_BUF_SIZE] = {0}; + dht_conf_t *conf = NULL; + int i = 0; + + local = frame->local; + conf = this->private; + + if (op_ret < 0) { + uuid_utoa_r(local->loc.inode->gfid, src_gfid); + + if (local->loc2.inode) + uuid_utoa_r(local->loc2.inode->gfid, dst_gfid); + + gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR, + "acquiring entrylk after inodelk failed" + "rename (%s:%s:%s %s:%s:%s)", + local->loc.path, src_gfid, local->src_cached->name, + local->loc2.path, dst_gfid, + local->dst_cached ? local->dst_cached->name : NULL); + + local->op_ret = -1; + local->op_errno = op_errno; + goto err; + } + + local->fd = fd_create(local->loc.inode, frame->root->pid); + if (!local->fd) { + op_errno = ENOMEM; + goto err; + } + + local->op_ret = 0; + + if (!local->dst_cached) { + dht_rename_dir_do(frame, this); + return 0; + } + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND_COOKIE(frame, dht_rename_opendir_cbk, conf->subvolumes[i], + conf->subvolumes[i], + conf->subvolumes[i]->fops->opendir, &local->loc2, + local->fd, NULL); + } - conf = frame->this->private; - local = frame->local; + return 0; - local->call_cnt = conf->subvolume_cnt; +err: + /* No harm in calling an extra unlock */ + dht_rename_dir_unlock(frame, this); + return 0; +} - for (i = 0; i < conf->subvolume_cnt; i++) { - if (!conf->subvolume_status[i]) { - gf_log (this->name, GF_LOG_INFO, - "one of the subvolumes down (%s)", - conf->subvolumes[i]->name); - op_errno = ENOTCONN; - goto err; - } - } +int +dht_rename_dir_lock1_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + dht_local_t *local = NULL; + char src_gfid[GF_UUID_BUF_SIZE] = {0}; + char dst_gfid[GF_UUID_BUF_SIZE] = {0}; + int ret = 0; + loc_t *loc = NULL; + xlator_t *subvol = NULL; + + local = frame->local; + + if (op_ret < 0) { + uuid_utoa_r(local->loc.inode->gfid, src_gfid); + + if (local->loc2.inode) + uuid_utoa_r(local->loc2.inode->gfid, dst_gfid); + + gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR, + "acquiring entrylk after inodelk failed" + "rename (%s:%s:%s %s:%s:%s)", + local->loc.path, src_gfid, local->src_cached->name, + local->loc2.path, dst_gfid, + local->dst_cached ? local->dst_cached->name : NULL); + + local->op_ret = -1; + local->op_errno = op_errno; + goto err; + } + + if (local->current == &local->lock[0]) { + loc = &local->loc2; + subvol = local->dst_hashed; + local->current = &local->lock[1]; + } else { + loc = &local->loc; + subvol = local->src_hashed; + local->current = &local->lock[0]; + } + ret = dht_protect_namespace(frame, loc, subvol, &local->current->ns, + dht_rename_dir_lock2_cbk); + if (ret < 0) { + op_errno = EINVAL; + goto err; + } + + return 0; +err: + /* No harm in calling an extra unlock */ + dht_rename_dir_unlock(frame, this); + return 0; +} - local->fd = fd_create (local->loc.inode, frame->root->pid); - if (!local->fd) { - op_errno = ENOMEM; - goto err; +/* + * If the hashed subvolumes of both source and dst are the different, + * lock in dictionary order of hashed subvol->name. This is important + * in case the parent directory is the same for both src and dst to + * prevent inodelk deadlocks when racing with a fix-layout op on the parent. + * + * If the hashed subvols are the same, use the gfid/name to determine + * the order of taking locks to prevent entrylk deadlocks when the parent + * dirs are the same. + * + */ +static int +dht_order_rename_lock(call_frame_t *frame, loc_t **loc, xlator_t **subvol) +{ + int ret = 0; + int op_ret = 0; + dht_local_t *local = NULL; + char *src = NULL; + char *dst = NULL; + + local = frame->local; + + if (local->src_hashed->name == local->dst_hashed->name) { + ret = 0; + } else { + ret = strcmp(local->src_hashed->name, local->dst_hashed->name); + } + + if (ret == 0) { + /* hashed subvols are the same for src and dst */ + /* Entrylks need to be ordered*/ + + src = alloca(GF_UUID_BNAME_BUF_SIZE + strlen(local->loc.name) + 1); + if (!src) { + gf_msg(frame->this->name, GF_LOG_ERROR, ENOMEM, 0, + "Insufficient memory for src"); + op_ret = -1; + goto out; } - local->op_ret = 0; + if (!gf_uuid_is_null(local->loc.pargfid)) + uuid_utoa_r(local->loc.pargfid, src); + else if (local->loc.parent) + uuid_utoa_r(local->loc.parent->gfid, src); + else + src[0] = '\0'; - if (!local->dst_cached) { - dht_rename_dir_do (frame, this); - return 0; - } + strcat(src, local->loc.name); - for (i = 0; i < conf->subvolume_cnt; i++) { - STACK_WIND (frame, dht_rename_opendir_cbk, - conf->subvolumes[i], - conf->subvolumes[i]->fops->opendir, - &local->loc2, local->fd, NULL); + dst = alloca(GF_UUID_BNAME_BUF_SIZE + strlen(local->loc2.name) + 1); + if (!dst) { + gf_msg(frame->this->name, GF_LOG_ERROR, ENOMEM, 0, + "Insufficient memory for dst"); + op_ret = -1; + goto out; } - return 0; + if (!gf_uuid_is_null(local->loc2.pargfid)) + uuid_utoa_r(local->loc2.pargfid, dst); + else if (local->loc2.parent) + uuid_utoa_r(local->loc2.parent->gfid, dst); + else + dst[0] = '\0'; + + strcat(dst, local->loc2.name); + ret = strcmp(src, dst); + } + + if (ret <= 0) { + /*inodelk in dictionary order of hashed subvol names*/ + /*entrylk in dictionary order of gfid/basename */ + local->current = &local->lock[0]; + *loc = &local->loc; + *subvol = local->src_hashed; + + } else { + local->current = &local->lock[1]; + *loc = &local->loc2; + *subvol = local->dst_hashed; + } + + op_ret = 0; + +out: + return op_ret; +} + +int +dht_rename_dir(call_frame_t *frame, xlator_t *this) +{ + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + loc_t *loc = NULL; + xlator_t *subvol = NULL; + int i = 0; + int ret = 0; + int op_errno = -1; + + conf = frame->this->private; + local = frame->local; + + local->ret_cache = GF_CALLOC(conf->subvolume_cnt + 1, sizeof(int), + gf_dht_ret_cache_t); + + if (local->ret_cache == NULL) { + op_errno = ENOMEM; + goto err; + } + + local->call_cnt = conf->subvolume_cnt; + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (!conf->subvolume_status[i]) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_FAILED, + "Rename dir failed: subvolume down (%s)", + conf->subvolumes[i]->name); + op_errno = ENOTCONN; + goto err; + } + } + + /* Locks on src and dst needs to ordered which otherwise might cause + * deadlocks when rename (src, dst) and rename (dst, src) is done from + * two different clients + */ + ret = dht_order_rename_lock(frame, &loc, &subvol); + if (ret) { + op_errno = ENOMEM; + goto err; + } + + /* Rename must take locks on src to avoid lookup selfheal from + * recreating src on those subvols where the rename was successful. + * The locks can't be issued parallel as two different clients might + * attempt same rename command and be in dead lock. + */ + ret = dht_protect_namespace(frame, loc, subvol, &local->current->ns, + dht_rename_dir_lock1_cbk); + if (ret < 0) { + op_errno = EINVAL; + goto err; + } + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, - NULL, NULL); - return 0; + DHT_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL, + NULL); + return 0; } +static int +dht_rename_track_for_changelog(xlator_t *this, dict_t *xattr, loc_t *oldloc, + loc_t *newloc) +{ + int ret = -1; + dht_changelog_rename_info_t *info = NULL; + char *name = NULL; + int len1 = 0; + int len2 = 0; + int size = 0; + + if (!xattr || !oldloc || !newloc || !this) + return ret; + + len1 = strlen(oldloc->name) + 1; + len2 = strlen(newloc->name) + 1; + size = sizeof(dht_changelog_rename_info_t) + len1 + len2; + + info = GF_CALLOC(size, sizeof(char), gf_common_mt_char); + if (!info) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to calloc memory"); + return ret; + } + + gf_uuid_copy(info->old_pargfid, oldloc->pargfid); + gf_uuid_copy(info->new_pargfid, newloc->pargfid); + + info->oldname_len = len1; + info->newname_len = len2; + strncpy(info->buffer, oldloc->name, len1); + name = info->buffer + len1; + strncpy(name, newloc->name, len2); + + ret = dict_set_bin(xattr, DHT_CHANGELOG_RENAME_OP_KEY, info, size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value: key = %s," + " path = %s", + DHT_CHANGELOG_RENAME_OP_KEY, oldloc->name); + GF_FREE(info); + } + + return ret; +} + +#define DHT_MARKER_DONT_ACCOUNT(xattr) \ + do { \ + int tmp = -1; \ + if (!xattr) { \ + xattr = dict_new(); \ + if (!xattr) \ + break; \ + } \ + tmp = dict_set_str(xattr, GLUSTERFS_MARKER_DONT_ACCOUNT_KEY, "yes"); \ + if (tmp) { \ + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, \ + "Failed to set dictionary value: key = %s," \ + " path = %s", \ + GLUSTERFS_MARKER_DONT_ACCOUNT_KEY, local->loc.path); \ + } \ + } while (0) + +#define DHT_CHANGELOG_TRACK_AS_RENAME(xattr, oldloc, newloc) \ + do { \ + int tmp = -1; \ + if (!xattr) { \ + xattr = dict_new(); \ + if (!xattr) { \ + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, \ + "Failed to create dictionary to " \ + "track rename"); \ + break; \ + } \ + } \ + \ + tmp = dht_rename_track_for_changelog(this, xattr, oldloc, newloc); \ + \ + if (tmp) { \ + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, \ + "Failed to set dictionary value: key = %s," \ + " path = %s", \ + DHT_CHANGELOG_RENAME_OP_KEY, (oldloc)->path); \ + } \ + } while (0) int -dht_rename_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +dht_rename_unlock(call_frame_t *frame, xlator_t *this) { - dht_local_t *local = NULL; - call_frame_t *prev = NULL; - int this_call_cnt = 0; + dht_local_t *local = NULL; + int op_ret = -1; + char src_gfid[GF_UUID_BUF_SIZE] = {0}; + char dst_gfid[GF_UUID_BUF_SIZE] = {0}; + dht_ilock_wrap_t inodelk_wrapper = { + 0, + }; + + local = frame->local; + inodelk_wrapper.locks = local->rename_inodelk_backward_compatible; + inodelk_wrapper.lk_count = local->rename_inodelk_bc_count; + + op_ret = dht_unlock_inodelk_wrapper(frame, &inodelk_wrapper); + if (op_ret < 0) { + uuid_utoa_r(local->loc.inode->gfid, src_gfid); + + if (local->loc2.inode) + uuid_utoa_r(local->loc2.inode->gfid, dst_gfid); + + if (IA_ISREG(local->stbuf.ia_type)) + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED, + "winding unlock inodelk failed " + "rename (%s:%s:%s %s:%s:%s), " + "stale locks left on bricks", + local->loc.path, src_gfid, local->src_cached->name, + local->loc2.path, dst_gfid, + local->dst_cached ? local->dst_cached->name : NULL); + else + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED, + "winding unlock inodelk failed " + "rename (%s:%s %s:%s), " + "stale locks left on bricks", + local->loc.path, src_gfid, local->loc2.path, dst_gfid); + } + + dht_unlock_namespace(frame, &local->lock[0]); + dht_unlock_namespace(frame, &local->lock[1]); + + dht_rename_unlock_cbk(frame, NULL, this, local->op_ret, local->op_errno, + NULL); + return 0; +} - local = frame->local; - prev = cookie; +int +dht_rename_done(call_frame_t *frame, xlator_t *this) +{ + dht_local_t *local = NULL; - if (!local) { - gf_log (this->name, GF_LOG_ERROR, - "!local, should not happen"); - goto out; - } + local = frame->local; - this_call_cnt = dht_frame_return (frame); + if (local->linked == _gf_true) { + local->linked = _gf_false; + dht_linkfile_attr_heal(frame, this); + } - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "%s: unlink on %s failed (%s)", - local->loc.path, prev->this->name, strerror (op_errno)); - } + dht_rename_unlock(frame, this); + return 0; +} - WIPE (&local->preoldparent); - WIPE (&local->postoldparent); - WIPE (&local->preparent); - WIPE (&local->postparent); - - if (is_last_call (this_call_cnt)) { - DHT_STRIP_PHASE1_FLAGS (&local->stbuf); - DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, - &local->stbuf, &local->preoldparent, - &local->postoldparent, &local->preparent, - &local->postparent, NULL); - } +int +dht_rename_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + dht_local_t *local = NULL; + xlator_t *prev = NULL; + int this_call_cnt = 0; + + local = frame->local; + prev = cookie; + + FRAME_SU_UNDO(frame, dht_local_t); + if (!local) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_VALUE, + "!local, should not happen"); + goto out; + } + + this_call_cnt = dht_frame_return(frame); + + if (op_ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLINK_FAILED, + "%s: Rename: unlink on %s failed ", local->loc.path, prev->name); + } + + WIPE(&local->preoldparent); + WIPE(&local->postoldparent); + WIPE(&local->preparent); + WIPE(&local->postparent); + + if (is_last_call(this_call_cnt)) { + dht_rename_done(frame, this); + } out: - return 0; + return 0; } - int -dht_rename_cleanup (call_frame_t *frame) +dht_rename_cleanup(call_frame_t *frame) { - dht_local_t *local = NULL; - xlator_t *this = NULL; - xlator_t *src_hashed = NULL; - xlator_t *src_cached = NULL; - xlator_t *dst_hashed = NULL; - xlator_t *dst_cached = NULL; - int call_cnt = 0; + dht_local_t *local = NULL; + xlator_t *this = NULL; + xlator_t *src_hashed = NULL; + xlator_t *src_cached = NULL; + xlator_t *dst_hashed = NULL; + xlator_t *dst_cached = NULL; + int call_cnt = 0; + dict_t *xattr = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + local = frame->local; + this = frame->this; - local = frame->local; - this = frame->this; + src_hashed = local->src_hashed; + src_cached = local->src_cached; + dst_hashed = local->dst_hashed; + dst_cached = local->dst_cached; - src_hashed = local->src_hashed; - src_cached = local->src_cached; - dst_hashed = local->dst_hashed; - dst_cached = local->dst_cached; + if (src_cached == dst_cached) + goto nolinks; - if (src_cached == dst_cached) - goto nolinks; + if (local->linked && (dst_hashed != src_hashed) && + (dst_hashed != src_cached)) { + call_cnt++; + } - if (dst_hashed != src_hashed && dst_hashed != src_cached) - call_cnt++; + if (local->added_link && (src_cached != dst_hashed)) { + call_cnt++; + } - if (src_cached != dst_hashed) - call_cnt++; + local->call_cnt = call_cnt; - local->call_cnt = call_cnt; + if (!call_cnt) + goto nolinks; - if (!call_cnt) - goto nolinks; + DHT_MARK_FOP_INTERNAL(xattr); - if (dst_hashed != src_hashed && dst_hashed != src_cached) { - gf_log (this->name, GF_LOG_TRACE, - "unlinking linkfile %s @ %s => %s", - local->loc.path, dst_hashed->name, src_cached->name); - STACK_WIND (frame, dht_rename_unlink_cbk, - dst_hashed, dst_hashed->fops->unlink, - &local->loc, 0, NULL); - } + gf_uuid_unparse(local->loc.inode->gfid, gfid); - if (src_cached != dst_hashed) { - gf_log (this->name, GF_LOG_TRACE, - "unlinking link %s => %s (%s)", local->loc.path, - local->loc2.path, src_cached->name); - STACK_WIND (frame, dht_rename_unlink_cbk, - src_cached, src_cached->fops->unlink, - &local->loc2, 0, NULL); - } + if (local->linked && (dst_hashed != src_hashed) && + (dst_hashed != src_cached)) { + dict_t *xattr_new = NULL; - return 0; + gf_msg_trace(this->name, 0, + "unlinking linkfile %s @ %s => %s, (gfid = %s)", + local->loc.path, dst_hashed->name, src_cached->name, gfid); -nolinks: - WIPE (&local->preoldparent); - WIPE (&local->postoldparent); - WIPE (&local->preparent); - WIPE (&local->postparent); + xattr_new = dict_copy_with_ref(xattr, NULL); - DHT_STRIP_PHASE1_FLAGS (&local->stbuf); - DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, - &local->stbuf, &local->preoldparent, - &local->postoldparent, &local->preparent, - &local->postparent, NULL); + DHT_MARKER_DONT_ACCOUNT(xattr_new); - return 0; -} + FRAME_SU_DO(frame, dht_local_t); + STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, dst_hashed, dst_hashed, + dst_hashed->fops->unlink, &local->loc, 0, xattr_new); + dict_unref(xattr_new); + xattr_new = NULL; + } -int -dht_rename_links_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) -{ - call_frame_t *prev = NULL; - dht_local_t *local = NULL; - - prev = cookie; - local = frame->local; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "link/file %s on %s failed (%s)", - local->loc.path, prev->this->name, strerror (op_errno)); + if (local->added_link && (src_cached != dst_hashed)) { + dict_t *xattr_new = NULL; + + gf_msg_trace(this->name, 0, "unlinking link %s => %s (%s), (gfid = %s)", + local->loc.path, local->loc2.path, src_cached->name, gfid); + + xattr_new = dict_copy_with_ref(xattr, NULL); + + if (gf_uuid_compare(local->loc.pargfid, local->loc2.pargfid) == 0) { + DHT_MARKER_DONT_ACCOUNT(xattr_new); } + /* * + * The link to file is created using root permission. + * Hence deletion should happen using root. Otherwise + * it will fail. + */ + FRAME_SU_DO(frame, dht_local_t); + STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, src_cached, src_cached, + src_cached->fops->unlink, &local->loc2, 0, xattr_new); - DHT_STACK_DESTROY (frame); + dict_unref(xattr_new); + xattr_new = NULL; + } - return 0; -} + if (xattr) + dict_unref(xattr); + + return 0; + +nolinks: + WIPE(&local->preoldparent); + WIPE(&local->postoldparent); + WIPE(&local->preparent); + WIPE(&local->postparent); + dht_rename_unlock(frame, this); + return 0; +} int -dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *stbuf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata) -{ - dht_local_t *local = NULL; - call_frame_t *prev = NULL; - xlator_t *src_hashed = NULL; - xlator_t *src_cached = NULL; - xlator_t *dst_hashed = NULL; - xlator_t *dst_cached = NULL; - xlator_t *rename_subvol = NULL; - call_frame_t *link_frame = NULL; - dht_local_t *link_local = NULL; - - local = frame->local; - prev = cookie; - - src_hashed = local->src_hashed; - src_cached = local->src_cached; - dst_hashed = local->dst_hashed; - dst_cached = local->dst_cached; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "%s: rename on %s failed (%s)", local->loc.path, - prev->this->name, strerror (op_errno)); - local->op_ret = op_ret; - local->op_errno = op_errno; - goto cleanup; - } +dht_rename_unlink(call_frame_t *frame, xlator_t *this) +{ + dht_local_t *local = NULL; + xlator_t *src_hashed = NULL; + xlator_t *src_cached = NULL; + xlator_t *dst_hashed = NULL; + xlator_t *dst_cached = NULL; + xlator_t *rename_subvol = NULL; + dict_t *xattr = NULL; - if ((src_cached == dst_cached) && (dst_hashed != dst_cached)) { - link_frame = copy_frame (frame); - if (!link_frame) { - goto err; - } + local = frame->local; - /* fop value sent as maxvalue because it is not used - anywhere in this case */ - link_local = dht_local_init (link_frame, &local->loc2, NULL, - GF_FOP_MAXVALUE); - if (!link_local) { - goto err; - } + src_hashed = local->src_hashed; + src_cached = local->src_cached; + dst_hashed = local->dst_hashed; + dst_cached = local->dst_cached; - if (link_local->loc.inode) - inode_unref (link_local->loc.inode); - link_local->loc.inode = inode_ref (local->loc.inode); - uuid_copy (link_local->gfid, local->loc.inode->gfid); + local->call_cnt = 0; - dht_linkfile_create (link_frame, dht_rename_links_create_cbk, - src_cached, dst_hashed, &link_local->loc); - } + /* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk + * is called. since rename has already happened on rename_subvol, + * unlink shouldn't be sent for oldpath (either linkfile or cached-file) + * on rename_subvol. */ + if (src_cached == dst_cached) + rename_subvol = src_cached; + else + rename_subvol = dst_hashed; -err: - dht_iatt_merge (this, &local->stbuf, stbuf, prev->this); - dht_iatt_merge (this, &local->preoldparent, preoldparent, prev->this); - dht_iatt_merge (this, &local->postoldparent, postoldparent, prev->this); - dht_iatt_merge (this, &local->preparent, prenewparent, prev->this); - dht_iatt_merge (this, &local->postparent, postnewparent, prev->this); - - /* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk - * is called. since rename has already happened on rename_subvol, - * unlink should not be sent for oldpath (either linkfile or cached-file) - * on rename_subvol. */ - if (src_cached == dst_cached) - rename_subvol = src_cached; - else - rename_subvol = dst_hashed; + /* TODO: delete files in background */ - /* TODO: delete files in background */ + if (src_cached != dst_hashed && src_cached != dst_cached) + local->call_cnt++; - if (src_cached != dst_hashed && src_cached != dst_cached) - local->call_cnt++; + if (src_hashed != rename_subvol && src_hashed != src_cached) + local->call_cnt++; - if (src_hashed != rename_subvol && src_hashed != src_cached) - local->call_cnt++; + if (dst_cached && dst_cached != dst_hashed && dst_cached != src_cached) + local->call_cnt++; - if (dst_cached && dst_cached != dst_hashed && dst_cached != src_cached) - local->call_cnt++; + if (local->call_cnt == 0) + goto unwind; - if (local->call_cnt == 0) - goto unwind; + DHT_MARK_FOP_INTERNAL(xattr); - if (src_cached != dst_hashed && src_cached != dst_cached) { - gf_log (this->name, GF_LOG_TRACE, - "deleting old src datafile %s @ %s", - local->loc.path, src_cached->name); + if (src_cached != dst_hashed && src_cached != dst_cached) { + dict_t *xattr_new = NULL; - STACK_WIND (frame, dht_rename_unlink_cbk, - src_cached, src_cached->fops->unlink, - &local->loc, 0, NULL); - } + xattr_new = dict_copy_with_ref(xattr, NULL); - if (src_hashed != rename_subvol && src_hashed != src_cached) { - gf_log (this->name, GF_LOG_TRACE, - "deleting old src linkfile %s @ %s", - local->loc.path, src_hashed->name); + gf_msg_trace(this->name, 0, "deleting old src datafile %s @ %s", + local->loc.path, src_cached->name); - STACK_WIND (frame, dht_rename_unlink_cbk, - src_hashed, src_hashed->fops->unlink, - &local->loc, 0, NULL); + if (gf_uuid_compare(local->loc.pargfid, local->loc2.pargfid) == 0) { + DHT_MARKER_DONT_ACCOUNT(xattr_new); } - if (dst_cached - && (dst_cached != dst_hashed) - && (dst_cached != src_cached)) { - gf_log (this->name, GF_LOG_TRACE, - "deleting old dst datafile %s @ %s", - local->loc2.path, dst_cached->name); + DHT_CHANGELOG_TRACK_AS_RENAME(xattr_new, &local->loc, &local->loc2); + STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, src_cached, src_cached, + src_cached->fops->unlink, &local->loc, 0, xattr_new); - STACK_WIND (frame, dht_rename_unlink_cbk, - dst_cached, dst_cached->fops->unlink, - &local->loc2, 0, NULL); - } - return 0; + dict_unref(xattr_new); + xattr_new = NULL; + } -unwind: - WIPE (&local->preoldparent); - WIPE (&local->postoldparent); - WIPE (&local->preparent); - WIPE (&local->postparent); + if (src_hashed != rename_subvol && src_hashed != src_cached) { + dict_t *xattr_new = NULL; - DHT_STRIP_PHASE1_FLAGS (&local->stbuf); - DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, - &local->stbuf, &local->preoldparent, - &local->postoldparent, &local->preparent, - &local->postparent, NULL); + xattr_new = dict_copy_with_ref(xattr, NULL); - return 0; + gf_msg_trace(this->name, 0, "deleting old src linkfile %s @ %s", + local->loc.path, src_hashed->name); -cleanup: - dht_rename_cleanup (frame); + DHT_MARKER_DONT_ACCOUNT(xattr_new); - return 0; + STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, src_hashed, src_hashed, + src_hashed->fops->unlink, &local->loc, 0, xattr_new); + + dict_unref(xattr_new); + xattr_new = NULL; + } + + if (dst_cached && (dst_cached != dst_hashed) && + (dst_cached != src_cached)) { + gf_msg_trace(this->name, 0, "deleting old dst datafile %s @ %s", + local->loc2.path, dst_cached->name); + + STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, dst_cached, dst_cached, + dst_cached->fops->unlink, &local->loc2, 0, xattr); + } + if (xattr) + dict_unref(xattr); + return 0; + +unwind: + WIPE(&local->preoldparent); + WIPE(&local->postoldparent); + WIPE(&local->preparent); + WIPE(&local->postparent); + + dht_rename_done(frame, this); + + return 0; } +int +dht_rename_links_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + xlator_t *prev = NULL; + dht_local_t *local = NULL; + call_frame_t *main_frame = NULL; + + prev = cookie; + local = frame->local; + main_frame = local->main_frame; + + /* TODO: Handle this case in lookup-optimize */ + if (op_ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_CREATE_LINK_FAILED, + "link/file %s on %s failed", local->loc.path, prev->name); + } + + if (local->linked == _gf_true) { + local->linked = _gf_false; + dht_linkfile_attr_heal(frame, this); + } + + dht_rename_unlink(main_frame, this); + DHT_STACK_DESTROY(frame); + return 0; +} int -dht_do_rename (call_frame_t *frame) +dht_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *stbuf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *dst_hashed = NULL; - xlator_t *src_cached = NULL; - xlator_t *dst_cached = NULL; - xlator_t *this = NULL; - xlator_t *rename_subvol = NULL; + dht_local_t *local = NULL; + xlator_t *prev = NULL; + xlator_t *src_cached = NULL; + xlator_t *dst_hashed = NULL; + xlator_t *dst_cached = NULL; + call_frame_t *link_frame = NULL; + dht_local_t *link_local = NULL; + + local = frame->local; + prev = cookie; + + src_cached = local->src_cached; + dst_hashed = local->dst_hashed; + dst_cached = local->dst_cached; + + if (local->linked == _gf_true) + FRAME_SU_UNDO(frame, dht_local_t); + + /* It is a critical failure iff we fail to rename the cached file + * if the rename of the linkto failed, it is not a critical failure, + * and we do not want to lose the created hard link for the new + * name as that could have been read by other clients. + * + * NOTE: If another client is attempting the same oldname -> newname + * rename, and finds both file names as existing, and are hard links + * to each other, then FUSE would send in an unlink for oldname. In + * this time duration if we treat the linkto as a critical error and + * unlink the newname we created, we would have effectively lost the + * file to rename operations. + * + * Repercussions of treating this as a non-critical error is that + * we could leave behind a stale linkto file and/or not create the new + * linkto file, the second case would be rectified by a subsequent + * lookup, the first case by a rebalance, like for all stale linkto + * files */ + + if (op_ret == -1) { + /* Critical failure: unable to rename the cached file */ + if (prev == src_cached) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_RENAME_FAILED, + "%s: Rename on %s failed, (gfid = %s) ", local->loc.path, + prev->name, + local->loc.inode ? uuid_utoa(local->loc.inode->gfid) : ""); + local->op_ret = op_ret; + local->op_errno = op_errno; + goto cleanup; + } else { + /* Non-critical failure, unable to rename the linkto + * file + */ + gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_RENAME_FAILED, + "%s: Rename (linkto file) on %s failed, " + "(gfid = %s) ", + local->loc.path, prev->name, + local->loc.inode ? uuid_utoa(local->loc.inode->gfid) : ""); + } + } + if (xdata) { + if (!local->xattr) + local->xattr = dict_ref(xdata); + else + local->xattr = dict_copy_with_ref(xdata, local->xattr); + } + + /* Merge attrs only from src_cached. In case there of src_cached != + * dst_hashed, this ignores linkfile attrs. */ + if (prev == src_cached) { + dht_iatt_merge(this, &local->stbuf, stbuf); + dht_iatt_merge(this, &local->preoldparent, preoldparent); + dht_iatt_merge(this, &local->postoldparent, postoldparent); + dht_iatt_merge(this, &local->preparent, prenewparent); + dht_iatt_merge(this, &local->postparent, postnewparent); + } + + /* Create the linkto file for the dst file */ + if ((src_cached == dst_cached) && (dst_hashed != dst_cached)) { + link_frame = copy_frame(frame); + if (!link_frame) { + goto unlink; + } + /* fop value sent as maxvalue because it is not used + * anywhere in this case */ + link_local = dht_local_init(link_frame, &local->loc2, NULL, + GF_FOP_MAXVALUE); + if (!link_local) { + goto unlink; + } - local = frame->local; - this = frame->this; + if (link_local->loc.inode) + inode_unref(link_local->loc.inode); + link_local->loc.inode = inode_ref(local->loc.inode); + link_local->main_frame = frame; + link_local->stbuf = local->stbuf; + gf_uuid_copy(link_local->gfid, local->loc.inode->gfid); - dst_hashed = local->dst_hashed; - dst_cached = local->dst_cached; - src_cached = local->src_cached; + dht_linkfile_create(link_frame, dht_rename_links_create_cbk, this, + src_cached, dst_hashed, &link_local->loc); + return 0; + } - if (src_cached == dst_cached) - rename_subvol = src_cached; - else - rename_subvol = dst_hashed; +unlink: - gf_log (this->name, GF_LOG_TRACE, - "renaming %s => %s (%s)", - local->loc.path, local->loc2.path, rename_subvol->name); + if (link_frame) { + DHT_STACK_DESTROY(link_frame); + } + dht_rename_unlink(frame, this); + return 0; - STACK_WIND (frame, dht_rename_cbk, - rename_subvol, rename_subvol->fops->rename, - &local->loc, &local->loc2, NULL); +cleanup: + dht_rename_cleanup(frame); - return 0; + return 0; } +int +dht_do_rename(call_frame_t *frame) +{ + dht_local_t *local = NULL; + xlator_t *dst_hashed = NULL; + xlator_t *src_cached = NULL; + xlator_t *dst_cached = NULL; + xlator_t *this = NULL; + xlator_t *rename_subvol = NULL; + + local = frame->local; + this = frame->this; + + dst_hashed = local->dst_hashed; + dst_cached = local->dst_cached; + src_cached = local->src_cached; + + if (src_cached == dst_cached) + rename_subvol = src_cached; + else + rename_subvol = dst_hashed; + + if ((src_cached != dst_hashed) && (rename_subvol == dst_hashed)) { + DHT_MARKER_DONT_ACCOUNT(local->xattr_req); + } + + if (rename_subvol == src_cached) { + DHT_CHANGELOG_TRACK_AS_RENAME(local->xattr_req, &local->loc, + &local->loc2); + } + + gf_msg_trace(this->name, 0, "renaming %s => %s (%s)", local->loc.path, + local->loc2.path, rename_subvol->name); + + if (local->linked == _gf_true) + FRAME_SU_DO(frame, dht_local_t); + STACK_WIND_COOKIE(frame, dht_rename_cbk, rename_subvol, rename_subvol, + rename_subvol->fops->rename, &local->loc, &local->loc2, + local->xattr_req); + return 0; +} int -dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) -{ - dht_local_t *local = NULL; - call_frame_t *prev = NULL; - int this_call_cnt = 0; - - - local = frame->local; - prev = cookie; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "link/file on %s failed (%s)", - prev->this->name, strerror (op_errno)); - local->op_ret = -1; - if (op_errno != ENOENT) - local->op_errno = op_errno; - } +dht_rename_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + dht_local_t *local = NULL; + xlator_t *prev = NULL; - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - if (local->op_ret == -1) - goto cleanup; + local = frame->local; + prev = cookie; - dht_do_rename (frame); - } + if (op_ret == -1) { + gf_msg_debug(this->name, 0, "link/file on %s failed (%s)", prev->name, + strerror(op_errno)); + local->op_ret = -1; + local->op_errno = op_errno; + local->added_link = _gf_false; + } else + dht_iatt_merge(this, &local->stbuf, stbuf); - return 0; + if (local->op_ret == -1) + goto cleanup; + + dht_do_rename(frame); + + return 0; cleanup: - dht_rename_cleanup (frame); + dht_rename_cleanup(frame); - return 0; + return 0; } - int -dht_rename_unlink_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) +dht_rename_linkto_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - dht_local_t *local = NULL; - call_frame_t *prev = NULL; + dht_local_t *local = NULL; + xlator_t *prev = NULL; + xlator_t *src_cached = NULL; + dict_t *xattr = NULL; + local = frame->local; + DHT_MARK_FOP_INTERNAL(xattr); + prev = cookie; + src_cached = local->src_cached; - local = frame->local; - prev = cookie; + if (op_ret == -1) { + gf_msg_debug(this->name, 0, "link/file on %s failed (%s)", prev->name, + strerror(op_errno)); + local->op_ret = -1; + local->op_errno = op_errno; + } - if ((op_ret == -1) && (op_errno != ENOENT)) { - gf_log (this->name, GF_LOG_DEBUG, - "unlink of %s on %s failed (%s)", - local->loc2.path, prev->this->name, - strerror (op_errno)); - local->op_ret = -1; - local->op_errno = op_errno; - } + /* If linkto creation failed move to failure cleanup code, + * instead of continuing with creating the link file */ + if (local->op_ret != 0) { + goto cleanup; + } - if (local->op_ret == -1) - goto cleanup; + gf_msg_trace(this->name, 0, "link %s => %s (%s)", local->loc.path, + local->loc2.path, src_cached->name); + if (gf_uuid_compare(local->loc.pargfid, local->loc2.pargfid) == 0) { + DHT_MARKER_DONT_ACCOUNT(xattr); + } - dht_do_rename (frame); + local->added_link = _gf_true; - return 0; + STACK_WIND_COOKIE(frame, dht_rename_link_cbk, src_cached, src_cached, + src_cached->fops->link, &local->loc, &local->loc2, xattr); + + if (xattr) + dict_unref(xattr); + + return 0; cleanup: - dht_rename_cleanup (frame); + dht_rename_cleanup(frame); + + if (xattr) + dict_unref(xattr); - return 0; + return 0; } +int +dht_rename_unlink_links_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + dht_local_t *local = NULL; + xlator_t *prev = NULL; + + local = frame->local; + prev = cookie; + + if ((op_ret == -1) && (op_errno != ENOENT)) { + gf_msg_debug(this->name, 0, "unlink of %s on %s failed (%s)", + local->loc2.path, prev->name, strerror(op_errno)); + local->op_ret = -1; + local->op_errno = op_errno; + } + + if (local->op_ret == -1) + goto cleanup; + + dht_do_rename(frame); + + return 0; + +cleanup: + dht_rename_cleanup(frame); + + return 0; +} int -dht_rename_create_links (call_frame_t *frame) +dht_rename_create_links(call_frame_t *frame) { - dht_local_t *local = NULL; - xlator_t *this = NULL; - xlator_t *src_hashed = NULL; - xlator_t *src_cached = NULL; - xlator_t *dst_hashed = NULL; - xlator_t *dst_cached = NULL; - int call_cnt = 0; + dht_local_t *local = NULL; + xlator_t *this = NULL; + xlator_t *src_hashed = NULL; + xlator_t *src_cached = NULL; + xlator_t *dst_hashed = NULL; + xlator_t *dst_cached = NULL; + int call_cnt = 0; + dict_t *xattr = NULL; + local = frame->local; + this = frame->this; - local = frame->local; - this = frame->this; + src_hashed = local->src_hashed; + src_cached = local->src_cached; + dst_hashed = local->dst_hashed; + dst_cached = local->dst_cached; - src_hashed = local->src_hashed; - src_cached = local->src_cached; - dst_hashed = local->dst_hashed; - dst_cached = local->dst_cached; + DHT_MARK_FOP_INTERNAL(xattr); + if (src_cached == dst_cached) { + dict_t *xattr_new = NULL; - if (src_cached == dst_cached) { - if (dst_hashed == dst_cached) - goto nolinks; + if (dst_hashed == dst_cached) + goto nolinks; - gf_log (this->name, GF_LOG_TRACE, - "unlinking dst linkfile %s @ %s", - local->loc2.path, dst_hashed->name); + xattr_new = dict_copy_with_ref(xattr, NULL); - STACK_WIND (frame, dht_rename_unlink_links_cbk, - dst_hashed, dst_hashed->fops->unlink, - &local->loc2, 0, NULL); - return 0; - } + gf_msg_trace(this->name, 0, "unlinking dst linkfile %s @ %s", + local->loc2.path, dst_hashed->name); - if (dst_hashed != src_hashed && dst_hashed != src_cached) - call_cnt++; + DHT_MARKER_DONT_ACCOUNT(xattr_new); - if (src_cached != dst_hashed) - call_cnt++; + STACK_WIND_COOKIE(frame, dht_rename_unlink_links_cbk, dst_hashed, + dst_hashed, dst_hashed->fops->unlink, &local->loc2, 0, + xattr_new); - local->call_cnt = call_cnt; + dict_unref(xattr_new); + if (xattr) + dict_unref(xattr); - if (dst_hashed != src_hashed && dst_hashed != src_cached) { - gf_log (this->name, GF_LOG_TRACE, - "linkfile %s @ %s => %s", - local->loc.path, dst_hashed->name, src_cached->name); - memcpy (local->gfid, local->loc.inode->gfid, 16); - dht_linkfile_create (frame, dht_rename_links_cbk, - src_cached, dst_hashed, &local->loc); - } + return 0; + } + + if (src_cached != dst_hashed) { + /* needed to create the link file */ + call_cnt++; + if (dst_hashed != src_hashed) + /* needed to create the linkto file */ + call_cnt++; + } + + /* We should not have any failures post the link creation, as this + * introduces the newname into the namespace. Clients could have cached + * the existence of the newname and may start taking actions based on + * the same. Hence create the linkto first, and then attempt the link. + * + * NOTE: If another client is attempting the same oldname -> newname + * rename, and finds both file names as existing, and are hard links + * to each other, then FUSE would send in an unlink for oldname. In + * this time duration if we treat the linkto as a critical error and + * unlink the newname we created, we would have effectively lost the + * file to rename operations. */ + if (dst_hashed != src_hashed && src_cached != dst_hashed) { + gf_msg_trace(this->name, 0, "linkfile %s @ %s => %s", local->loc.path, + dst_hashed->name, src_cached->name); + + memcpy(local->gfid, local->loc.inode->gfid, 16); + dht_linkfile_create(frame, dht_rename_linkto_cbk, this, src_cached, + dst_hashed, &local->loc); + } else if (src_cached != dst_hashed) { + dict_t *xattr_new = NULL; + + xattr_new = dict_copy_with_ref(xattr, NULL); + + gf_msg_trace(this->name, 0, "link %s => %s (%s)", local->loc.path, + local->loc2.path, src_cached->name); + if (gf_uuid_compare(local->loc.pargfid, local->loc2.pargfid) == 0) { + DHT_MARKER_DONT_ACCOUNT(xattr_new); + } - if (src_cached != dst_hashed) { - gf_log (this->name, GF_LOG_TRACE, - "link %s => %s (%s)", local->loc.path, - local->loc2.path, src_cached->name); - STACK_WIND (frame, dht_rename_links_cbk, - src_cached, src_cached->fops->link, - &local->loc, &local->loc2, NULL); - } + local->added_link = _gf_true; + + STACK_WIND_COOKIE(frame, dht_rename_link_cbk, src_cached, src_cached, + src_cached->fops->link, &local->loc, &local->loc2, + xattr_new); + + dict_unref(xattr_new); + } nolinks: - if (!call_cnt) { - /* skip to next step */ - dht_do_rename (frame); + if (!call_cnt) { + /* skip to next step */ + dht_do_rename(frame); + } + if (xattr) + dict_unref(xattr); + + return 0; +} + +int +dht_rename_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *stbuf, dict_t *xattr, + struct iatt *postparent) +{ + dht_local_t *local = NULL; + int call_cnt = 0; + dht_conf_t *conf = NULL; + char gfid_local[GF_UUID_BUF_SIZE] = {0}; + char gfid_server[GF_UUID_BUF_SIZE] = {0}; + int child_index = -1; + gf_boolean_t is_src = _gf_false; + loc_t *loc = NULL; + + child_index = (long)cookie; + + local = frame->local; + conf = this->private; + + is_src = (child_index == 0); + if (is_src) + loc = &local->loc; + else + loc = &local->loc2; + + if (op_ret >= 0) { + if (is_src) + local->src_cached = dht_subvol_get_cached(this, local->loc.inode); + else { + if (loc->inode) + gf_uuid_unparse(loc->inode->gfid, gfid_local); + + gf_msg_debug(this->name, 0, + "dst_cached before lookup: %s, " + "(path:%s)(gfid:%s),", + local->loc2.path, + local->dst_cached ? local->dst_cached->name : NULL, + local->dst_cached ? gfid_local : NULL); + + local->dst_cached = dht_subvol_get_cached(this, + local->loc2_copy.inode); + + gf_uuid_unparse(stbuf->ia_gfid, gfid_local); + + gf_msg_debug(this->name, GF_LOG_WARNING, + "dst_cached after lookup: %s, " + "(path:%s)(gfid:%s)", + local->loc2.path, + local->dst_cached ? local->dst_cached->name : NULL, + local->dst_cached ? gfid_local : NULL); + + if ((local->loc2.inode == NULL) || + gf_uuid_compare(stbuf->ia_gfid, local->loc2.inode->gfid)) { + if (local->loc2.inode != NULL) { + inode_unlink(local->loc2.inode, local->loc2.parent, + local->loc2.name); + inode_unref(local->loc2.inode); + } + + local->loc2.inode = inode_link(local->loc2_copy.inode, + local->loc2_copy.parent, + local->loc2_copy.name, stbuf); + gf_uuid_copy(local->loc2.gfid, stbuf->ia_gfid); + } + } + } + + if (op_ret < 0) { + if (is_src) { + /* The meaning of is_linkfile is overloaded here. For locking + * to work properly both rebalance and rename should acquire + * lock on datafile. The reason for sending this lookup is to + * find out whether we've acquired a lock on data file. + * Between the lookup before rename and this rename, the + * file could be migrated by a rebalance process and now this + * file this might be a linkto file. We verify that by sending + * this lookup. However, if this lookup fails we cannot really + * say whether we've acquired lock on a datafile or linkto file. + * So, we act conservatively and _assume_ + * that this is a linkfile and fail the rename operation. + */ + local->is_linkfile = _gf_true; + local->op_errno = op_errno; + } else { + if (local->dst_cached) + gf_msg_debug(this->name, op_errno, + "file %s (gfid:%s) was present " + "(hashed-subvol=%s, " + "cached-subvol=%s) before rename," + " but lookup failed", + local->loc2.path, + uuid_utoa(local->loc2.inode->gfid), + local->dst_hashed->name, local->dst_cached->name); + if (dht_inode_missing(op_errno)) + local->dst_cached = NULL; + } + } else if (is_src && xattr && + check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name)) { + local->is_linkfile = _gf_true; + /* Found linkto file instead of data file, passdown ENOENT + * based on the above comment */ + local->op_errno = ENOENT; + } + + if (!local->is_linkfile && (op_ret >= 0) && + gf_uuid_compare(loc->gfid, stbuf->ia_gfid)) { + gf_uuid_unparse(loc->gfid, gfid_local); + gf_uuid_unparse(stbuf->ia_gfid, gfid_server); + + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_MISMATCH, + "path:%s, received a different gfid, local_gfid= %s" + " server_gfid: %s", + local->loc.path, gfid_local, gfid_server); + + /* Will passdown ENOENT anyway since the file we sent on + * rename is replaced with a different file */ + local->op_errno = ENOENT; + /* Since local->is_linkfile is used here to detect failure, + * marking this to true */ + local->is_linkfile = _gf_true; + } + + call_cnt = dht_frame_return(frame); + if (is_last_call(call_cnt)) { + if (local->is_linkfile) { + local->op_ret = -1; + goto fail; } - return 0; -} + dht_rename_create_links(frame); + } + return 0; +fail: + dht_rename_unlock(frame, this); + return 0; +} int -dht_rename (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc, dict_t *xdata) -{ - xlator_t *src_cached = NULL; - xlator_t *src_hashed = NULL; - xlator_t *dst_cached = NULL; - xlator_t *dst_hashed = NULL; - int op_errno = -1; - int ret = -1; - dht_local_t *local = NULL; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (oldloc, err); - VALIDATE_OR_GOTO (newloc, err); - - src_hashed = dht_subvol_get_hashed (this, oldloc); - if (!src_hashed) { - gf_log (this->name, GF_LOG_INFO, - "no subvolume in layout for path=%s", - oldloc->path); - op_errno = EINVAL; - goto err; - } +dht_rename_file_lock1_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + dht_local_t *local = NULL; + char src_gfid[GF_UUID_BUF_SIZE] = {0}; + char dst_gfid[GF_UUID_BUF_SIZE] = {0}; + int ret = 0; + loc_t *loc = NULL; + xlator_t *subvol = NULL; + + local = frame->local; + + if (op_ret < 0) { + uuid_utoa_r(local->loc.inode->gfid, src_gfid); + + if (local->loc2.inode) + uuid_utoa_r(local->loc2.inode->gfid, dst_gfid); + + gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR, + "protecting namespace of %s failed" + "rename (%s:%s:%s %s:%s:%s)", + local->current == &local->lock[0] ? local->loc.path + : local->loc2.path, + local->loc.path, src_gfid, local->src_hashed->name, + local->loc2.path, dst_gfid, + local->dst_hashed ? local->dst_hashed->name : NULL); + + local->op_ret = -1; + local->op_errno = op_errno; + goto err; + } + + if (local->current == &local->lock[0]) { + loc = &local->loc2; + subvol = local->dst_hashed; + local->current = &local->lock[1]; + } else { + loc = &local->loc; + subvol = local->src_hashed; + local->current = &local->lock[0]; + } + + ret = dht_protect_namespace(frame, loc, subvol, &local->current->ns, + dht_rename_lock_cbk); + if (ret < 0) { + op_errno = EINVAL; + goto err; + } + + return 0; +err: + /* No harm in calling an extra unlock */ + dht_rename_unlock(frame, this); + return 0; +} - src_cached = dht_subvol_get_cached (this, oldloc->inode); - if (!src_cached) { - gf_log (this->name, GF_LOG_INFO, - "no cached subvolume for path=%s", oldloc->path); - op_errno = EINVAL; - goto err; - } +int32_t +dht_rename_file_protect_namespace(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) +{ + dht_local_t *local = NULL; + char src_gfid[GF_UUID_BUF_SIZE] = {0}; + char dst_gfid[GF_UUID_BUF_SIZE] = {0}; + int ret = 0; + loc_t *loc = NULL; + xlator_t *subvol = NULL; + + local = frame->local; + + if (op_ret < 0) { + uuid_utoa_r(local->loc.inode->gfid, src_gfid); + + if (local->loc2.inode) + uuid_utoa_r(local->loc2.inode->gfid, dst_gfid); + + gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR, + "acquiring inodelk failed " + "rename (%s:%s:%s %s:%s:%s)", + local->loc.path, src_gfid, local->src_cached->name, + local->loc2.path, dst_gfid, + local->dst_cached ? local->dst_cached->name : NULL); + + local->op_ret = -1; + local->op_errno = op_errno; + + goto err; + } + + /* Locks on src and dst needs to ordered which otherwise might cause + * deadlocks when rename (src, dst) and rename (dst, src) is done from + * two different clients + */ + ret = dht_order_rename_lock(frame, &loc, &subvol); + if (ret) { + local->op_errno = ENOMEM; + goto err; + } + + ret = dht_protect_namespace(frame, loc, subvol, &local->current->ns, + dht_rename_file_lock1_cbk); + if (ret < 0) { + op_errno = EINVAL; + goto err; + } + + return 0; - dst_hashed = dht_subvol_get_hashed (this, newloc); - if (!dst_hashed) { - gf_log (this->name, GF_LOG_INFO, - "no subvolume in layout for path=%s", - newloc->path); - op_errno = EINVAL; - goto err; - } +err: + /* Its fine to call unlock even when no locks are acquired, as we check + * for lock->locked before winding a unlock call. + */ + dht_rename_unlock(frame, this); - if (newloc->inode) - dst_cached = dht_subvol_get_cached (this, newloc->inode); + return 0; +} - local = dht_local_init (frame, oldloc, NULL, GF_FOP_RENAME); - if (!local) { - op_errno = ENOMEM; - goto err; - } - /* cached_subvol will be set from dht_local_init, reset it to NULL, - as the logic of handling rename is different */ - local->cached_subvol = NULL; - - ret = loc_copy (&local->loc2, newloc); - if (ret == -1) { - op_errno = ENOMEM; - goto err; +int32_t +dht_rename_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + dht_local_t *local = NULL; + char src_gfid[GF_UUID_BUF_SIZE] = {0}; + char dst_gfid[GF_UUID_BUF_SIZE] = {0}; + dict_t *xattr_req = NULL; + dht_conf_t *conf = NULL; + int i = 0; + xlator_t *subvol = NULL; + dht_lock_t *lock = NULL; + + local = frame->local; + conf = this->private; + + if (op_ret < 0) { + uuid_utoa_r(local->loc.inode->gfid, src_gfid); + + if (local->loc2.inode) + uuid_utoa_r(local->loc2.inode->gfid, dst_gfid); + + gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR, + "protecting namespace of %s failed. " + "rename (%s:%s:%s %s:%s:%s)", + local->current == &local->lock[0] ? local->loc.path + : local->loc2.path, + local->loc.path, src_gfid, local->src_hashed->name, + local->loc2.path, dst_gfid, + local->dst_hashed ? local->dst_hashed->name : NULL); + + local->op_ret = -1; + local->op_errno = op_errno; + + goto done; + } + + xattr_req = dict_new(); + if (xattr_req == NULL) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto done; + } + + op_ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256); + if (op_ret < 0) { + local->op_ret = -1; + local->op_errno = -op_ret; + goto done; + } + + /* dst_cached might've changed. This normally happens for two reasons: + * 1. rebalance migrated dst + * 2. Another parallel rename was done overwriting dst + * + * Doing a lookup on local->loc2 when dst exists, but is associated + * with a different gfid will result in an ESTALE error. So, do a fresh + * lookup with a new inode on dst-path and handle change of dst-cached + * in the cbk. Also, to identify dst-cached changes we do a lookup on + * "this" rather than the subvol. + */ + loc_copy(&local->loc2_copy, &local->loc2); + inode_unref(local->loc2_copy.inode); + local->loc2_copy.inode = inode_new(local->loc.inode->table); + + /* Why not use local->lock.locks[?].loc for lookup post lock phase + * --------------------------------------------------------------- + * "layout.parent_layout.locks[?].loc" does not have the name and pargfid + * populated. + * Reason: If we had populated the name and pargfid, server might + * resolve to a successful lookup even if there is a file with same name + * with a different gfid(unlink & create) as server does name based + * resolution on first priority. And this can result in operating on a + * different inode entirely. + * + * Now consider a scenario where source file was renamed by some other + * client to a new name just before this lock was granted. So if a + * lookup would be done on local->lock[0].layout.parent_layout.locks[?].loc, + * server will send success even if the entry was renamed (since server will + * do a gfid based resolution). So once a lock is granted, make sure the + * file exists with the name that the client requested with. + * */ + + local->call_cnt = 2; + for (i = 0; i < 2; i++) { + if (i == 0) { + lock = local->rename_inodelk_backward_compatible[0]; + if (gf_uuid_compare(local->loc.gfid, lock->loc.gfid) == 0) + subvol = lock->xl; + else { + lock = local->rename_inodelk_backward_compatible[1]; + subvol = lock->xl; + } + } else { + subvol = this; } - local->src_hashed = src_hashed; - local->src_cached = src_cached; - local->dst_hashed = dst_hashed; - local->dst_cached = dst_cached; + STACK_WIND_COOKIE(frame, dht_rename_lookup_cbk, (void *)(long)i, subvol, + subvol->fops->lookup, + (i == 0) ? &local->loc : &local->loc2_copy, + xattr_req); + } - gf_log (this->name, GF_LOG_TRACE, - "renaming %s (hash=%s/cache=%s) => %s (hash=%s/cache=%s)", - oldloc->path, src_hashed->name, src_cached->name, - newloc->path, dst_hashed->name, - dst_cached ? dst_cached->name : "<nul>"); + dict_unref(xattr_req); + return 0; - if (IA_ISDIR (oldloc->inode->ia_type)) { - dht_rename_dir (frame, this); - } else { - local->op_ret = 0; - dht_rename_create_links (frame); +done: + /* Its fine to call unlock even when no locks are acquired, as we check + * for lock->locked before winding a unlock call. + */ + dht_rename_unlock(frame, this); + + if (xattr_req) + dict_unref(xattr_req); + + return 0; +} + +int +dht_rename_lock(call_frame_t *frame) +{ + dht_local_t *local = NULL; + int count = 1, ret = -1; + dht_lock_t **lk_array = NULL; + + local = frame->local; + + if (local->dst_cached) + count++; + + lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer); + if (lk_array == NULL) + goto err; + + lk_array[0] = dht_lock_new(frame->this, local->src_cached, &local->loc, + F_WRLCK, DHT_FILE_MIGRATE_DOMAIN, NULL, + FAIL_ON_ANY_ERROR); + if (lk_array[0] == NULL) + goto err; + + if (local->dst_cached) { + /* dst might be removed by the time inodelk reaches bricks, + * which can result in ESTALE errors. POSIX imposes no + * restriction for dst to be present for renames to be + * successful. So, we'll ignore ESTALE errors. As far as + * synchronization on dst goes, we'll achieve the same by + * holding entrylk on parent directory of dst in the namespace + * of basename(dst). Also, there might not be quorum in cluster + * xlators like EC/disperse on errno, in which case they return + * EIO. For eg., in a disperse (4 + 2), 3 might return success + * and three might return ESTALE. Disperse, having no Quorum + * unwinds inodelk with EIO. So, ignore EIO too. + */ + lk_array[1] = dht_lock_new(frame->this, local->dst_cached, &local->loc2, + F_WRLCK, DHT_FILE_MIGRATE_DOMAIN, NULL, + IGNORE_ENOENT_ESTALE_EIO); + if (lk_array[1] == NULL) + goto err; + } + + local->rename_inodelk_backward_compatible = lk_array; + local->rename_inodelk_bc_count = count; + + /* retaining inodelks for the sake of backward compatibility. Please + * make sure to remove this inodelk once all of 3.10, 3.12 and 3.13 + * reach EOL. Better way of getting synchronization would be to acquire + * entrylks on src and dst parent directories in the namespace of + * basenames of src and dst + */ + ret = dht_blocking_inodelk(frame, lk_array, count, + dht_rename_file_protect_namespace); + if (ret < 0) { + local->rename_inodelk_backward_compatible = NULL; + local->rename_inodelk_bc_count = 0; + goto err; + } + + return 0; +err: + if (lk_array != NULL) { + int tmp_count = 0, i = 0; + + for (i = 0; (i < count) && (lk_array[i]); i++, tmp_count++) + ; + + dht_lock_array_free(lk_array, tmp_count); + GF_FREE(lk_array); + } + + return -1; +} + +int +dht_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) +{ + xlator_t *src_cached = NULL; + xlator_t *src_hashed = NULL; + xlator_t *dst_cached = NULL; + xlator_t *dst_hashed = NULL; + int op_errno = -1; + int ret = -1; + dht_local_t *local = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + char newgfid[GF_UUID_BUF_SIZE] = {0}; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(oldloc, err); + VALIDATE_OR_GOTO(newloc, err); + + gf_uuid_unparse(oldloc->inode->gfid, gfid); + + src_hashed = dht_subvol_get_hashed(this, oldloc); + if (!src_hashed) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_FAILED, + "No hashed subvolume in layout for path=%s," + "(gfid = %s)", + oldloc->path, gfid); + op_errno = EINVAL; + goto err; + } + + src_cached = dht_subvol_get_cached(this, oldloc->inode); + if (!src_cached) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_FAILED, + "No cached subvolume for path = %s," + "(gfid = %s)", + oldloc->path, gfid); + + op_errno = EINVAL; + goto err; + } + + dst_hashed = dht_subvol_get_hashed(this, newloc); + if (!dst_hashed) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_FAILED, + "No hashed subvolume in layout for path=%s", newloc->path); + op_errno = EINVAL; + goto err; + } + + if (newloc->inode) + dst_cached = dht_subvol_get_cached(this, newloc->inode); + + local = dht_local_init(frame, oldloc, NULL, GF_FOP_RENAME); + if (!local) { + op_errno = ENOMEM; + goto err; + } + /* cached_subvol will be set from dht_local_init, reset it to NULL, + as the logic of handling rename is different */ + local->cached_subvol = NULL; + + ret = loc_copy(&local->loc2, newloc); + if (ret == -1) { + op_errno = ENOMEM; + goto err; + } + + local->src_hashed = src_hashed; + local->src_cached = src_cached; + local->dst_hashed = dst_hashed; + local->dst_cached = dst_cached; + if (xdata) + local->xattr_req = dict_ref(xdata); + + if (newloc->inode) + gf_uuid_unparse(newloc->inode->gfid, newgfid); + + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_INFO, + "renaming %s (%s) (hash=%s/cache=%s) => %s (%s) " + "(hash=%s/cache=%s) ", + oldloc->path, gfid, src_hashed->name, src_cached->name, newloc->path, + newloc->inode ? newgfid : NULL, dst_hashed->name, + dst_cached ? dst_cached->name : "<nul>"); + + if (IA_ISDIR(oldloc->inode->ia_type)) { + dht_rename_dir(frame, this); + } else { + local->op_ret = 0; + ret = dht_rename_lock(frame); + if (ret < 0) { + op_errno = ENOMEM; + goto err; } + } - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, - NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL, + NULL); - return 0; + return 0; +} + +int +dht_pt_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) +{ + gf_boolean_t free_xdata = _gf_false; + + /* Just a pass through */ + if (!IA_ISDIR(oldloc->inode->ia_type)) { + if (!xdata) { + free_xdata = _gf_true; + } + DHT_CHANGELOG_TRACK_AS_RENAME(xdata, oldloc, newloc); + } + default_rename(frame, this, oldloc, newloc, xdata); + if (free_xdata && xdata) { + dict_unref(xdata); + xdata = NULL; + } + return 0; } |
