diff options
Diffstat (limited to 'xlators/cluster/nsr-server/src/recon_notify.c')
-rw-r--r-- | xlators/cluster/nsr-server/src/recon_notify.c | 43 |
1 file changed, 40 insertions, 3 deletions
diff --git a/xlators/cluster/nsr-server/src/recon_notify.c b/xlators/cluster/nsr-server/src/recon_notify.c index 7a0de85b1..7397192ae 100644 --- a/xlators/cluster/nsr-server/src/recon_notify.c +++ b/xlators/cluster/nsr-server/src/recon_notify.c @@ -120,12 +120,49 @@ nsr_recon_set_leader (xlator_t *this) // in the callback (once reconciliation is done), // we will unfence the IOs. // TBD - error handling later. - glfs_lseek(ctx->fd, nsr_recon_xlator_sector_1, SEEK_SET); + if (glfs_lseek(ctx->fd, nsr_recon_xlator_sector_1, SEEK_SET) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "doing lseek failed\n"); + return; + } + glusterfs_this_set(old); gf_log (this->name, GF_LOG_INFO, "Writing to local node to set leader"); - glfs_write(ctx->fd, &role, - sizeof(role), 0); + do { + if (priv->leader != _gf_true) { + glusterfs_this_set(old); + gf_log (this->name, GF_LOG_ERROR, "no longer leader\n"); + return; + } + if (glfs_write(ctx->fd, &role, sizeof(role), 0) == -1) { + if (errno == EAGAIN) { + // Wait for old reconciliation to bail out. + glusterfs_this_set(old); + gf_log (this->name, GF_LOG_ERROR, + "write failed with retry. retrying after some time\n"); + sleep(5); + continue; + } + else{ + glusterfs_this_set(old); + gf_log (this->name, GF_LOG_ERROR, + "doing write failed\n"); + // This is because reconciliation has returned with error + // because some node has died in between. + // What should be done? Either we retry being leader + // or hook to CHILD_DOWN notification. + // Put that logic later. As of now we will just retry. + // This is easier. + sleep(5); + continue; + } + } else { + glusterfs_this_set(old); + gf_log (this->name, GF_LOG_INFO, "doing write with success\n"); + break; + } + } while(1); glusterfs_this_set(old); gf_log (this->name, GF_LOG_INFO, "glfs_write returned. unfencing IO\n"); |