Diffstat (limited to 'xlators/features/locks/src/inodelk.c')
-rw-r--r-- | xlators/features/locks/src/inodelk.c | 1669 |
1 file changed, 826 insertions, 843 deletions
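The hunks below restyle xlators/features/locks/src/inodelk.c (indentation width, brace placement, spacing around calls and casts); in the hunks shown, only layout changes and no functional change is visible. Because both columns of the diff carry the same logic, the core grant decision is easy to lose in the noise. What follows is a minimal, self-contained sketch of the conflict predicate built from inodelk_overlap(), inodelk_type_conflict() and same_inodelk_owner(); the struct ilk type, its field subset and the main() driver are illustrative stand-ins, not declarations from the file:

#include <fcntl.h> /* F_RDLCK, F_WRLCK */
#include <stdio.h>

/* Illustrative stand-in for pl_inode_lock_t: only the fields the
 * conflict predicate needs. */
struct ilk {
    long long fl_start;
    long long fl_end;
    short fl_type; /* F_RDLCK or F_WRLCK */
    void *client;  /* owner identity, simplified to one pointer */
};

/* Two ranges overlap when neither one ends before the other starts. */
static int overlaps(const struct ilk *a, const struct ilk *b)
{
    return (a->fl_end >= b->fl_start) && (b->fl_end >= a->fl_start);
}

/* Only two shared (read) locks are compatible on type. */
static int type_conflict(const struct ilk *a, const struct ilk *b)
{
    return (a->fl_type == F_WRLCK) || (b->fl_type == F_WRLCK);
}

/* A request is blocked by a held lock only if the ranges overlap, the
 * types conflict and the owners differ (the real code also compares the
 * lk-owner, not just the client pointer). */
static int blocks(const struct ilk *req, const struct ilk *held)
{
    return overlaps(req, held) && type_conflict(req, held) &&
           (req->client != held->client);
}

int main(void)
{
    int c1 = 0, c2 = 0; /* stand-ins for two different clients */
    struct ilk held = {0, 131071, F_WRLCK, &c1};    /* 0-128KB write lock */
    struct ilk req = {65536, 131071, F_RDLCK, &c2}; /* overlapping read */

    printf("conflict: %d\n", blocks(&req, &held)); /* prints 1 */
    return 0;
}

__inodelk_grantable() in the diff applies the same test against every granted lock in the domain's inodelk_list; the first conflicting lock from a different owner blocks the request (or, when contention notification is enabled, is also queued for a contention upcall).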
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c index 890ac8b6d00..eff58a79569 100644 --- a/xlators/features/locks/src/inodelk.c +++ b/xlators/features/locks/src/inodelk.c @@ -21,115 +21,115 @@ #include "pl-messages.h" void -__delete_inode_lock (pl_inode_lock_t *lock) +__delete_inode_lock(pl_inode_lock_t *lock) { - list_del_init (&lock->list); + list_del_init(&lock->list); } static void -__pl_inodelk_ref (pl_inode_lock_t *lock) +__pl_inodelk_ref(pl_inode_lock_t *lock) { - lock->ref++; + lock->ref++; } void -__pl_inodelk_unref (pl_inode_lock_t *lock) +__pl_inodelk_unref(pl_inode_lock_t *lock) { - lock->ref--; - if (!lock->ref) { - GF_FREE (lock->connection_id); - GF_FREE (lock); - } + lock->ref--; + if (!lock->ref) { + GF_FREE(lock->connection_id); + GF_FREE(lock); + } } -/* Check if 2 inodelks are conflicting on type. Only 2 shared locks don't conflict */ +/* Check if 2 inodelks are conflicting on type. Only 2 shared locks don't + * conflict */ static int -inodelk_type_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2) +inodelk_type_conflict(pl_inode_lock_t *l1, pl_inode_lock_t *l2) { - if (l2->fl_type == F_WRLCK || l1->fl_type == F_WRLCK) - return 1; + if (l2->fl_type == F_WRLCK || l1->fl_type == F_WRLCK) + return 1; - return 0; + return 0; } void -pl_print_inodelk (char *str, int size, int cmd, struct gf_flock *flock, const char *domain) +pl_print_inodelk(char *str, int size, int cmd, struct gf_flock *flock, + const char *domain) { - char *cmd_str = NULL; - char *type_str = NULL; + char *cmd_str = NULL; + char *type_str = NULL; - switch (cmd) { + switch (cmd) { #if F_GETLK != F_GETLK64 case F_GETLK64: #endif case F_GETLK: - cmd_str = "GETLK"; - break; + cmd_str = "GETLK"; + break; #if F_SETLK != F_SETLK64 case F_SETLK64: #endif case F_SETLK: - cmd_str = "SETLK"; - break; + cmd_str = "SETLK"; + break; #if F_SETLKW != F_SETLKW64 case F_SETLKW64: #endif case F_SETLKW: - cmd_str = "SETLKW"; - break; + cmd_str = "SETLKW"; + break; default: - cmd_str = "UNKNOWN"; - break; - } + cmd_str = "UNKNOWN"; + break; + } - switch (flock->l_type) { + switch (flock->l_type) { case F_RDLCK: - type_str = "READ"; - break; + type_str = "READ"; + break; case F_WRLCK: - type_str = "WRITE"; - break; + type_str = "WRITE"; + break; case F_UNLCK: - type_str = "UNLOCK"; - break; + type_str = "UNLOCK"; + break; default: - type_str = "UNKNOWN"; - break; - } - - snprintf (str, size, "lock=INODELK, cmd=%s, type=%s, " - "domain: %s, start=%llu, len=%llu, pid=%llu", - cmd_str, type_str, domain, - (unsigned long long) flock->l_start, - (unsigned long long) flock->l_len, - (unsigned long long) flock->l_pid); + type_str = "UNKNOWN"; + break; + } + + snprintf(str, size, + "lock=INODELK, cmd=%s, type=%s, " + "domain: %s, start=%llu, len=%llu, pid=%llu", + cmd_str, type_str, domain, (unsigned long long)flock->l_start, + (unsigned long long)flock->l_len, + (unsigned long long)flock->l_pid); } /* Determine if the two inodelks overlap reach other's lock regions */ static int -inodelk_overlap (pl_inode_lock_t *l1, pl_inode_lock_t *l2) +inodelk_overlap(pl_inode_lock_t *l1, pl_inode_lock_t *l2) { - return ((l1->fl_end >= l2->fl_start) && - (l2->fl_end >= l1->fl_start)); + return ((l1->fl_end >= l2->fl_start) && (l2->fl_end >= l1->fl_start)); } /* Returns true if the 2 inodelks have the same owner */ static int -same_inodelk_owner (pl_inode_lock_t *l1, pl_inode_lock_t *l2) +same_inodelk_owner(pl_inode_lock_t *l1, pl_inode_lock_t *l2) { - return (is_same_lkowner (&l1->owner, &l2->owner) && - 
(l1->client == l2->client)); + return (is_same_lkowner(&l1->owner, &l2->owner) && + (l1->client == l2->client)); } /* Returns true if the 2 inodelks conflict with each other */ static int -inodelk_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2) +inodelk_conflict(pl_inode_lock_t *l1, pl_inode_lock_t *l2) { - return (inodelk_overlap (l1, l2) && - inodelk_type_conflict (l1, l2)); + return (inodelk_overlap(l1, l2) && inodelk_type_conflict(l1, l2)); } /* @@ -138,24 +138,23 @@ inodelk_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2) * true if it exceeds the configured threshold, false otherwise. */ static inline gf_boolean_t -__stale_inodelk (xlator_t *this, pl_inode_lock_t *candidate_lock, +__stale_inodelk(xlator_t *this, pl_inode_lock_t *candidate_lock, pl_inode_lock_t *requested_lock, time_t *lock_age_sec) { - posix_locks_private_t *priv = NULL; - struct timeval curr; - - priv = this->private; - gettimeofday (&curr, NULL); - /* Question: Should we just prune them all given the - * chance? Or just the locks we are attempting to acquire? - */ - if (inodelk_conflict (candidate_lock, requested_lock)) { - *lock_age_sec = curr.tv_sec - - candidate_lock->granted_time.tv_sec; - if (*lock_age_sec > priv->revocation_secs) - return _gf_true; - } - return _gf_false; + posix_locks_private_t *priv = NULL; + struct timeval curr; + + priv = this->private; + gettimeofday(&curr, NULL); + /* Question: Should we just prune them all given the + * chance? Or just the locks we are attempting to acquire? + */ + if (inodelk_conflict(candidate_lock, requested_lock)) { + *lock_age_sec = curr.tv_sec - candidate_lock->granted_time.tv_sec; + if (*lock_age_sec > priv->revocation_secs) + return _gf_true; + } + return _gf_false; } /* Examine any locks held on this inode and potentially revoke the lock @@ -167,992 +166,976 @@ __stale_inodelk (xlator_t *this, pl_inode_lock_t *candidate_lock, * are beyond a threshold. 
*/ static gf_boolean_t -__inodelk_prune_stale (xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom, - pl_inode_lock_t *lock) +__inodelk_prune_stale(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom, + pl_inode_lock_t *lock) { - posix_locks_private_t *priv = NULL; - pl_inode_lock_t *tmp = NULL; - pl_inode_lock_t *lk = NULL; - gf_boolean_t revoke_lock = _gf_false; - int bcount = 0; - int gcount = 0; - int op_errno = 0; - clrlk_args args; - args.opts = NULL; - time_t lk_age_sec = 0; - uint32_t max_blocked = 0; - char *reason_str = NULL; - - priv = this->private; - - args.type = CLRLK_INODE; - if (priv->revocation_clear_all == _gf_true) - args.kind = CLRLK_ALL; - else - args.kind = CLRLK_GRANTED; - - if (list_empty (&dom->inodelk_list)) - goto out; - - pthread_mutex_lock (&pinode->mutex); - list_for_each_entry_safe (lk, tmp, &dom->inodelk_list, list) { - if (__stale_inodelk (this, lk, lock, &lk_age_sec) == _gf_true) { - revoke_lock = _gf_true; - reason_str = "age"; - break; - } + posix_locks_private_t *priv = NULL; + pl_inode_lock_t *tmp = NULL; + pl_inode_lock_t *lk = NULL; + gf_boolean_t revoke_lock = _gf_false; + int bcount = 0; + int gcount = 0; + int op_errno = 0; + clrlk_args args; + args.opts = NULL; + time_t lk_age_sec = 0; + uint32_t max_blocked = 0; + char *reason_str = NULL; + + priv = this->private; + + args.type = CLRLK_INODE; + if (priv->revocation_clear_all == _gf_true) + args.kind = CLRLK_ALL; + else + args.kind = CLRLK_GRANTED; + + if (list_empty(&dom->inodelk_list)) + goto out; + + pthread_mutex_lock(&pinode->mutex); + list_for_each_entry_safe(lk, tmp, &dom->inodelk_list, list) + { + if (__stale_inodelk(this, lk, lock, &lk_age_sec) == _gf_true) { + revoke_lock = _gf_true; + reason_str = "age"; + break; } + } - max_blocked = priv->revocation_max_blocked; - if (max_blocked != 0 && revoke_lock == _gf_false) { - list_for_each_entry_safe (lk, tmp, &dom->blocked_inodelks, - blocked_locks) { - max_blocked--; - if (max_blocked == 0) { - revoke_lock = _gf_true; - reason_str = "max blocked"; - break; - } - } + max_blocked = priv->revocation_max_blocked; + if (max_blocked != 0 && revoke_lock == _gf_false) { + list_for_each_entry_safe(lk, tmp, &dom->blocked_inodelks, blocked_locks) + { + max_blocked--; + if (max_blocked == 0) { + revoke_lock = _gf_true; + reason_str = "max blocked"; + break; + } } - pthread_mutex_unlock (&pinode->mutex); + } + pthread_mutex_unlock(&pinode->mutex); out: - if (revoke_lock == _gf_true) { - clrlk_clear_inodelk (this, pinode, dom, &args, &bcount, &gcount, - &op_errno); - gf_log (this->name, GF_LOG_WARNING, - "Lock revocation [reason: %s; gfid: %s; domain: %s; " - "age: %ld sec] - Inode lock revoked: %d granted & %d " - "blocked locks cleared", - reason_str, uuid_utoa (pinode->gfid), dom->domain, - lk_age_sec, gcount, bcount); - } - return revoke_lock; + if (revoke_lock == _gf_true) { + clrlk_clear_inodelk(this, pinode, dom, &args, &bcount, &gcount, + &op_errno); + gf_log(this->name, GF_LOG_WARNING, + "Lock revocation [reason: %s; gfid: %s; domain: %s; " + "age: %ld sec] - Inode lock revoked: %d granted & %d " + "blocked locks cleared", + reason_str, uuid_utoa(pinode->gfid), dom->domain, lk_age_sec, + gcount, bcount); + } + return revoke_lock; } static gf_boolean_t __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, struct timespec *now) { - posix_locks_private_t *priv; - int64_t elapsed; + posix_locks_private_t *priv; + int64_t elapsed; - priv = this->private; + priv = this->private; - /* If this lock is in a list, it means that 
we are about to send a - * notification for it, so no need to do anything else. */ - if (!list_empty(&lock->contend)) { - return _gf_false; - } - - elapsed = now->tv_sec; - elapsed -= lock->contention_time.tv_sec; - if (now->tv_nsec < lock->contention_time.tv_nsec) { - elapsed--; - } - if (elapsed < priv->notify_contention_delay) { - return _gf_false; - } + /* If this lock is in a list, it means that we are about to send a + * notification for it, so no need to do anything else. */ + if (!list_empty(&lock->contend)) { + return _gf_false; + } + + elapsed = now->tv_sec; + elapsed -= lock->contention_time.tv_sec; + if (now->tv_nsec < lock->contention_time.tv_nsec) { + elapsed--; + } + if (elapsed < priv->notify_contention_delay) { + return _gf_false; + } - /* All contention notifications will be sent outside of the locked - * region. This means that currently granted locks might have already - * been unlocked by that time. To avoid the lock or the inode to be - * destroyed before we process them, we take an additional reference - * on both. */ - inode_ref(lock->pl_inode->inode); - __pl_inodelk_ref(lock); + /* All contention notifications will be sent outside of the locked + * region. This means that currently granted locks might have already + * been unlocked by that time. To avoid the lock or the inode to be + * destroyed before we process them, we take an additional reference + * on both. */ + inode_ref(lock->pl_inode->inode); + __pl_inodelk_ref(lock); - lock->contention_time = *now; + lock->contention_time = *now; - return _gf_true; + return _gf_true; } void inodelk_contention_notify(xlator_t *this, struct list_head *contend) { - struct gf_upcall up; - struct gf_upcall_inodelk_contention lc; - pl_inode_lock_t *lock; - pl_inode_t *pl_inode; - client_t *client; - gf_boolean_t notify; + struct gf_upcall up; + struct gf_upcall_inodelk_contention lc; + pl_inode_lock_t *lock; + pl_inode_t *pl_inode; + client_t *client; + gf_boolean_t notify; - while (!list_empty(contend)) { - lock = list_first_entry(contend, pl_inode_lock_t, contend); + while (!list_empty(contend)) { + lock = list_first_entry(contend, pl_inode_lock_t, contend); - pl_inode = lock->pl_inode; - - pthread_mutex_lock(&pl_inode->mutex); - - /* If the lock has already been released, no notification is - * sent. We clear the notification time in this case. */ - notify = !list_empty(&lock->list); - if (!notify) { - lock->contention_time.tv_sec = 0; - lock->contention_time.tv_nsec = 0; - } else { - memcpy(&lc.flock, &lock->user_flock, sizeof(lc.flock)); - lc.pid = lock->client_pid; - lc.domain = lock->volume; - lc.xdata = NULL; - - gf_uuid_copy(up.gfid, lock->pl_inode->gfid); - client = (client_t *)lock->client; - if (client == NULL) { - /* A NULL client can be found if the inodelk - * was issued by a server side xlator. */ - up.client_uid = NULL; - } else { - up.client_uid = client->client_uid; - } - } + pl_inode = lock->pl_inode; - pthread_mutex_unlock(&pl_inode->mutex); + pthread_mutex_lock(&pl_inode->mutex); + + /* If the lock has already been released, no notification is + * sent. We clear the notification time in this case. 
*/ + notify = !list_empty(&lock->list); + if (!notify) { + lock->contention_time.tv_sec = 0; + lock->contention_time.tv_nsec = 0; + } else { + memcpy(&lc.flock, &lock->user_flock, sizeof(lc.flock)); + lc.pid = lock->client_pid; + lc.domain = lock->volume; + lc.xdata = NULL; + + gf_uuid_copy(up.gfid, lock->pl_inode->gfid); + client = (client_t *)lock->client; + if (client == NULL) { + /* A NULL client can be found if the inodelk + * was issued by a server side xlator. */ + up.client_uid = NULL; + } else { + up.client_uid = client->client_uid; + } + } - if (notify) { - up.event_type = GF_UPCALL_INODELK_CONTENTION; - up.data = &lc; - - if (this->notify(this, GF_EVENT_UPCALL, &up) < 0) { - gf_msg_debug(this->name, 0, - "Inodelk contention notification " - "failed"); - } else { - gf_msg_debug(this->name, 0, - "Inodelk contention notification " - "sent"); - } - } + pthread_mutex_unlock(&pl_inode->mutex); + + if (notify) { + up.event_type = GF_UPCALL_INODELK_CONTENTION; + up.data = &lc; + + if (this->notify(this, GF_EVENT_UPCALL, &up) < 0) { + gf_msg_debug(this->name, 0, + "Inodelk contention notification " + "failed"); + } else { + gf_msg_debug(this->name, 0, + "Inodelk contention notification " + "sent"); + } + } - pthread_mutex_lock(&pl_inode->mutex); + pthread_mutex_lock(&pl_inode->mutex); - list_del_init(&lock->contend); - __pl_inodelk_unref(lock); + list_del_init(&lock->contend); + __pl_inodelk_unref(lock); - pthread_mutex_unlock(&pl_inode->mutex); + pthread_mutex_unlock(&pl_inode->mutex); - inode_unref(pl_inode->inode); - } + inode_unref(pl_inode->inode); + } } /* Determine if lock is grantable or not */ static pl_inode_lock_t * -__inodelk_grantable (xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock, - struct timespec *now, struct list_head *contend) +__inodelk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock, + struct timespec *now, struct list_head *contend) { - pl_inode_lock_t *l = NULL; - pl_inode_lock_t *ret = NULL; - - list_for_each_entry (l, &dom->inodelk_list, list){ - if (inodelk_conflict (lock, l) && - !same_inodelk_owner (lock, l)) { - if (ret == NULL) { - ret = l; - if (contend == NULL) { - break; - } - } - if (__inodelk_needs_contention_notify(this, l, now)) { - list_add_tail(&l->contend, contend); - } + pl_inode_lock_t *l = NULL; + pl_inode_lock_t *ret = NULL; + + list_for_each_entry(l, &dom->inodelk_list, list) + { + if (inodelk_conflict(lock, l) && !same_inodelk_owner(lock, l)) { + if (ret == NULL) { + ret = l; + if (contend == NULL) { + break; } + } + if (__inodelk_needs_contention_notify(this, l, now)) { + list_add_tail(&l->contend, contend); + } } + } - return ret; + return ret; } static pl_inode_lock_t * -__blocked_lock_conflict (pl_dom_list_t *dom, pl_inode_lock_t *lock) +__blocked_lock_conflict(pl_dom_list_t *dom, pl_inode_lock_t *lock) { - pl_inode_lock_t *l = NULL; + pl_inode_lock_t *l = NULL; - list_for_each_entry (l, &dom->blocked_inodelks, blocked_locks) { - if (inodelk_conflict (lock, l)) { - return l; - } + list_for_each_entry(l, &dom->blocked_inodelks, blocked_locks) + { + if (inodelk_conflict(lock, l)) { + return l; } + } - return NULL; + return NULL; } static int -__owner_has_lock (pl_dom_list_t *dom, pl_inode_lock_t *newlock) +__owner_has_lock(pl_dom_list_t *dom, pl_inode_lock_t *newlock) { - pl_inode_lock_t *lock = NULL; + pl_inode_lock_t *lock = NULL; - list_for_each_entry (lock, &dom->inodelk_list, list) { - if (same_inodelk_owner (lock, newlock)) - return 1; - } + list_for_each_entry(lock, &dom->inodelk_list, list) + { + if 
(same_inodelk_owner(lock, newlock)) + return 1; + } - list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) { - if (same_inodelk_owner (lock, newlock)) - return 1; - } + list_for_each_entry(lock, &dom->blocked_inodelks, blocked_locks) + { + if (same_inodelk_owner(lock, newlock)) + return 1; + } - return 0; + return 0; } static int __lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock, int can_block) { - struct timeval now; + struct timeval now; - gettimeofday(&now, NULL); + gettimeofday(&now, NULL); - if (can_block == 0) { - goto out; - } + if (can_block == 0) { + goto out; + } - lock->blkd_time = now; - list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks); + lock->blkd_time = now; + list_add_tail(&lock->blocked_locks, &dom->blocked_inodelks); - gf_msg_trace (this->name, 0, "%s (pid=%d) (lk-owner=%s) %"PRId64" - " - "%"PRId64" => Blocked", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, lkowner_utoa (&lock->owner), - lock->user_flock.l_start, lock->user_flock.l_len); + gf_msg_trace(this->name, 0, + "%s (pid=%d) (lk-owner=%s) %" PRId64 + " - " + "%" PRId64 " => Blocked", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid, + lkowner_utoa(&lock->owner), lock->user_flock.l_start, + lock->user_flock.l_len); out: - return -EAGAIN; + return -EAGAIN; } /* Determines if lock can be granted and adds the lock. If the lock * is blocking, adds it to the blocked_inodelks list of the domain. */ static int -__lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, - int can_block, pl_dom_list_t *dom, struct timespec *now, - struct list_head *contend) +__lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, + int can_block, pl_dom_list_t *dom, struct timespec *now, + struct list_head *contend) { - pl_inode_lock_t *conf = NULL; - int ret = -EINVAL; - - conf = __inodelk_grantable (this, dom, lock, now, contend); - if (conf) { - ret = __lock_blocked_add(this, dom, lock, can_block); - goto out; + pl_inode_lock_t *conf = NULL; + int ret = -EINVAL; + + conf = __inodelk_grantable(this, dom, lock, now, contend); + if (conf) { + ret = __lock_blocked_add(this, dom, lock, can_block); + goto out; + } + + /* To prevent blocked locks starvation, check if there are any blocked + * locks thay may conflict with this lock. If there is then don't grant + * the lock. BUT grant the lock if the owner already has lock to allow + * nested locks. + * Example: + * SHD from Machine1 takes (gfid, 0-infinity) and is granted. + * SHD from machine2 takes (gfid, 0-infinity) and is blocked. + * When SHD from Machine1 takes (gfid, 0-128KB) it + * needs to be granted, without which the earlier lock on 0-infinity + * will not be unlocked by SHD from Machine1. + * TODO: Find why 'owner_has_lock' is checked even for blocked locks. + */ + if (__blocked_lock_conflict(dom, lock) && !(__owner_has_lock(dom, lock))) { + if (can_block != 0) { + gf_log(this->name, GF_LOG_DEBUG, + "Lock is grantable, but blocking to prevent " + "starvation"); } - /* To prevent blocked locks starvation, check if there are any blocked - * locks thay may conflict with this lock. If there is then don't grant - * the lock. BUT grant the lock if the owner already has lock to allow - * nested locks. - * Example: - * SHD from Machine1 takes (gfid, 0-infinity) and is granted. - * SHD from machine2 takes (gfid, 0-infinity) and is blocked. 
- * When SHD from Machine1 takes (gfid, 0-128KB) it - * needs to be granted, without which the earlier lock on 0-infinity - * will not be unlocked by SHD from Machine1. - * TODO: Find why 'owner_has_lock' is checked even for blocked locks. - */ - if (__blocked_lock_conflict (dom, lock) && - !(__owner_has_lock (dom, lock))) { - if (can_block != 0) { - gf_log (this->name, GF_LOG_DEBUG, - "Lock is grantable, but blocking to prevent " - "starvation"); - } - - ret = __lock_blocked_add(this, dom, lock, can_block); - goto out; - } - __pl_inodelk_ref (lock); - gettimeofday (&lock->granted_time, NULL); - list_add (&lock->list, &dom->inodelk_list); + ret = __lock_blocked_add(this, dom, lock, can_block); + goto out; + } + __pl_inodelk_ref(lock); + gettimeofday(&lock->granted_time, NULL); + list_add(&lock->list, &dom->inodelk_list); - ret = 0; + ret = 0; out: - return ret; + return ret; } /* Return true if the two inodelks have exactly same lock boundaries */ static int -inodelks_equal (pl_inode_lock_t *l1, pl_inode_lock_t *l2) +inodelks_equal(pl_inode_lock_t *l1, pl_inode_lock_t *l2) { - if ((l1->fl_start == l2->fl_start) && - (l1->fl_end == l2->fl_end)) - return 1; + if ((l1->fl_start == l2->fl_start) && (l1->fl_end == l2->fl_end)) + return 1; - return 0; + return 0; } - static pl_inode_lock_t * -find_matching_inodelk (pl_inode_lock_t *lock, pl_dom_list_t *dom) +find_matching_inodelk(pl_inode_lock_t *lock, pl_dom_list_t *dom) { - pl_inode_lock_t *l = NULL; - list_for_each_entry (l, &dom->inodelk_list, list) { - if (inodelks_equal (l, lock) && - same_inodelk_owner (l, lock)) - return l; - } - return NULL; + pl_inode_lock_t *l = NULL; + list_for_each_entry(l, &dom->inodelk_list, list) + { + if (inodelks_equal(l, lock) && same_inodelk_owner(l, lock)) + return l; + } + return NULL; } /* Set F_UNLCK removes a lock which has the exact same lock boundaries * as the UNLCK lock specifies. 
If such a lock is not found, returns invalid */ static pl_inode_lock_t * -__inode_unlock_lock (xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom) +__inode_unlock_lock(xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom) { - - pl_inode_lock_t *conf = NULL; - - conf = find_matching_inodelk (lock, dom); - if (!conf) { - gf_log (this->name, GF_LOG_ERROR, - " Matching lock not found for unlock %llu-%llu, by %s " - "on %p", (unsigned long long)lock->fl_start, - (unsigned long long)lock->fl_end, - lkowner_utoa (&lock->owner), lock->client); - goto out; - } - __delete_inode_lock (conf); - gf_log (this->name, GF_LOG_DEBUG, - " Matching lock found for unlock %llu-%llu, by %s on %p", - (unsigned long long)lock->fl_start, - (unsigned long long)lock->fl_end, lkowner_utoa (&lock->owner), - lock->client); + pl_inode_lock_t *conf = NULL; + + conf = find_matching_inodelk(lock, dom); + if (!conf) { + gf_log(this->name, GF_LOG_ERROR, + " Matching lock not found for unlock %llu-%llu, by %s " + "on %p", + (unsigned long long)lock->fl_start, + (unsigned long long)lock->fl_end, lkowner_utoa(&lock->owner), + lock->client); + goto out; + } + __delete_inode_lock(conf); + gf_log(this->name, GF_LOG_DEBUG, + " Matching lock found for unlock %llu-%llu, by %s on %p", + (unsigned long long)lock->fl_start, (unsigned long long)lock->fl_end, + lkowner_utoa(&lock->owner), lock->client); out: - return conf; + return conf; } - static void -__grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, - struct list_head *granted, pl_dom_list_t *dom, - struct timespec *now, struct list_head *contend) +__grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted, pl_dom_list_t *dom, + struct timespec *now, struct list_head *contend) { - int bl_ret = 0; - pl_inode_lock_t *bl = NULL; - pl_inode_lock_t *tmp = NULL; + int bl_ret = 0; + pl_inode_lock_t *bl = NULL; + pl_inode_lock_t *tmp = NULL; - struct list_head blocked_list; + struct list_head blocked_list; - INIT_LIST_HEAD (&blocked_list); - list_splice_init (&dom->blocked_inodelks, &blocked_list); + INIT_LIST_HEAD(&blocked_list); + list_splice_init(&dom->blocked_inodelks, &blocked_list); - list_for_each_entry_safe (bl, tmp, &blocked_list, blocked_locks) { + list_for_each_entry_safe(bl, tmp, &blocked_list, blocked_locks) + { + list_del_init(&bl->blocked_locks); - list_del_init (&bl->blocked_locks); + bl_ret = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend); - bl_ret = __lock_inodelk (this, pl_inode, bl, 1, dom, now, - contend); - - if (bl_ret == 0) { - list_add (&bl->blocked_locks, granted); - } + if (bl_ret == 0) { + list_add(&bl->blocked_locks, granted); } - return; + } + return; } /* Grant all inodelks blocked on a lock */ void -grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, - pl_dom_list_t *dom, struct timespec *now, - struct list_head *contend) +grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, + pl_dom_list_t *dom, struct timespec *now, + struct list_head *contend) { - struct list_head granted; - pl_inode_lock_t *lock; - pl_inode_lock_t *tmp; - - INIT_LIST_HEAD (&granted); - - pthread_mutex_lock (&pl_inode->mutex); + struct list_head granted; + pl_inode_lock_t *lock; + pl_inode_lock_t *tmp; + + INIT_LIST_HEAD(&granted); + + pthread_mutex_lock(&pl_inode->mutex); + { + __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now, + contend); + } + pthread_mutex_unlock(&pl_inode->mutex); + + list_for_each_entry_safe(lock, tmp, &granted, blocked_locks) + { + gf_log(this->name, 
GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => Granted", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid, + lkowner_utoa(&lock->owner), lock->user_flock.l_start, + lock->user_flock.l_len); + + pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, + 0, 0, lock->volume); + + STACK_UNWIND_STRICT(inodelk, lock->frame, 0, 0, NULL); + lock->frame = NULL; + } + + pthread_mutex_lock(&pl_inode->mutex); + { + list_for_each_entry_safe(lock, tmp, &granted, blocked_locks) { - __grant_blocked_inode_locks (this, pl_inode, &granted, dom, - now, contend); + list_del_init(&lock->blocked_locks); + __pl_inodelk_unref(lock); } - pthread_mutex_unlock (&pl_inode->mutex); - - list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) { - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Granted", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lkowner_utoa (&lock->owner), - lock->user_flock.l_start, - lock->user_flock.l_len); - - pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW, - &lock->user_flock, 0, 0, lock->volume); - - STACK_UNWIND_STRICT (inodelk, lock->frame, 0, 0, NULL); - lock->frame = NULL; - } - - pthread_mutex_lock (&pl_inode->mutex); - { - list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) { - list_del_init (&lock->blocked_locks); - __pl_inodelk_unref (lock); - } - } - pthread_mutex_unlock (&pl_inode->mutex); + } + pthread_mutex_unlock(&pl_inode->mutex); } - static void -pl_inodelk_log_cleanup (pl_inode_lock_t *lock) +pl_inodelk_log_cleanup(pl_inode_lock_t *lock) { - pl_inode_t *pl_inode = NULL; + pl_inode_t *pl_inode = NULL; - pl_inode = lock->pl_inode; + pl_inode = lock->pl_inode; - gf_log (THIS->name, GF_LOG_WARNING, "releasing lock on %s held by " - "{client=%p, pid=%"PRId64" lk-owner=%s}", - uuid_utoa (pl_inode->gfid), lock->client, - (uint64_t) lock->client_pid, lkowner_utoa (&lock->owner)); + gf_log(THIS->name, GF_LOG_WARNING, + "releasing lock on %s held by " + "{client=%p, pid=%" PRId64 " lk-owner=%s}", + uuid_utoa(pl_inode->gfid), lock->client, (uint64_t)lock->client_pid, + lkowner_utoa(&lock->owner)); } - /* Release all inodelks from this client */ int -pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx) +pl_inodelk_client_cleanup(xlator_t *this, pl_ctx_t *ctx) { - posix_locks_private_t *priv; - pl_inode_lock_t *tmp = NULL; - pl_inode_lock_t *l = NULL; - pl_dom_list_t *dom = NULL; - pl_inode_t *pl_inode = NULL; - struct list_head *pcontend = NULL; - struct list_head released; - struct list_head unwind; - struct list_head contend; - struct timespec now = { }; - - priv = this->private; - - INIT_LIST_HEAD (&released); - INIT_LIST_HEAD (&unwind); - - if (priv->notify_contention) { - pcontend = &contend; - INIT_LIST_HEAD (pcontend); - timespec_now(&now); - } - - pthread_mutex_lock (&ctx->lock); + posix_locks_private_t *priv; + pl_inode_lock_t *tmp = NULL; + pl_inode_lock_t *l = NULL; + pl_dom_list_t *dom = NULL; + pl_inode_t *pl_inode = NULL; + struct list_head *pcontend = NULL; + struct list_head released; + struct list_head unwind; + struct list_head contend; + struct timespec now = {}; + + priv = this->private; + + INIT_LIST_HEAD(&released); + INIT_LIST_HEAD(&unwind); + + if (priv->notify_contention) { + pcontend = &contend; + INIT_LIST_HEAD(pcontend); + timespec_now(&now); + } + + pthread_mutex_lock(&ctx->lock); + { + list_for_each_entry_safe(l, tmp, &ctx->inodelk_lockers, client_list) { - list_for_each_entry_safe (l, tmp, &ctx->inodelk_lockers, - 
client_list) { - pl_inodelk_log_cleanup (l); - - pl_inode = l->pl_inode; - - pthread_mutex_lock (&pl_inode->mutex); - { - /* If the inodelk object is part of granted list but not - * blocked list, then perform the following actions: - * i. delete the object from granted list; - * ii. grant other locks (from other clients) that may - * have been blocked on this inodelk; and - * iii. unref the object. - * - * If the inodelk object (L1) is part of both granted - * and blocked lists, then this means that a parallel - * unlock on another inodelk (L2 say) may have 'granted' - * L1 and added it to 'granted' list in - * __grant_blocked_node_locks() (although using the - * 'blocked_locks' member). In that case, the cleanup - * codepath must try and grant other overlapping - * blocked inodelks from other clients, now that L1 is - * out of their way and then unref L1 in the end, and - * leave it to the other thread (the one executing - * unlock codepath) to unwind L1's frame, delete it from - * blocked_locks list, and perform the last unref on L1. - * - * If the inodelk object (L1) is part of blocked list - * only, the cleanup code path must: - * i. delete it from the blocked_locks list inside - * this critical section, - * ii. unwind its frame with EAGAIN, - * iii. try and grant blocked inode locks from other - * clients that were otherwise grantable, but just - * got blocked to avoid leaving L1 to starve - * forever. - * iv. unref the object. - */ - list_del_init (&l->client_list); - - if (!list_empty (&l->list)) { - __delete_inode_lock (l); - list_add_tail (&l->client_list, - &released); - } else { - list_del_init(&l->blocked_locks); - list_add_tail (&l->client_list, - &unwind); - } - } - pthread_mutex_unlock (&pl_inode->mutex); + pl_inodelk_log_cleanup(l); + + pl_inode = l->pl_inode; + + pthread_mutex_lock(&pl_inode->mutex); + { + /* If the inodelk object is part of granted list but not + * blocked list, then perform the following actions: + * i. delete the object from granted list; + * ii. grant other locks (from other clients) that may + * have been blocked on this inodelk; and + * iii. unref the object. + * + * If the inodelk object (L1) is part of both granted + * and blocked lists, then this means that a parallel + * unlock on another inodelk (L2 say) may have 'granted' + * L1 and added it to 'granted' list in + * __grant_blocked_node_locks() (although using the + * 'blocked_locks' member). In that case, the cleanup + * codepath must try and grant other overlapping + * blocked inodelks from other clients, now that L1 is + * out of their way and then unref L1 in the end, and + * leave it to the other thread (the one executing + * unlock codepath) to unwind L1's frame, delete it from + * blocked_locks list, and perform the last unref on L1. + * + * If the inodelk object (L1) is part of blocked list + * only, the cleanup code path must: + * i. delete it from the blocked_locks list inside + * this critical section, + * ii. unwind its frame with EAGAIN, + * iii. try and grant blocked inode locks from other + * clients that were otherwise grantable, but just + * got blocked to avoid leaving L1 to starve + * forever. + * iv. unref the object. 
+ */ + list_del_init(&l->client_list); + + if (!list_empty(&l->list)) { + __delete_inode_lock(l); + list_add_tail(&l->client_list, &released); + } else { + list_del_init(&l->blocked_locks); + list_add_tail(&l->client_list, &unwind); } + } + pthread_mutex_unlock(&pl_inode->mutex); } - pthread_mutex_unlock (&ctx->lock); + } + pthread_mutex_unlock(&ctx->lock); - list_for_each_entry_safe (l, tmp, &unwind, client_list) { - list_del_init (&l->client_list); + list_for_each_entry_safe(l, tmp, &unwind, client_list) + { + list_del_init(&l->client_list); - if (l->frame) - STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN, - NULL); - list_add_tail (&l->client_list, &released); - } + if (l->frame) + STACK_UNWIND_STRICT(inodelk, l->frame, -1, EAGAIN, NULL); + list_add_tail(&l->client_list, &released); + } - list_for_each_entry_safe (l, tmp, &released, client_list) { - list_del_init (&l->client_list); + list_for_each_entry_safe(l, tmp, &released, client_list) + { + list_del_init(&l->client_list); - pl_inode = l->pl_inode; + pl_inode = l->pl_inode; - dom = get_domain (pl_inode, l->volume); + dom = get_domain(pl_inode, l->volume); - grant_blocked_inode_locks (this, pl_inode, dom, &now, - pcontend); + grant_blocked_inode_locks(this, pl_inode, dom, &now, pcontend); - pthread_mutex_lock (&pl_inode->mutex); - { - __pl_inodelk_unref (l); - } - pthread_mutex_unlock (&pl_inode->mutex); - inode_unref (pl_inode->inode); + pthread_mutex_lock(&pl_inode->mutex); + { + __pl_inodelk_unref(l); } + pthread_mutex_unlock(&pl_inode->mutex); + inode_unref(pl_inode->inode); + } - if (pcontend != NULL) { - inodelk_contention_notify(this, pcontend); - } + if (pcontend != NULL) { + inodelk_contention_notify(this, pcontend); + } - return 0; + return 0; } - static int -pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, - pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom, - inode_t *inode) +pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, + pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom, + inode_t *inode) { - posix_locks_private_t *priv = NULL; - int ret = -EINVAL; - pl_inode_lock_t *retlock = NULL; - gf_boolean_t unref = _gf_true; - gf_boolean_t need_inode_unref = _gf_false; - struct list_head *pcontend = NULL; - struct list_head contend; - struct timespec now = { }; - short fl_type; - - lock->pl_inode = pl_inode; - fl_type = lock->fl_type; - - priv = this->private; - - /* Ideally, AFTER a successful lock (both blocking and non-blocking) or - * an unsuccessful blocking lock operation, the inode needs to be ref'd. - * - * But doing so might give room to a race where the lock-requesting - * client could send a DISCONNECT just before this thread refs the inode - * after the locking is done, and the epoll thread could unref the inode - * in cleanup which means the inode's refcount would come down to 0, and - * the call to pl_forget() at this point destroys @pl_inode. Now when - * the io-thread executing this function tries to access pl_inode, - * it could crash on account of illegal memory access. - * - * To get around this problem, the inode is ref'd once even before - * adding the lock into client_list as a precautionary measure. - * This way even if there are DISCONNECTs, there will always be 1 extra - * ref on the inode, so @pl_inode is still alive until after the - * current stack unwinds. 
- */ - pl_inode->inode = inode_ref (inode); - - if (priv->revocation_secs != 0) { - if (lock->fl_type != F_UNLCK) { - __inodelk_prune_stale (this, pl_inode, dom, lock); - } else if (priv->monkey_unlocking == _gf_true) { - if (pl_does_monkey_want_stuck_lock ()) { - pthread_mutex_lock (&pl_inode->mutex); - { - __pl_inodelk_unref (lock); - } - pthread_mutex_unlock (&pl_inode->mutex); - inode_unref (pl_inode->inode); - gf_log (this->name, GF_LOG_WARNING, - "MONKEY LOCKING (forcing stuck lock)!"); - return 0; - } - } - } - - if (priv->notify_contention) { - pcontend = &contend; - INIT_LIST_HEAD(pcontend); - timespec_now(&now); - } - - if (ctx) - pthread_mutex_lock (&ctx->lock); - pthread_mutex_lock (&pl_inode->mutex); - { - if (lock->fl_type != F_UNLCK) { - ret = __lock_inodelk (this, pl_inode, lock, can_block, - dom, &now, pcontend); - if (ret == 0) { - lock->frame = NULL; - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => OK", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lkowner_utoa (&lock->owner), - lock->fl_start, - lock->fl_end); - } else if (ret == -EAGAIN) { - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => NOK", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lkowner_utoa (&lock->owner), - lock->user_flock.l_start, - lock->user_flock.l_len); - if (can_block) - unref = _gf_false; - /* For all but the case where a non-blocking - * lock attempt fails, the extra ref taken at - * the start of this function must be negated. - */ - else - need_inode_unref = _gf_true; - } - - if (ctx && (!ret || can_block)) - list_add_tail (&lock->client_list, - &ctx->inodelk_lockers); - } else { - /* Irrespective of whether unlock succeeds or not, - * the extra inode ref that was done at the start of - * this function must be negated. Towards this, - * @need_inode_unref flag is set unconditionally here. - */ - need_inode_unref = _gf_true; - retlock = __inode_unlock_lock (this, lock, dom); - if (!retlock) { - gf_log (this->name, GF_LOG_DEBUG, - "Bad Unlock issued on Inode lock"); - ret = -EINVAL; - goto out; - } - list_del_init (&retlock->client_list); - __pl_inodelk_unref (retlock); - - ret = 0; + posix_locks_private_t *priv = NULL; + int ret = -EINVAL; + pl_inode_lock_t *retlock = NULL; + gf_boolean_t unref = _gf_true; + gf_boolean_t need_inode_unref = _gf_false; + struct list_head *pcontend = NULL; + struct list_head contend; + struct timespec now = {}; + short fl_type; + + lock->pl_inode = pl_inode; + fl_type = lock->fl_type; + + priv = this->private; + + /* Ideally, AFTER a successful lock (both blocking and non-blocking) or + * an unsuccessful blocking lock operation, the inode needs to be ref'd. + * + * But doing so might give room to a race where the lock-requesting + * client could send a DISCONNECT just before this thread refs the inode + * after the locking is done, and the epoll thread could unref the inode + * in cleanup which means the inode's refcount would come down to 0, and + * the call to pl_forget() at this point destroys @pl_inode. Now when + * the io-thread executing this function tries to access pl_inode, + * it could crash on account of illegal memory access. + * + * To get around this problem, the inode is ref'd once even before + * adding the lock into client_list as a precautionary measure. + * This way even if there are DISCONNECTs, there will always be 1 extra + * ref on the inode, so @pl_inode is still alive until after the + * current stack unwinds. 
+ */ + pl_inode->inode = inode_ref(inode); + + if (priv->revocation_secs != 0) { + if (lock->fl_type != F_UNLCK) { + __inodelk_prune_stale(this, pl_inode, dom, lock); + } else if (priv->monkey_unlocking == _gf_true) { + if (pl_does_monkey_want_stuck_lock()) { + pthread_mutex_lock(&pl_inode->mutex); + { + __pl_inodelk_unref(lock); } -out: - if (unref) - __pl_inodelk_unref (lock); - } - pthread_mutex_unlock (&pl_inode->mutex); - if (ctx) - pthread_mutex_unlock (&ctx->lock); - - /* The following (extra) unref corresponds to the ref that - * was done at the time the lock was granted. - */ - if ((fl_type == F_UNLCK) && (ret == 0)) { - inode_unref (pl_inode->inode); - grant_blocked_inode_locks (this, pl_inode, dom, &now, - pcontend); - } - - if (need_inode_unref) { - inode_unref (pl_inode->inode); + pthread_mutex_unlock(&pl_inode->mutex); + inode_unref(pl_inode->inode); + gf_log(this->name, GF_LOG_WARNING, + "MONKEY LOCKING (forcing stuck lock)!"); + return 0; + } } + } + + if (priv->notify_contention) { + pcontend = &contend; + INIT_LIST_HEAD(pcontend); + timespec_now(&now); + } + + if (ctx) + pthread_mutex_lock(&ctx->lock); + pthread_mutex_lock(&pl_inode->mutex); + { + if (lock->fl_type != F_UNLCK) { + ret = __lock_inodelk(this, pl_inode, lock, can_block, dom, &now, + pcontend); + if (ret == 0) { + lock->frame = NULL; + gf_log(this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 + " => OK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, lkowner_utoa(&lock->owner), + lock->fl_start, lock->fl_end); + } else if (ret == -EAGAIN) { + gf_log(this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 + " => NOK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, lkowner_utoa(&lock->owner), + lock->user_flock.l_start, lock->user_flock.l_len); + if (can_block) + unref = _gf_false; + /* For all but the case where a non-blocking + * lock attempt fails, the extra ref taken at + * the start of this function must be negated. + */ + else + need_inode_unref = _gf_true; + } + + if (ctx && (!ret || can_block)) + list_add_tail(&lock->client_list, &ctx->inodelk_lockers); + } else { + /* Irrespective of whether unlock succeeds or not, + * the extra inode ref that was done at the start of + * this function must be negated. Towards this, + * @need_inode_unref flag is set unconditionally here. + */ + need_inode_unref = _gf_true; + retlock = __inode_unlock_lock(this, lock, dom); + if (!retlock) { + gf_log(this->name, GF_LOG_DEBUG, + "Bad Unlock issued on Inode lock"); + ret = -EINVAL; + goto out; + } + list_del_init(&retlock->client_list); + __pl_inodelk_unref(retlock); - if (pcontend != NULL) { - inodelk_contention_notify(this, pcontend); + ret = 0; } - - return ret; + out: + if (unref) + __pl_inodelk_unref(lock); + } + pthread_mutex_unlock(&pl_inode->mutex); + if (ctx) + pthread_mutex_unlock(&ctx->lock); + + /* The following (extra) unref corresponds to the ref that + * was done at the time the lock was granted. 
+ */ + if ((fl_type == F_UNLCK) && (ret == 0)) { + inode_unref(pl_inode->inode); + grant_blocked_inode_locks(this, pl_inode, dom, &now, pcontend); + } + + if (need_inode_unref) { + inode_unref(pl_inode->inode); + } + + if (pcontend != NULL) { + inodelk_contention_notify(this, pcontend); + } + + return ret; } /* Create a new inode_lock_t */ pl_inode_lock_t * -new_inode_lock (struct gf_flock *flock, client_t *client, pid_t client_pid, - call_frame_t *frame, xlator_t *this, const char *volume, - char *conn_id) +new_inode_lock(struct gf_flock *flock, client_t *client, pid_t client_pid, + call_frame_t *frame, xlator_t *this, const char *volume, + char *conn_id) { - pl_inode_lock_t *lock = NULL; - - lock = GF_CALLOC (1, sizeof (*lock), - gf_locks_mt_pl_inode_lock_t); - if (!lock) { - return NULL; - } - - lock->fl_start = flock->l_start; - lock->fl_type = flock->l_type; - - if (flock->l_len == 0) - lock->fl_end = LLONG_MAX; - else - lock->fl_end = flock->l_start + flock->l_len - 1; - - lock->client = client; - lock->client_pid = client_pid; - lock->volume = volume; - lock->owner = frame->root->lk_owner; - lock->frame = frame; - lock->this = this; - - if (conn_id) { - lock->connection_id = gf_strdup (conn_id); - } - - INIT_LIST_HEAD (&lock->list); - INIT_LIST_HEAD (&lock->blocked_locks); - INIT_LIST_HEAD (&lock->client_list); - INIT_LIST_HEAD (&lock->contend); - __pl_inodelk_ref (lock); + pl_inode_lock_t *lock = NULL; - return lock; + lock = GF_CALLOC(1, sizeof(*lock), gf_locks_mt_pl_inode_lock_t); + if (!lock) { + return NULL; + } + + lock->fl_start = flock->l_start; + lock->fl_type = flock->l_type; + + if (flock->l_len == 0) + lock->fl_end = LLONG_MAX; + else + lock->fl_end = flock->l_start + flock->l_len - 1; + + lock->client = client; + lock->client_pid = client_pid; + lock->volume = volume; + lock->owner = frame->root->lk_owner; + lock->frame = frame; + lock->this = this; + + if (conn_id) { + lock->connection_id = gf_strdup(conn_id); + } + + INIT_LIST_HEAD(&lock->list); + INIT_LIST_HEAD(&lock->blocked_locks); + INIT_LIST_HEAD(&lock->client_list); + INIT_LIST_HEAD(&lock->contend); + __pl_inodelk_ref(lock); + + return lock; } int32_t -_pl_convert_volume (const char *volume, char **res) +_pl_convert_volume(const char *volume, char **res) { - char *mdata_vol = NULL; - int ret = 0; - - mdata_vol = strrchr (volume, ':'); - //if the volume already ends with :metadata don't bother - if (mdata_vol && (strcmp (mdata_vol, ":metadata") == 0)) - return 0; + char *mdata_vol = NULL; + int ret = 0; - ret = gf_asprintf (res, "%s:metadata", volume); - if (ret <= 0) - return ENOMEM; + mdata_vol = strrchr(volume, ':'); + // if the volume already ends with :metadata don't bother + if (mdata_vol && (strcmp(mdata_vol, ":metadata") == 0)) return 0; + + ret = gf_asprintf(res, "%s:metadata", volume); + if (ret <= 0) + return ENOMEM; + return 0; } int32_t -_pl_convert_volume_for_special_range (struct gf_flock *flock, - const char *volume, char **res) +_pl_convert_volume_for_special_range(struct gf_flock *flock, const char *volume, + char **res) { - int32_t ret = 0; + int32_t ret = 0; - if ((flock->l_start == LLONG_MAX -1) && - (flock->l_len == 0)) { - ret = _pl_convert_volume (volume, res); - } + if ((flock->l_start == LLONG_MAX - 1) && (flock->l_len == 0)) { + ret = _pl_convert_volume(volume, res); + } - return ret; + return ret; } /* Common inodelk code called from pl_inodelk and pl_finodelk */ int -pl_common_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, inode_t *inode, int32_t cmd, - struct 
gf_flock *flock, loc_t *loc, fd_t *fd, dict_t *xdata) +pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, + inode_t *inode, int32_t cmd, struct gf_flock *flock, + loc_t *loc, fd_t *fd, dict_t *xdata) { - int32_t op_ret = -1; - int32_t op_errno = 0; - int ret = -1; - GF_UNUSED int dict_ret = -1; - int can_block = 0; - pl_inode_t * pinode = NULL; - pl_inode_lock_t * reqlock = NULL; - pl_dom_list_t * dom = NULL; - char *res = NULL; - char *res1 = NULL; - char *conn_id = NULL; - pl_ctx_t *ctx = NULL; - - if (xdata) - dict_ret = dict_get_str (xdata, "connection-id", &conn_id); - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (inode, unwind); - VALIDATE_OR_GOTO (flock, unwind); - - if ((flock->l_start < 0) || (flock->l_len < 0)) { - op_errno = EINVAL; - goto unwind; - } - - op_errno = _pl_convert_volume_for_special_range (flock, volume, &res); - if (op_errno) - goto unwind; - if (res) - volume = res; - - pl_trace_in (this, frame, fd, loc, cmd, flock, volume); - - if (frame->root->client) { - ctx = pl_ctx_get (frame->root->client, this); - if (!ctx) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed"); - goto unwind; - } - } - - pinode = pl_inode_get (this, inode); - if (!pinode) { - op_errno = ENOMEM; - goto unwind; + int32_t op_ret = -1; + int32_t op_errno = 0; + int ret = -1; + GF_UNUSED int dict_ret = -1; + int can_block = 0; + pl_inode_t *pinode = NULL; + pl_inode_lock_t *reqlock = NULL; + pl_dom_list_t *dom = NULL; + char *res = NULL; + char *res1 = NULL; + char *conn_id = NULL; + pl_ctx_t *ctx = NULL; + + if (xdata) + dict_ret = dict_get_str(xdata, "connection-id", &conn_id); + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(inode, unwind); + VALIDATE_OR_GOTO(flock, unwind); + + if ((flock->l_start < 0) || (flock->l_len < 0)) { + op_errno = EINVAL; + goto unwind; + } + + op_errno = _pl_convert_volume_for_special_range(flock, volume, &res); + if (op_errno) + goto unwind; + if (res) + volume = res; + + pl_trace_in(this, frame, fd, loc, cmd, flock, volume); + + if (frame->root->client) { + ctx = pl_ctx_get(frame->root->client, this); + if (!ctx) { + op_errno = ENOMEM; + gf_log(this->name, GF_LOG_INFO, "pl_ctx_get() failed"); + goto unwind; } - - dom = get_domain (pinode, volume); - if (!dom) { - op_errno = ENOMEM; - goto unwind; - } - - reqlock = new_inode_lock (flock, frame->root->client, frame->root->pid, - frame, this, dom->domain, conn_id); - - if (!reqlock) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - - - switch (cmd) { + } + + pinode = pl_inode_get(this, inode); + if (!pinode) { + op_errno = ENOMEM; + goto unwind; + } + + dom = get_domain(pinode, volume); + if (!dom) { + op_errno = ENOMEM; + goto unwind; + } + + reqlock = new_inode_lock(flock, frame->root->client, frame->root->pid, + frame, this, dom->domain, conn_id); + + if (!reqlock) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + switch (cmd) { case F_SETLKW: - can_block = 1; + can_block = 1; - /* fall through */ + /* fall through */ case F_SETLK: - memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock)); - ret = pl_inode_setlk (this, ctx, pinode, reqlock, can_block, - dom, inode); - - if (ret < 0) { - if ((can_block) && (F_UNLCK != flock->l_type)) { - pl_trace_block (this, frame, fd, loc, - cmd, flock, volume); - goto out; - } - gf_log (this->name, GF_LOG_TRACE, "returning EAGAIN"); - op_errno = -ret; - goto unwind; + memcpy(&reqlock->user_flock, flock, sizeof(struct gf_flock)); + ret = pl_inode_setlk(this, ctx, pinode, reqlock, can_block, 
dom, + inode); + + if (ret < 0) { + if ((can_block) && (F_UNLCK != flock->l_type)) { + pl_trace_block(this, frame, fd, loc, cmd, flock, volume); + goto out; } - break; + gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN"); + op_errno = -ret; + goto unwind; + } + break; default: - op_errno = ENOTSUP; - gf_log (this->name, GF_LOG_DEBUG, - "Lock command F_GETLK not supported for [f]inodelk " - "(cmd=%d)", - cmd); - goto unwind; - } + op_errno = ENOTSUP; + gf_log(this->name, GF_LOG_DEBUG, + "Lock command F_GETLK not supported for [f]inodelk " + "(cmd=%d)", + cmd); + goto unwind; + } - op_ret = 0; + op_ret = 0; unwind: - if (flock != NULL) - pl_trace_out (this, frame, fd, loc, cmd, flock, op_ret, - op_errno, volume); + if (flock != NULL) + pl_trace_out(this, frame, fd, loc, cmd, flock, op_ret, op_errno, + volume); - STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, NULL); + STACK_UNWIND_STRICT(inodelk, frame, op_ret, op_errno, NULL); out: - GF_FREE (res); - GF_FREE (res1); - return 0; + GF_FREE(res); + GF_FREE(res1); + return 0; } int -pl_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock, - dict_t *xdata) +pl_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, + int32_t cmd, struct gf_flock *flock, dict_t *xdata) { - pl_common_inodelk (frame, this, volume, loc->inode, cmd, flock, - loc, NULL, xdata); + pl_common_inodelk(frame, this, volume, loc->inode, cmd, flock, loc, NULL, + xdata); - return 0; + return 0; } int -pl_finodelk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock, - dict_t *xdata) +pl_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + int32_t cmd, struct gf_flock *flock, dict_t *xdata) { - pl_common_inodelk (frame, this, volume, fd->inode, cmd, flock, - NULL, fd, xdata); - - return 0; + pl_common_inodelk(frame, this, volume, fd->inode, cmd, flock, NULL, fd, + xdata); + return 0; } static int32_t -__get_inodelk_dom_count (pl_dom_list_t *dom) +__get_inodelk_dom_count(pl_dom_list_t *dom) { - pl_inode_lock_t *lock = NULL; - int32_t count = 0; - - list_for_each_entry (lock, &dom->inodelk_list, list) { - count++; - } - list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) { - count++; - } - return count; + pl_inode_lock_t *lock = NULL; + int32_t count = 0; + + list_for_each_entry(lock, &dom->inodelk_list, list) { count++; } + list_for_each_entry(lock, &dom->blocked_inodelks, blocked_locks) + { + count++; + } + return count; } /* Returns the no. of locks (blocked/granted) held on a given domain name * If @domname is NULL, returns the no. of locks in all the domains present. 
* If @domname is non-NULL and non-existent, returns 0 */ int32_t -__get_inodelk_count (xlator_t *this, pl_inode_t *pl_inode, char *domname) +__get_inodelk_count(xlator_t *this, pl_inode_t *pl_inode, char *domname) { - int32_t count = 0; - pl_dom_list_t *dom = NULL; - - list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { - if (domname) { - if (strcmp (domname, dom->domain) == 0) { - count = __get_inodelk_dom_count (dom); - goto out; - } - - } else { - /* Counting locks from all domains */ - count += __get_inodelk_dom_count (dom); + int32_t count = 0; + pl_dom_list_t *dom = NULL; + + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + if (domname) { + if (strcmp(domname, dom->domain) == 0) { + count = __get_inodelk_dom_count(dom); + goto out; + } - } + } else { + /* Counting locks from all domains */ + count += __get_inodelk_dom_count(dom); } + } out: - return count; + return count; } int32_t -get_inodelk_count (xlator_t *this, inode_t *inode, char *domname) +get_inodelk_count(xlator_t *this, inode_t *inode, char *domname) { - pl_inode_t *pl_inode = NULL; - uint64_t tmp_pl_inode = 0; - int ret = 0; - int32_t count = 0; + pl_inode_t *pl_inode = NULL; + uint64_t tmp_pl_inode = 0; + int ret = 0; + int32_t count = 0; - ret = inode_ctx_get (inode, this, &tmp_pl_inode); - if (ret != 0) { - goto out; - } + ret = inode_ctx_get(inode, this, &tmp_pl_inode); + if (ret != 0) { + goto out; + } - pl_inode = (pl_inode_t *)(long) tmp_pl_inode; + pl_inode = (pl_inode_t *)(long)tmp_pl_inode; - pthread_mutex_lock (&pl_inode->mutex); - { - count = __get_inodelk_count (this, pl_inode, domname); - } - pthread_mutex_unlock (&pl_inode->mutex); + pthread_mutex_lock(&pl_inode->mutex); + { + count = __get_inodelk_count(this, pl_inode, domname); + } + pthread_mutex_unlock(&pl_inode->mutex); out: - return count; + return count; } |
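A second piece of logic worth calling out from the hunks above is lock revocation: __inodelk_prune_stale() clears a domain's locks when a conflicting granted lock is older than priv->revocation_secs, or when the blocked-lock backlog reaches priv->revocation_max_blocked. Below is a hedged sketch of the age test only, mirroring the second-granularity arithmetic of __stale_inodelk(); the function name and parameters here are illustrative, not declarations from the file:

#include <sys/time.h>
#include <time.h>

/* Return non-zero if a conflicting, already-granted lock is old enough
 * to be revoked; also report its age in seconds. */
int lock_is_stale(time_t granted_sec, time_t revocation_secs, time_t *age_out)
{
    struct timeval now;

    gettimeofday(&now, NULL);
    *age_out = now.tv_sec - granted_sec;

    /* A zero threshold disables revocation; in the diff the caller
     * checks priv->revocation_secs before ever reaching this test. */
    return (revocation_secs != 0) && (*age_out > revocation_secs);
}

In the diff, pl_inode_setlk() runs this pruning only for lock requests (F_UNLCK requests skip it), and __inodelk_prune_stale() chooses between clearing all locks or only granted ones based on priv->revocation_clear_all.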