diff options
Diffstat (limited to 'xlators/features/upcall/src/upcall-internal.c')
| -rw-r--r-- | xlators/features/upcall/src/upcall-internal.c | 689 |
1 files changed, 689 insertions, 0 deletions
diff --git a/xlators/features/upcall/src/upcall-internal.c b/xlators/features/upcall/src/upcall-internal.c new file mode 100644 index 00000000000..c641bd6f432 --- /dev/null +++ b/xlators/features/upcall/src/upcall-internal.c @@ -0,0 +1,689 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <unistd.h> +#include <fcntl.h> +#include <limits.h> + +#include <glusterfs/glusterfs.h> +#include <glusterfs/compat.h> +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include <glusterfs/common-utils.h> + +#include <glusterfs/statedump.h> +#include <glusterfs/syncop.h> + +#include "upcall.h" +#include "upcall-mem-types.h" +#include "glusterfs3-xdr.h" +#include "protocol-common.h" +#include <glusterfs/defaults.h> + +/* + * Check if any of the upcall options are enabled: + * - cache_invalidation + */ +gf_boolean_t +is_upcall_enabled(xlator_t *this) +{ + upcall_private_t *priv = NULL; + + if (this->private) { + priv = (upcall_private_t *)this->private; + return priv->cache_invalidation_enabled; + } + + return _gf_false; +} + +/* + * Get the cache_invalidation_timeout + */ +static int32_t +get_cache_invalidation_timeout(xlator_t *this) +{ + upcall_private_t *priv = NULL; + + if (this->private) { + priv = (upcall_private_t *)this->private; + return priv->cache_invalidation_timeout; + } + + return 0; +} + +static upcall_client_t * +__add_upcall_client(call_frame_t *frame, client_t *client, + upcall_inode_ctx_t *up_inode_ctx, time_t now) +{ + upcall_client_t *up_client_entry = GF_MALLOC( + sizeof(*up_client_entry), gf_upcall_mt_upcall_client_entry_t); + if (!up_client_entry) { + gf_msg("upcall", GF_LOG_WARNING, 0, UPCALL_MSG_NO_MEMORY, + "Memory allocation failed"); + return NULL; + } + INIT_LIST_HEAD(&up_client_entry->client_list); + up_client_entry->client_uid = gf_strdup(client->client_uid); + up_client_entry->access_time = now; + up_client_entry->expire_time_attr = get_cache_invalidation_timeout( + frame->this); + + list_add_tail(&up_client_entry->client_list, &up_inode_ctx->client_list); + + gf_log(THIS->name, GF_LOG_DEBUG, "upcall_entry_t client added - %s", + up_client_entry->client_uid); + + return up_client_entry; +} + +static int +__upcall_inode_ctx_set(inode_t *inode, xlator_t *this) +{ + upcall_inode_ctx_t *inode_ctx = NULL; + upcall_private_t *priv = NULL; + int ret = -1; + uint64_t ctx = 0; + + priv = this->private; + GF_ASSERT(priv); + + ret = __inode_ctx_get(inode, this, &ctx); + + if (!ret) + goto out; + + inode_ctx = GF_MALLOC(sizeof(upcall_inode_ctx_t), + gf_upcall_mt_upcall_inode_ctx_t); + + if (!inode_ctx) { + ret = -ENOMEM; + goto out; + } + + pthread_mutex_init(&inode_ctx->client_list_lock, NULL); + INIT_LIST_HEAD(&inode_ctx->inode_ctx_list); + INIT_LIST_HEAD(&inode_ctx->client_list); + inode_ctx->destroy = 0; + gf_uuid_copy(inode_ctx->gfid, inode->gfid); + + ctx = (long)inode_ctx; + ret = __inode_ctx_set(inode, this, &ctx); + if (ret) { + gf_log(this->name, GF_LOG_DEBUG, "failed to set inode ctx (%p)", inode); + GF_FREE(inode_ctx); + goto out; + } + + /* add this inode_ctx to the global list */ + LOCK(&priv->inode_ctx_lk); + { + list_add_tail(&inode_ctx->inode_ctx_list, &priv->inode_ctx_list); + } + UNLOCK(&priv->inode_ctx_lk); +out: + return ret; +} + +static upcall_inode_ctx_t * +__upcall_inode_ctx_get(inode_t *inode, xlator_t *this) +{ + upcall_inode_ctx_t *inode_ctx = NULL; + uint64_t ctx = 0; + int ret = 0; + + ret = __inode_ctx_get(inode, this, &ctx); + + if (ret < 0) { + ret = __upcall_inode_ctx_set(inode, this); + if (ret < 0) + goto out; + + ret = __inode_ctx_get(inode, this, &ctx); + if (ret < 0) + goto out; + } + + inode_ctx = (upcall_inode_ctx_t *)(long)(ctx); + +out: + return inode_ctx; +} + +upcall_inode_ctx_t * +upcall_inode_ctx_get(inode_t *inode, xlator_t *this) +{ + upcall_inode_ctx_t *inode_ctx = NULL; + + LOCK(&inode->lock); + { + inode_ctx = __upcall_inode_ctx_get(inode, this); + } + UNLOCK(&inode->lock); + + return inode_ctx; +} + +static int +__upcall_cleanup_client_entry(upcall_client_t *up_client) +{ + list_del_init(&up_client->client_list); + + GF_FREE(up_client->client_uid); + GF_FREE(up_client); + + return 0; +} + +static int +upcall_cleanup_expired_clients(xlator_t *this, upcall_inode_ctx_t *up_inode_ctx, + time_t now) +{ + upcall_client_t *up_client = NULL; + upcall_client_t *tmp = NULL; + int ret = -1; + time_t timeout = 0; + time_t t_expired = 0; + + timeout = get_cache_invalidation_timeout(this); + + pthread_mutex_lock(&up_inode_ctx->client_list_lock); + { + list_for_each_entry_safe(up_client, tmp, &up_inode_ctx->client_list, + client_list) + { + t_expired = now - up_client->access_time; + + if (t_expired > (2 * timeout)) { + gf_log(THIS->name, GF_LOG_TRACE, "Cleaning up client_entry(%s)", + up_client->client_uid); + + ret = __upcall_cleanup_client_entry(up_client); + + if (ret) { + gf_msg("upcall", GF_LOG_WARNING, 0, + UPCALL_MSG_INTERNAL_ERROR, + "Client entry cleanup failed (%p)", up_client); + goto out; + } + } + } + } + pthread_mutex_unlock(&up_inode_ctx->client_list_lock); + + ret = 0; +out: + return ret; +} + +/* + * Free Upcall inode_ctx client list + */ +int +__upcall_cleanup_inode_ctx_client_list(upcall_inode_ctx_t *inode_ctx) +{ + upcall_client_t *up_client = NULL; + upcall_client_t *tmp = NULL; + + list_for_each_entry_safe(up_client, tmp, &inode_ctx->client_list, + client_list) + { + __upcall_cleanup_client_entry(up_client); + } + + return 0; +} + +static void +upcall_cache_forget(xlator_t *this, inode_t *inode, + upcall_inode_ctx_t *up_inode_ctx); + +/* + * Free upcall_inode_ctx + */ +int +upcall_cleanup_inode_ctx(xlator_t *this, inode_t *inode) +{ + uint64_t ctx = 0; + upcall_inode_ctx_t *inode_ctx = NULL; + int ret = 0; + upcall_private_t *priv = NULL; + + priv = this->private; + GF_ASSERT(priv); + + ret = inode_ctx_del(inode, this, &ctx); + + if (ret < 0) { + gf_msg("upcall", GF_LOG_WARNING, 0, UPCALL_MSG_INTERNAL_ERROR, + "Failed to del upcall_inode_ctx (%p)", inode); + goto out; + } + + inode_ctx = (upcall_inode_ctx_t *)(long)ctx; + + if (inode_ctx) { + /* Invalidate all the upcall cache entries */ + upcall_cache_forget(this, inode, inode_ctx); + + /* do we really need lock? yes now reaper thread + * may also be trying to cleanup the client entries. + */ + pthread_mutex_lock(&inode_ctx->client_list_lock); + { + if (!list_empty(&inode_ctx->client_list)) { + __upcall_cleanup_inode_ctx_client_list(inode_ctx); + } + } + pthread_mutex_unlock(&inode_ctx->client_list_lock); + + /* Mark the inode_ctx to be destroyed */ + inode_ctx->destroy = 1; + gf_msg_debug("upcall", 0, "set upcall_inode_ctx (%p) to destroy mode", + inode_ctx); + } + +out: + return ret; +} + +/* + * Traverse through the list of upcall_inode_ctx(s), + * cleanup the expired client entries and destroy the ctx + * which is no longer valid and has destroy bit set. + */ +void * +upcall_reaper_thread(void *data) +{ + upcall_private_t *priv = NULL; + upcall_inode_ctx_t *inode_ctx = NULL; + upcall_inode_ctx_t *tmp = NULL; + xlator_t *this = NULL; + time_t timeout = 0; + time_t time_now; + + this = (xlator_t *)data; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + time_now = gf_time(); + while (!priv->fini) { + list_for_each_entry_safe(inode_ctx, tmp, &priv->inode_ctx_list, + inode_ctx_list) + { + /* cleanup expired clients */ + upcall_cleanup_expired_clients(this, inode_ctx, time_now); + + if (!inode_ctx->destroy) { + continue; + } + + /* client list would have been cleaned up*/ + gf_msg_debug("upcall", 0, "Freeing upcall_inode_ctx (%p)", + inode_ctx); + LOCK(&priv->inode_ctx_lk); + { + list_del_init(&inode_ctx->inode_ctx_list); + pthread_mutex_destroy(&inode_ctx->client_list_lock); + } + UNLOCK(&priv->inode_ctx_lk); + GF_FREE(inode_ctx); + inode_ctx = NULL; + } + + /* don't do a very busy loop */ + timeout = get_cache_invalidation_timeout(this); + sleep(timeout / 2); + time_now = gf_time(); + } + + return NULL; +} + +/* + * Initialize upcall reaper thread. + */ +int +upcall_reaper_thread_init(xlator_t *this) +{ + upcall_private_t *priv = NULL; + int ret = -1; + + priv = this->private; + GF_ASSERT(priv); + + ret = gf_thread_create(&priv->reaper_thr, NULL, upcall_reaper_thread, this, + "upreaper"); + + return ret; +} + +int +up_compare_afr_xattr(dict_t *d, char *k, data_t *v, void *tmp) +{ + dict_t *dict = tmp; + + if (!strncmp(k, AFR_XATTR_PREFIX, SLEN(AFR_XATTR_PREFIX)) && + (!is_data_equal(v, dict_get(dict, k)))) + return -1; + + return 0; +} + +static void +up_filter_afr_xattr(dict_t *xattrs, char *xattr, data_t *v) +{ + /* Filter the afr pending xattrs, with value 0. Ideally this should + * be executed only in case of xattrop and not in set and removexattr, + * butset and remove xattr fops do not come with keys AFR_XATTR_PREFIX + */ + if (!strncmp(xattr, AFR_XATTR_PREFIX, SLEN(AFR_XATTR_PREFIX)) && + (mem_0filled(v->data, v->len) == 0)) { + dict_del(xattrs, xattr); + } + return; +} + +static gf_boolean_t +up_key_is_regd_xattr(dict_t *regd_xattrs, char *regd_xattr, data_t *v, + void *xattr) +{ + int ret = _gf_false; + char *key = xattr; + + if (fnmatch(regd_xattr, key, 0) == 0) + ret = _gf_true; + + return ret; +} + +int +up_filter_unregd_xattr(dict_t *xattrs, char *xattr, data_t *v, + void *regd_xattrs) +{ + int ret = 0; + + ret = dict_foreach_match(regd_xattrs, up_key_is_regd_xattr, xattr, + dict_null_foreach_fn, NULL); + if (ret == 0) { + /* xattr was not found in the registered xattr, hence do not + * send notification for its change + */ + dict_del(xattrs, xattr); + goto out; + } + up_filter_afr_xattr(xattrs, xattr, v); +out: + return 0; +} + +int +up_filter_xattr(dict_t *xattr, dict_t *regd_xattrs) +{ + int ret = 0; + + ret = dict_foreach(xattr, up_filter_unregd_xattr, regd_xattrs); + + return ret; +} + +static void +upcall_client_cache_invalidate(xlator_t *this, uuid_t gfid, + upcall_client_t *up_client_entry, uint32_t flags, + struct iatt *stbuf, struct iatt *p_stbuf, + struct iatt *oldp_stbuf, dict_t *xattr, + time_t now); + +gf_boolean_t +up_invalidate_needed(dict_t *xattrs) +{ + if (dict_key_count(xattrs) == 0) { + gf_msg_trace("upcall", 0, + "None of xattrs requested for" + " invalidation, were changed. Nothing to " + "invalidate"); + return _gf_false; + } + + return _gf_true; +} + +/* + * Given a client, first fetch upcall_entry_t from the inode_ctx client list. + * Later traverse through the client list of that upcall entry. If this client + * is not present in the list, create one client entry with this client info. + * Also check if there are other clients which need to be notified of this + * op. If yes send notify calls to them. + * + * Since sending notifications for cache_invalidation is a best effort, + * any errors during the process are logged and ignored. + */ +void +upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client, + inode_t *inode, uint32_t flags, struct iatt *stbuf, + struct iatt *p_stbuf, struct iatt *oldp_stbuf, + dict_t *xattr) +{ + upcall_client_t *up_client_entry = NULL; + upcall_client_t *tmp = NULL; + upcall_inode_ctx_t *up_inode_ctx = NULL; + gf_boolean_t found = _gf_false; + time_t time_now; + inode_t *linked_inode = NULL; + + if (!is_upcall_enabled(this)) + return; + + /* server-side generated fops like quota/marker will not have any + * client associated with them. Ignore such fops. + */ + if (!client) { + gf_msg_debug("upcall", 0, "Internal fop - client NULL"); + return; + } + + /* For nameless LOOKUPs, inode created shall always be + * invalid. Hence check if there is any already linked inode. + * If yes, update the inode_ctx of that valid inode + */ + if (inode && (inode->ia_type == IA_INVAL) && stbuf) { + linked_inode = inode_find(inode->table, stbuf->ia_gfid); + if (linked_inode) { + gf_log("upcall", GF_LOG_DEBUG, + "upcall_inode_ctx_get of linked inode (%p)", inode); + up_inode_ctx = upcall_inode_ctx_get(linked_inode, this); + } + } + + if (inode && !up_inode_ctx) + up_inode_ctx = upcall_inode_ctx_get(inode, this); + + if (!up_inode_ctx) { + gf_msg("upcall", GF_LOG_WARNING, 0, UPCALL_MSG_INTERNAL_ERROR, + "upcall_inode_ctx_get failed (%p)", inode); + return; + } + + /* In case of LOOKUP, if first time, inode created shall be + * invalid till it gets linked to inode table. Read gfid from + * the stat returned in such cases. + */ + if (gf_uuid_is_null(up_inode_ctx->gfid) && stbuf) { + /* That means inode must have been invalid when this inode_ctx + * is created. Copy the gfid value from stbuf instead. + */ + gf_uuid_copy(up_inode_ctx->gfid, stbuf->ia_gfid); + } + + if (gf_uuid_is_null(up_inode_ctx->gfid)) { + gf_msg_debug(this->name, 0, + "up_inode_ctx->gfid and " + "stbuf->ia_gfid is NULL, fop:%s", + gf_fop_list[frame->root->op]); + goto out; + } + + time_now = gf_time(); + pthread_mutex_lock(&up_inode_ctx->client_list_lock); + { + list_for_each_entry_safe(up_client_entry, tmp, + &up_inode_ctx->client_list, client_list) + { + /* Do not send UPCALL event if same client. */ + if (!strcmp(client->client_uid, up_client_entry->client_uid)) { + up_client_entry->access_time = time_now; + found = _gf_true; + continue; + } + + /* + * Ignore sending notifications in case of only UP_ATIME + */ + if (!(flags & ~(UP_ATIME))) { + if (found) + break; + else /* we still need to find current client entry*/ + continue; + } + + /* any other client */ + + /* XXX: Send notifications asynchrounously + * instead of in the I/O path - BZ 1200264 + * Also if the file is frequently accessed, set + * expire_time_attr to 0. + */ + upcall_client_cache_invalidate( + this, up_inode_ctx->gfid, up_client_entry, flags, stbuf, + p_stbuf, oldp_stbuf, xattr, time_now); + } + + if (!found) { + up_client_entry = __add_upcall_client(frame, client, up_inode_ctx, + time_now); + } + } + pthread_mutex_unlock(&up_inode_ctx->client_list_lock); +out: + /* release the ref from inode_find */ + if (linked_inode) + inode_unref(linked_inode); + return; +} + +/* + * If the upcall_client_t has recently accessed the file (i.e, within + * priv->cache_invalidation_timeout), send a upcall notification. + */ +static void +upcall_client_cache_invalidate(xlator_t *this, uuid_t gfid, + upcall_client_t *up_client_entry, uint32_t flags, + struct iatt *stbuf, struct iatt *p_stbuf, + struct iatt *oldp_stbuf, dict_t *xattr, + time_t now) +{ + struct gf_upcall up_req = { + 0, + }; + struct gf_upcall_cache_invalidation ca_req = { + 0, + }; + time_t timeout = 0; + int ret = -1; + time_t t_expired = now - up_client_entry->access_time; + + GF_VALIDATE_OR_GOTO("upcall_client_cache_invalidate", + !(gf_uuid_is_null(gfid)), out); + timeout = get_cache_invalidation_timeout(this); + + if (t_expired < timeout) { + /* Send notify call */ + up_req.client_uid = up_client_entry->client_uid; + gf_uuid_copy(up_req.gfid, gfid); + + ca_req.flags = flags; + ca_req.expire_time_attr = up_client_entry->expire_time_attr; + if (stbuf) + ca_req.stat = *stbuf; + if (p_stbuf) + ca_req.p_stat = *p_stbuf; + if (oldp_stbuf) + ca_req.oldp_stat = *oldp_stbuf; + ca_req.dict = xattr; + + up_req.data = &ca_req; + up_req.event_type = GF_UPCALL_CACHE_INVALIDATION; + + gf_log(THIS->name, GF_LOG_TRACE, + "Cache invalidation notification sent to %s", + up_client_entry->client_uid); + + /* Need to send inode flags */ + ret = this->notify(this, GF_EVENT_UPCALL, &up_req); + + /* + * notify may fail as the client could have been + * dis(re)connected. Cleanup the client entry. + */ + if (ret < 0) + __upcall_cleanup_client_entry(up_client_entry); + + } else { + gf_log(THIS->name, GF_LOG_TRACE, + "Cache invalidation notification NOT sent to %s", + up_client_entry->client_uid); + + if (t_expired > (2 * timeout)) { + /* Cleanup the entry */ + __upcall_cleanup_client_entry(up_client_entry); + } + } +out: + return; +} + +/* + * This is called during upcall_inode_ctx cleanup in case of 'inode_forget'. + * Send "UP_FORGET" to all the clients so that they invalidate their cache + * entry and do a fresh lookup next time when any I/O comes in. + */ +static void +upcall_cache_forget(xlator_t *this, inode_t *inode, + upcall_inode_ctx_t *up_inode_ctx) +{ + upcall_client_t *up_client_entry = NULL; + upcall_client_t *tmp = NULL; + uint32_t flags = UP_FORGET; + time_t time_now; + + if (!up_inode_ctx) { + return; + } + + time_now = gf_time(); + pthread_mutex_lock(&up_inode_ctx->client_list_lock); + { + list_for_each_entry_safe(up_client_entry, tmp, + &up_inode_ctx->client_list, client_list) + { + /* Set the access time to gf_time() + * to send notify */ + up_client_entry->access_time = time_now; + + upcall_client_cache_invalidate(this, up_inode_ctx->gfid, + up_client_entry, flags, NULL, NULL, + NULL, NULL, time_now); + } + } + pthread_mutex_unlock(&up_inode_ctx->client_list_lock); +} |
