diff options
Diffstat (limited to 'libglusterfs/src/glusterfs/stack.h')
| -rw-r--r-- | libglusterfs/src/glusterfs/stack.h | 547 | 
1 file changed, 547 insertions, 0 deletions
diff --git a/libglusterfs/src/glusterfs/stack.h b/libglusterfs/src/glusterfs/stack.h new file mode 100644 index 00000000000..17585508a22 --- /dev/null +++ b/libglusterfs/src/glusterfs/stack.h @@ -0,0 +1,547 @@ +/* +  Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> +  This file is part of GlusterFS. + +  This file is licensed to you under your choice of the GNU Lesser +  General Public License, version 3 or any later version (LGPLv3 or +  later), or the GNU General Public License, version 2 (GPLv2), in all +  cases as published by the Free Software Foundation. +*/ + +/* +  This file defines MACROS and static inlines used to emulate a function +  call over asynchronous communication with remote server +*/ + +#ifndef _STACK_H +#define _STACK_H + +struct _call_stack; +typedef struct _call_stack call_stack_t; +struct _call_frame; +typedef struct _call_frame call_frame_t; +struct call_pool; +typedef struct call_pool call_pool_t; + +#include <sys/time.h> + +#include "glusterfs/xlator.h" +#include "glusterfs/dict.h" +#include "glusterfs/list.h" +#include "glusterfs/common-utils.h" +#include "glusterfs/lkowner.h" +#include "glusterfs/client_t.h" +#include "glusterfs/libglusterfs-messages.h" +#include "glusterfs/timespec.h" + +#define NFS_PID 1 +#define LOW_PRIO_PROC_PID -1 + +#define STACK_ERR_XL_NAME(stack) (stack->err_xl ? stack->err_xl->name : "-") +#define STACK_CLIENT_NAME(stack)                                               \ +    (stack->client ? 
stack->client->client_uid : "-") + +typedef int32_t (*ret_fn_t)(call_frame_t *frame, call_frame_t *prev_frame, +                            xlator_t *this, int32_t op_ret, int32_t op_errno, +                            ...); + +struct call_pool { +    union { +        struct list_head all_frames; +        struct { +            call_stack_t *next_call; +            call_stack_t *prev_call; +        } all_stacks; +    }; +    int64_t cnt; +    gf_atomic_t total_count; +    gf_lock_t lock; +    struct mem_pool *frame_mem_pool; +    struct mem_pool *stack_mem_pool; +}; + +struct _call_frame { +    call_stack_t *root;   /* stack root */ +    call_frame_t *parent; /* previous BP */ +    struct list_head frames; +    void *local;    /* local variables */ +    xlator_t *this; /* implicit object */ +    ret_fn_t ret;   /* op_return address */ +    int32_t ref_count; +    gf_lock_t lock; +    void *cookie; /* unique cookie */ +    gf_boolean_t complete; + +    glusterfs_fop_t op; +    struct timespec begin; /* when this frame was created */ +    struct timespec end;   /* when this frame completed */ +    const char *wind_from; +    const char *wind_to; +    const char *unwind_from; +    const char *unwind_to; +}; + +struct _ns_info { +    uint32_t hash;      /* Hash of the namespace from SuperFastHash */ +    gf_boolean_t found; /* Set to true if we found a namespace */ +}; + +typedef struct _ns_info ns_info_t; + +#define SMALL_GROUP_COUNT 128 + +struct _call_stack { +    union { +        struct list_head all_frames; +        struct { +            call_stack_t *next_call; +            call_stack_t *prev_call; +        }; +    }; +    call_pool_t *pool; +    gf_lock_t stack_lock; +    client_t *client; +    uint64_t unique; +    void *state; /* pointer to request state */ +    uid_t uid; +    gid_t gid; +    pid_t pid; +    char identifier[UNIX_PATH_MAX]; +    uint16_t ngrps; +    uint32_t groups_small[SMALL_GROUP_COUNT]; +    uint32_t *groups_large; +    uint32_t *groups; +  
  gf_lkowner_t lk_owner; +    glusterfs_ctx_t *ctx; + +    struct list_head myframes; /* List of call_frame_t that go +                                  to make the call stack */ + +    int32_t op; +    int8_t type; +    struct timespec tv; +    xlator_t *err_xl; +    int32_t error; + +    uint32_t flags;        /* use it wisely, think of it as a mechanism to +                              send information over the wire too */ +    struct timespec ctime; /* timestamp, most probably set at +                              creation of stack. */ + +    ns_info_t ns_info; +}; + +/* call_stack flags field users */ +#define MDATA_CTIME (1 << 0) +#define MDATA_MTIME (1 << 1) +#define MDATA_ATIME (1 << 2) +#define MDATA_PAR_CTIME (1 << 3) +#define MDATA_PAR_MTIME (1 << 4) +#define MDATA_PAR_ATIME (1 << 5) + +#define frame_set_uid_gid(frm, u, g)                                           \ +    do {                                                                       \ +        if (frm) {                                                             \ +            (frm)->root->uid = u;                                              \ +            (frm)->root->gid = g;                                              \ +            (frm)->root->ngrps = 0;                                            \ +        }                                                                      \ +    } while (0); + +struct xlator_fops; +void +gf_update_latency(call_frame_t *frame); + +static inline void +FRAME_DESTROY(call_frame_t *frame) +{ +    void *local = NULL; + +    if (frame->root->ctx->measure_latency) +        gf_update_latency(frame); + +    list_del_init(&frame->frames); +    if (frame->local) { +        local = frame->local; +        frame->local = NULL; +    } + +    LOCK_DESTROY(&frame->lock); +    mem_put(frame); + +    if (local) +        mem_put(local); +} + +static inline void +STACK_DESTROY(call_stack_t *stack) +{ +    call_frame_t *frame = NULL; +    call_frame_t *tmp = NULL; + + 
   LOCK(&stack->pool->lock); +    { +        list_del_init(&stack->all_frames); +        stack->pool->cnt--; +    } +    UNLOCK(&stack->pool->lock); + +    LOCK_DESTROY(&stack->stack_lock); + +    list_for_each_entry_safe(frame, tmp, &stack->myframes, frames) +    { +        FRAME_DESTROY(frame); +    } + +    GF_FREE(stack->groups_large); + +    mem_put(stack); +} + +static inline void +STACK_RESET(call_stack_t *stack) +{ +    call_frame_t *frame = NULL; +    call_frame_t *tmp = NULL; +    call_frame_t *last = NULL; +    struct list_head toreset = {0}; + +    INIT_LIST_HEAD(&toreset); + +    /* We acquire call_pool->lock only to remove the frames from this stack +     * to preserve atomicity. This synchronizes across concurrent requests +     * like statedump, STACK_DESTROY etc. */ + +    LOCK(&stack->pool->lock); +    { +        last = list_last_entry(&stack->myframes, call_frame_t, frames); +        list_del_init(&last->frames); +        list_splice_init(&stack->myframes, &toreset); +        list_add(&last->frames, &stack->myframes); +    } +    UNLOCK(&stack->pool->lock); + +    list_for_each_entry_safe(frame, tmp, &toreset, frames) +    { +        FRAME_DESTROY(frame); +    } +} + +#define FRAME_SU_DO(frm, local_type)                                           \ +    do {                                                                       \ +        local_type *__local = (frm)->local;                                    \ +        __local->uid = frm->root->uid;                                         \ +        __local->gid = frm->root->gid;                                         \ +        __local->pid = frm->root->pid;                                         \ +        frm->root->uid = 0;                                                    \ +        frm->root->gid = 0;                                                    \ +        frm->root->pid = GF_CLIENT_PID_NO_ROOT_SQUASH;                         \ +    } while (0); + +#define FRAME_SU_UNDO(frm, 
local_type)                                         \ +    do {                                                                       \ +        local_type *__local = (frm)->local;                                    \ +        frm->root->uid = __local->uid;                                         \ +        frm->root->gid = __local->gid;                                         \ +        frm->root->pid = __local->pid;                                         \ +    } while (0); + +/* NOTE: make sure to keep this as an macro, mainly because, we need 'fn' +   field here to be the proper fn ptr, so its address is valid entry in +   'xlator_fops' struct. +   To understand this, check the `xlator.h:struct xlator_fops`, and then +   see a STACK_WIND call, which generally calls `subvol->fops->fop`, so +   the address offset should give the index */ + +/* +1 is required as 0 means NULL fop, and we don't have a variable for it */ +#define get_fop_index_from_fn(xl, fn)                                          \ +    (1 + (((long)&(fn) - (long)&((xl)->fops->stat)) / sizeof(void *))) + +/* NOTE: the above reason holds good here too. But notice that we are getting + the base address of the 'stat' fop, which is the first entry in the fop + structure. All we need to do is move as much as 'idx' fields, and get the + actual pointer from that field. */ + +static inline void * +get_the_pt_fop(void *base_fop, int fop_idx) +{ +    void *target_addr = (base_fop + ((fop_idx - 1) * sizeof(void *))); +    /* all below type casting is for not getting warning. */ +    return (void *)*(unsigned long *)target_addr; +} + +/* make a call without switching frames */ +#define STACK_WIND_TAIL(frame, obj, fn, params...)                             
\ +    do {                                                                       \ +        xlator_t *old_THIS = NULL;                                             \ +        xlator_t *next_xl = obj;                                               \ +        typeof(fn) next_xl_fn = fn;                                            \ +        int opn = get_fop_index_from_fn((next_xl), (fn));                      \ +                                                                               \ +        frame->this = next_xl;                                                 \ +        frame->wind_to = #fn;                                                  \ +        old_THIS = THIS;                                                       \ +        THIS = next_xl;                                                        \ +        gf_msg_trace("stack-trace", 0,                                         \ +                     "stack-address: %p, "                                     \ +                     "winding from %s to %s",                                  \ +                     frame->root, old_THIS->name, THIS->name);                 \ +        /* Need to capture counts at leaf node */                              \ +        if (!next_xl->pass_through && !next_xl->children) {                    \ +            GF_ATOMIC_INC(next_xl->stats.total.metrics[opn].fop);              \ +            GF_ATOMIC_INC(next_xl->stats.interval.metrics[opn].fop);           \ +            GF_ATOMIC_INC(next_xl->stats.total.count);                         \ +            GF_ATOMIC_INC(next_xl->stats.interval.count);                      \ +        }                                                                      \ +                                                                               \ +        if (next_xl->pass_through) {                                           \ +            next_xl_fn = get_the_pt_fop(&next_xl->pass_through_fops->stat,     \ +                             
           opn);                                  \ +        }                                                                      \ +        next_xl_fn(frame, next_xl, params);                                    \ +        THIS = old_THIS;                                                       \ +    } while (0) + +/* make a call */ +#define STACK_WIND(frame, rfn, obj, fn, params...)                             \ +    STACK_WIND_COMMON(frame, rfn, 0, NULL, obj, fn, params) + +/* make a call with a cookie */ +#define STACK_WIND_COOKIE(frame, rfn, cky, obj, fn, params...)                 \ +    STACK_WIND_COMMON(frame, rfn, 1, cky, obj, fn, params) + +/* Cookie passed as the argument can be NULL (ptr) or 0 (int). Hence we +   have to have a mechanism to separate out the two STACK_WIND formats. +   Needed a common macro, as other than for cookie, all the other code +   is common across. + */ +#define STACK_WIND_COMMON(frame, rfn, has_cookie, cky, obj, fn, params...)     \ +    do {                                                                       \ +        call_frame_t *_new = NULL;                                             \ +        xlator_t *old_THIS = NULL;                                             \ +        typeof(fn) next_xl_fn = fn;                                            \ +                                                                               \ +        _new = mem_get0(frame->root->pool->frame_mem_pool);                    \ +        if (!_new) {                                                           \ +            break;                                                             \ +        }                                                                      \ +        typeof(fn##_cbk) tmp_cbk = rfn;                                        \ +        _new->root = frame->root;                                              \ +        _new->this = obj;                                                      \ +        _new->ret = 
(ret_fn_t)tmp_cbk;                                         \ +        _new->parent = frame;                                                  \ +        /* (void *) is required for avoiding gcc warning */                    \ +        _new->cookie = ((has_cookie == 1) ? (void *)(cky) : (void *)_new);     \ +        _new->wind_from = __FUNCTION__;                                        \ +        _new->wind_to = #fn;                                                   \ +        _new->unwind_to = #rfn;                                                \ +        LOCK_INIT(&_new->lock);                                                \ +        LOCK(&frame->root->stack_lock);                                        \ +        {                                                                      \ +            list_add(&_new->frames, &frame->root->myframes);                   \ +            frame->ref_count++;                                                \ +        }                                                                      \ +        UNLOCK(&frame->root->stack_lock);                                      \ +        fn##_cbk = rfn;                                                        \ +        old_THIS = THIS;                                                       \ +        THIS = obj;                                                            \ +        gf_msg_trace("stack-trace", 0,                                         \ +                     "stack-address: %p, "                                     \ +                     "winding from %s to %s",                                  \ +                     frame->root, old_THIS->name, THIS->name);                 \ +        if (obj->ctx->measure_latency)                                         \ +            timespec_now(&_new->begin);                                        \ +        _new->op = get_fop_index_from_fn((_new->this), (fn));                  \ +        if (!obj->pass_through) {                   
                           \ +            GF_ATOMIC_INC(obj->stats.total.metrics[_new->op].fop);             \ +            GF_ATOMIC_INC(obj->stats.interval.metrics[_new->op].fop);          \ +            GF_ATOMIC_INC(obj->stats.total.count);                             \ +            GF_ATOMIC_INC(obj->stats.interval.count);                          \ +        } else {                                                               \ +            /* we want to get to the actual fop to call */                     \ +            next_xl_fn = get_the_pt_fop(&obj->pass_through_fops->stat,         \ +                                        _new->op);                             \ +        }                                                                      \ +        next_xl_fn(_new, obj, params);                                         \ +        THIS = old_THIS;                                                       \ +    } while (0) + +#define STACK_UNWIND STACK_UNWIND_STRICT + +/* return from function in type-safe way */ +#define STACK_UNWIND_STRICT(fop, frame, op_ret, op_errno, params...)           
\ +    do {                                                                       \ +        fop_##fop##_cbk_t fn = NULL;                                           \ +        call_frame_t *_parent = NULL;                                          \ +        xlator_t *old_THIS = NULL;                                             \ +                                                                               \ +        if (!frame) {                                                          \ +            gf_msg("stack", GF_LOG_CRITICAL, 0, LG_MSG_FRAME_ERROR, "!frame"); \ +            break;                                                             \ +        }                                                                      \ +        if ((op_ret) < 0) {                                                    \ +            gf_msg_debug("stack-trace", op_errno,                              \ +                         "stack-address: %p, "                                 \ +                         "%s returned %d error: %s",                           \ +                         frame->root, THIS->name, (int32_t)(op_ret),           \ +                         strerror(op_errno));                                  \ +        } else {                                                               \ +            gf_msg_trace("stack-trace", 0,                                     \ +                         "stack-address: %p, "                                 \ +                         "%s returned %d",                                     \ +                         frame->root, THIS->name, (int32_t)(op_ret));          \ +        }                                                                      \ +        fn = (fop_##fop##_cbk_t)frame->ret;                                    \ +        _parent = frame->parent;                                               \ +        LOCK(&frame->root->stack_lock);                                        \ +        {                    
                                                  \ +            _parent->ref_count--;                                              \ +            if ((op_ret) < 0 && (op_errno) != frame->root->error) {            \ +                frame->root->err_xl = frame->this;                             \ +                frame->root->error = (op_errno);                               \ +            } else if ((op_ret) == 0) {                                        \ +                frame->root->err_xl = NULL;                                    \ +                frame->root->error = 0;                                        \ +            }                                                                  \ +        }                                                                      \ +        UNLOCK(&frame->root->stack_lock);                                      \ +        old_THIS = THIS;                                                       \ +        THIS = _parent->this;                                                  \ +        frame->complete = _gf_true;                                            \ +        frame->unwind_from = __FUNCTION__;                                     \ +        if (frame->this->ctx->measure_latency) {                               \ +            timespec_now(&frame->end);                                         \ +            /* required for top most xlator */                                 \ +            if (_parent->ret == NULL)                                          \ +                timespec_now(&_parent->end);                                   \ +        }                                                                      \ +        if (op_ret < 0) {                                                      \ +            GF_ATOMIC_INC(THIS->stats.total.metrics[frame->op].cbk);           \ +            GF_ATOMIC_INC(THIS->stats.interval.metrics[frame->op].cbk);        \ +        }                                                    
                  \ +        fn(_parent, frame->cookie, _parent->this, op_ret, op_errno, params);   \ +        THIS = old_THIS;                                                       \ +    } while (0) + +static inline int +call_stack_alloc_groups(call_stack_t *stack, int ngrps) +{ +    if (ngrps <= SMALL_GROUP_COUNT) { +        stack->groups = stack->groups_small; +    } else { +        stack->groups_large = GF_CALLOC(ngrps, sizeof(gid_t), +                                        gf_common_mt_groups_t); +        if (!stack->groups_large) +            return -1; +        stack->groups = stack->groups_large; +    } + +    stack->ngrps = ngrps; + +    return 0; +} + +static inline int +call_frames_count(call_stack_t *call_stack) +{ +    call_frame_t *pos; +    int32_t count = 0; + +    if (!call_stack) +        return count; + +    list_for_each_entry(pos, &call_stack->myframes, frames) count++; + +    return count; +} + +static inline call_frame_t * +copy_frame(call_frame_t *frame) +{ +    call_stack_t *newstack = NULL; +    call_stack_t *oldstack = NULL; +    call_frame_t *newframe = NULL; + +    if (!frame) { +        return NULL; +    } + +    newstack = mem_get0(frame->root->pool->stack_mem_pool); +    if (newstack == NULL) { +        return NULL; +    } + +    INIT_LIST_HEAD(&newstack->myframes); + +    newframe = mem_get0(frame->root->pool->frame_mem_pool); +    if (!newframe) { +        mem_put(newstack); +        return NULL; +    } + +    newframe->this = frame->this; +    newframe->root = newstack; +    INIT_LIST_HEAD(&newframe->frames); +    list_add(&newframe->frames, &newstack->myframes); + +    oldstack = frame->root; + +    newstack->uid = oldstack->uid; +    newstack->gid = oldstack->gid; +    newstack->pid = oldstack->pid; +    newstack->op = oldstack->op; +    newstack->type = oldstack->type; +    newstack->ctime = oldstack->ctime; +    newstack->flags = oldstack->flags; +    if (call_stack_alloc_groups(newstack, oldstack->ngrps) != 0) { +        
mem_put(newstack); +        return NULL; +    } +    if (!oldstack->groups) { +        gf_msg_debug("stack", EINVAL, "groups is null (ngrps: %d)", +                     oldstack->ngrps); +        /* Considering 'groups' is NULL, set ngrps to 0 */ +        oldstack->ngrps = 0; + +        if (oldstack->groups_large) +            oldstack->groups = oldstack->groups_large; +        else +            oldstack->groups = oldstack->groups_small; +    } +    newstack->ngrps = oldstack->ngrps; +    memcpy(newstack->groups, oldstack->groups, sizeof(gid_t) * oldstack->ngrps); +    newstack->unique = oldstack->unique; +    newstack->pool = oldstack->pool; +    newstack->lk_owner = oldstack->lk_owner; +    newstack->ctx = oldstack->ctx; + +    if (newstack->ctx->measure_latency) { +        timespec_now(&newstack->tv); +        memcpy(&newframe->begin, &newstack->tv, sizeof(newstack->tv)); +    } + +    LOCK_INIT(&newframe->lock); +    LOCK_INIT(&newstack->stack_lock); + +    LOCK(&oldstack->pool->lock); +    { +        list_add(&newstack->all_frames, &oldstack->all_frames); +        newstack->pool->cnt++; +    } +    UNLOCK(&oldstack->pool->lock); +    GF_ATOMIC_INC(newstack->pool->total_count); + +    return newframe; +} + +void +call_stack_set_groups(call_stack_t *stack, int ngrps, gid_t **groupbuf_p); +void +gf_proc_dump_pending_frames(call_pool_t *call_pool); +void +gf_proc_dump_pending_frames_to_dict(call_pool_t *call_pool, dict_t *dict); +call_frame_t * +create_frame(xlator_t *xl, call_pool_t *pool); +gf_boolean_t +__is_fuse_call(call_frame_t *frame); +#endif /* _STACK_H */  | 
