diff options
| -rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot.c | 364 | ||||
| -rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot.h | 32 | ||||
| -rw-r--r-- | xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h | 1 | 
3 files changed, 271 insertions, 126 deletions
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c index 4b698fc9fa4..7a0a1b58b93 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot.c @@ -25,6 +25,18 @@  #define BR_HASH_CALC_READ_SIZE  (128 * 1024) +typedef int32_t (br_child_handler)(xlator_t *, br_child_t *); + +struct br_child_event { +        xlator_t *this; + +        br_child_t *child; + +        br_child_handler *call; + +        struct list_head list; +}; +  static int  br_find_child_index (xlator_t *this, xlator_t *child)  { @@ -49,26 +61,6 @@ out:          return index;  } -static void -br_free_children (xlator_t *this) -{ -        br_private_t *priv = NULL; -        int32_t       i    = 0; -        br_child_t   *child = NULL; - -        priv = this->private; - -        for (i = 0; i < priv->child_count; i++) { -                child = &priv->children[i]; -                mem_pool_destroy (child->timer_pool); -                list_del_init (&priv->children[i].list); -        } - -        GF_FREE (priv->children); - -        priv->children = NULL; -} -  br_child_t *  br_get_child_from_brick_path (xlator_t *this, char *brick_path)  { @@ -1085,6 +1077,16 @@ br_oneshot_signer (void *arg)          return NULL;  } +static void +br_set_child_state (br_child_t *child, br_child_state_t state) +{ +        LOCK (&child->lock); +        { +                _br_set_child_state (child, state); +        } +        UNLOCK (&child->lock); +} +  /**   * At this point a thread is spawned to crawl the filesystem (in   * tortoise pace) to sign objects that were not signed in previous run(s). 
@@ -1172,7 +1174,12 @@ br_enact_scrubber (xlator_t *this, br_child_t *child)                  goto error_return;          } -        ret = br_fsscan_schedule (this, child, fsscan, fsscrub); +        /* this needs to be serialized with reconfigure() */ +        pthread_mutex_lock (&priv->lock); +        { +                ret = br_fsscan_schedule (this, child, fsscan, fsscrub); +        } +        pthread_mutex_unlock (&priv->lock);          if (ret)                  goto error_return; @@ -1197,6 +1204,30 @@ br_enact_scrubber (xlator_t *this, br_child_t *child)          return -1;  } +static int32_t +br_child_enaction (xlator_t *this, br_child_t *child, br_stub_init_t *stub) +{ +        int32_t ret = -1; +        br_private_t *priv = this->private; + +        LOCK (&child->lock); +        { +                if (priv->iamscrubber) +                        ret = br_enact_scrubber (this, child); +                else +                        ret = br_enact_signer (this, child, stub); + +                if (!ret) { +                        _br_set_child_state (child, BR_CHILD_STATE_CONNECTED); +                        gf_log (this->name, GF_LOG_INFO, +                                "Connected to brick %s..", child->brick_path); +                } +        } +        UNLOCK (&child->lock); + +        return ret; +} +  /**   * This routine fetches various attributes associated with a child which   * is basically a subvolume. Attributes include brick path and the stub @@ -1204,7 +1235,7 @@ br_enact_scrubber (xlator_t *this, br_child_t *child)   * by getxattr() on a virtual key. Depending on the configuration, the   * process either acts as a signer or a scrubber.   
*/ -static inline int32_t +int32_t  br_brick_connect (xlator_t *this, br_child_t *child)  {          int32_t         ret      = -1; @@ -1213,14 +1244,13 @@ br_brick_connect (xlator_t *this, br_child_t *child)          struct iatt     parent   = {0, };          br_stub_init_t *stub     = NULL;          dict_t         *xattr    = NULL; -        br_private_t   *priv     = NULL;          int             op_errno = 0;          GF_VALIDATE_OR_GOTO ("bit-rot", this, out);          GF_VALIDATE_OR_GOTO (this->name, child, out);          GF_VALIDATE_OR_GOTO (this->name, this->private, out); -        priv = this->private; +        br_set_child_state (child, BR_CHILD_STATE_INITIALIZING);          loc.inode = inode_ref (child->table->root);          gf_uuid_copy (loc.gfid, loc.inode->gfid); @@ -1257,20 +1287,15 @@ br_brick_connect (xlator_t *this, br_child_t *child)          child->tv.tv_sec = ntohl (stub->timebuf[0]);          child->tv.tv_usec = ntohl (stub->timebuf[1]); -        if (priv->iamscrubber) -                ret = br_enact_scrubber (this, child); -        else -                ret = br_enact_signer (this, child, stub); - -        if (!ret) -                gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_CONNECTED_TO_BRICK, -                        "Connected to brick %s..", child->brick_path); +        ret = br_child_enaction (this, child, stub);   free_dict:          dict_unref (xattr);   wipeloc:          loc_wipe (&loc);   out: +        if (ret) +                br_set_child_state (child, BR_CHILD_STATE_CONNFAILED);          return ret;  } @@ -1285,7 +1310,8 @@ br_handle_events (void *arg)          int32_t       ret   = 0;          xlator_t     *this  = NULL;          br_private_t *priv  = NULL; -        br_child_t   *child = NULL; +        br_child_t *child = NULL; +        struct br_child_event *childev = NULL;          this = arg;          priv = this->private; @@ -1304,17 +1330,20 @@ br_handle_events (void *arg)                          while (list_empty 
(&priv->bricks))                                  pthread_cond_wait (&priv->cond, &priv->lock); -                        child = list_first_entry -                                          (&priv->bricks, br_child_t, list); -                        list_del_init (&child->list); +                        childev = list_first_entry +                                   (&priv->bricks, struct br_child_event, list); +                        list_del_init (&childev->list);                  }                  pthread_mutex_unlock (&priv->lock); -                ret = br_brick_connect (this, child); +                child = childev->child; +                ret = childev->call (this, child);                  if (ret)                          gf_msg (this->name, GF_LOG_ERROR, 0, -                                BRB_MSG_SUBVOL_CONNECT_FAILED, "failed to " -                                "connect to subvolume %s", child->xl->name); +                                BRB_MSG_SUBVOL_CONNECT_FAILED, +                                "callback handler for subvolume [%s] failed", +                                child->xl->name); +                GF_FREE (childev);          }          return NULL; @@ -1339,6 +1368,29 @@ mem_acct_init (xlator_t *this)          return ret;  } +static void +_br_qchild_event (xlator_t *this, br_child_t *child, br_child_handler *call) +{ +        br_private_t *priv = NULL; +        struct br_child_event *childev = NULL; + +        priv = this->private; + +        childev = GF_CALLOC (1, sizeof (*childev), gf_br_mt_br_child_event_t); +        if (!childev) { +                gf_log (this->name, GF_LOG_ERROR, "Event unhandled for " +                        "child.. 
[Brick: %s]", child->xl->name); +                return; +        } + +        INIT_LIST_HEAD (&childev->list); +        childev->this  = this; +        childev->child = child; +        childev->call  = call; + +        list_add_tail (&childev->list, &priv->bricks); +} +  int  notify (xlator_t *this, int32_t event, void *data, ...)  { @@ -1368,14 +1420,14 @@ notify (xlator_t *this, int32_t event, void *data, ...)                          child = &priv->children[idx];                          if (child->child_up == 1)                                  goto unblock; +                        priv->up_children++;                          child->child_up = 1;                          child->xl = subvol; -                        child->table = inode_table_new (4096, subvol); +                        if (!child->table) +                                child->table = inode_table_new (4096, subvol); -                        priv->up_children++; - -                        list_add_tail (&child->list, &priv->bricks); +                        _br_qchild_event (this, child, br_brick_connect);                          pthread_cond_signal (&priv->cond);                  }          unblock: @@ -1405,6 +1457,7 @@ notify (xlator_t *this, int32_t event, void *data, ...)                  
if (priv->up_children == 0)                          default_notify (this, event, data);                  break; +          default:                  default_notify (this, event, data);          } @@ -1558,59 +1611,94 @@ br_signer_init (xlator_t *this, br_private_t *priv)  } -int32_t -init (xlator_t *this) +static void +br_free_children (xlator_t *this, br_private_t *priv, int count)  { -        int            i    = 0; -        int32_t        ret  = -1; -        br_private_t  *priv = NULL; -        xlator_list_t *trav = NULL; +        br_child_t *child = NULL; -        if (!this->children) { -                gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_NO_CHILD, -                        "FATAL: no children"); -                goto out; +        for (--count; count >= 0; count--) { +                child = &priv->children[count]; +                mem_pool_destroy (child->timer_pool); +                LOCK_DESTROY (&child->lock);          } -        priv = GF_CALLOC (1, sizeof (*priv), gf_br_mt_br_private_t); -        if (!priv) { -                gf_msg (this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, -                        "failed to allocate memory (->priv)"); -                goto out; -        } +        GF_FREE (priv->children); +        priv->children = NULL; +} -        GF_OPTION_INIT ("scrubber", priv->iamscrubber, bool, out); +static int +br_init_children (xlator_t *this, br_private_t *priv) +{ +        int i = 0; +        br_child_t *child = NULL; +        xlator_list_t *trav = NULL;          priv->child_count = xlator_subvolume_count (this);          priv->children = GF_CALLOC (priv->child_count, sizeof (*priv->children),                                      gf_br_mt_br_child_t);          if (!priv->children) -                goto free_priv; +                goto err;          trav = this->children;          while (trav) { -                priv->children[i].this = this; -                priv->children[i].xl = trav->xlator; - -                
priv->children[i].timer_pool = -                                  mem_pool_new (struct gf_tw_timer_list,  4096); -                if (!priv->children[i].timer_pool) { -                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM, -                                BRB_MSG_NO_MEMORY, "failed to allocate mem-pool" -                                " for timer"); +                child = &priv->children[i]; + +                LOCK_INIT (&child->lock); +                br_set_child_state (child, BR_CHILD_STATE_DISCONNECTED); + +                child->this = this; +                child->xl = trav->xlator; + +                child->timer_pool = mem_pool_new +                                    (struct gf_tw_timer_list,  4096); +                if (!child->timer_pool) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "failed to allocate mem-pool for timer");                          errno = ENOMEM; -                        goto free_children; +                        goto freechild;                  } +                INIT_LIST_HEAD (&child->list); +                  i++;                  trav = trav->next;          } +        return 0; + + freechild: +        br_free_children (this, priv, i); + err: +        return -1; +} + +int32_t +init (xlator_t *this) +{ +        int32_t       ret  = -1; +	br_private_t *priv = NULL; + +	if (!this->children) { +		gf_log (this->name, GF_LOG_ERROR, "FATAL: no children"); +		goto out; +	} + +        priv = GF_CALLOC (1, sizeof (*priv), gf_br_mt_br_private_t); +        if (!priv) { +                gf_log (this->name, GF_LOG_ERROR, +                        "failed to allocate memory (->priv)"); +                goto out; +        } + +        GF_OPTION_INIT ("scrubber", priv->iamscrubber, bool, out); + +        ret = br_init_children (this, priv); +        if (ret) +                goto free_priv; +          pthread_mutex_init (&priv->lock, NULL);          pthread_cond_init (&priv->cond, 
NULL); -        for (i = 0; i < priv->child_count; i++) -                INIT_LIST_HEAD (&priv->children[i].list);          INIT_LIST_HEAD (&priv->bricks);          INIT_LIST_HEAD (&priv->signing); @@ -1619,7 +1707,7 @@ init (xlator_t *this)                  gf_msg (this->name, GF_LOG_ERROR, 0,                          BRB_MSG_TIMER_WHEEL_UNAVAILABLE,                          "global timer wheel unavailable"); -                goto cleanup_mutex; +                goto cleanup;          }  	this->private = priv; @@ -1635,7 +1723,7 @@ init (xlator_t *this)          }          if (ret) -                goto cleanup_mutex; +                goto cleanup;          ret = gf_thread_create (&priv->thread, NULL, br_handle_events, this);          if (ret != 0) { @@ -1651,16 +1739,12 @@ init (xlator_t *this)                  return 0;          } - cleanup_mutex: + cleanup:          (void) pthread_cond_destroy (&priv->cond);          (void) pthread_mutex_destroy (&priv->lock); - free_children: -        for (i = 0; i < priv->child_count; i++) { -                if (priv->children[i].timer_pool) -                        mem_pool_destroy (priv->children[i].timer_pool); -        } -        GF_FREE (priv->children); +        br_free_children (this, priv, priv->child_count); +   free_priv:          GF_FREE (priv);   out: @@ -1678,7 +1762,7 @@ fini (xlator_t *this)          if (!priv->iamscrubber)                  br_fini_signer (this, priv); -        br_free_children (this); +        br_free_children (this, priv, priv->child_count);          this->private = NULL;  	GF_FREE (priv); @@ -1686,64 +1770,96 @@ fini (xlator_t *this)  	return;  } -int -reconfigure (xlator_t *this, dict_t *options) +static void +br_reconfigure_child (xlator_t *this, +                      br_child_t *child, struct br_scrubber *fsscrub)  { -        int i = 0; -        int32_t ret = -1; -        br_child_t *child = NULL; -        br_private_t *priv = NULL; -        struct br_scanfs *fsscan = NULL; +        
int32_t ret = 0; +        struct br_scanfs *fsscan = &child->fsscan; + +        ret = br_fsscan_reschedule (this, child, fsscan, fsscrub, _gf_true); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, +                        "Could not reschedule scrubber for brick: %s. " +                        "Scubbing will continue according to old frequency.", +                        child->brick_path); +        } +} + +static int +br_reconfigure_scrubber (xlator_t *this, dict_t *options) +{ +        int                 i       = 0; +        int32_t             ret     = -1; +        br_child_t         *child   = NULL; +        br_private_t       *priv    = NULL;          struct br_scrubber *fsscrub = NULL;          priv = this->private; +        fsscrub = &priv->fsscrub; -        if (!priv->iamscrubber) { -                ret = br_signer_handle_options (this, priv, options); -                if (ret) -                        goto err; -                return 0; +        pthread_mutex_lock (&priv->lock); +        { +                ret = br_scrubber_handle_options (this, priv, options);          } +        pthread_mutex_unlock (&priv->lock); -        ret = br_scrubber_handle_options (this, priv, options);          if (ret)                  goto err; -        fsscrub = &priv->fsscrub; -          /* reschedule all _up_ subvolume(s) */ -        pthread_mutex_lock (&priv->lock); -        { -                for (; i < priv->child_count; i++) { -                        child = &priv->children[i]; -                        if (!child->child_up) { -                                gf_msg (this->name, GF_LOG_INFO, 0, -                                        BRB_MSG_BRICK_INFO, "Brick %s is " -                                        "offline, skipping rescheduling (scrub" -                                        " would auto- schedule when brick is " -                                        "back online).", child->brick_path); -                                
continue; -                        } +        for (; i < priv->child_count; i++) { +                child = &priv->children[i]; -                        fsscan = &child->fsscan; -                        ret = br_fsscan_reschedule (this, child, -                                                    fsscan, fsscrub, _gf_true); -                        if (ret) { -                                gf_msg (this->name, GF_LOG_ERROR, 0, -                                        BRB_MSG_RESCHEDULE_SCRUBBER_FAILED, -                                        "Could not reschedule scrubber for " -                                        "brick: %s. Scubbing will continue " -                                        "according to old frequency.", -                                        child->brick_path); +                LOCK (&child->lock); +                { +                        if (_br_child_failed_conn (child)) { +                                gf_log (this->name, GF_LOG_INFO, +                                        "Scrubber for brick [%s] failed " +                                        "initialization, rescheduling is " +                                        "skipped", child->brick_path); +                                goto unblock;                          } + +                        if (_br_is_child_connected (child)) +                                br_reconfigure_child (this, child, fsscrub); + +                        /** +                         * for the rest.. either the child is in initialization +                         * phase or is disconnected. either way, updated values +                         * would be reflected on successful connection. 
+                         */                  } +        unblock: +                UNLOCK (&child->lock);          } -        pthread_mutex_unlock (&priv->lock); - -        return 0;   err: -        return -1; +        return ret; +} + +static int +br_reconfigure_signer (xlator_t *this, dict_t *options) +{ +        br_private_t *priv = this->private; + +        return br_signer_handle_options (this, priv, options); +} + +int +reconfigure (xlator_t *this, dict_t *options) +{ +        int ret = 0; +        br_private_t *priv = NULL; + +        priv = this->private; + +        if (priv->iamscrubber) +                ret = br_reconfigure_scrubber (this, options); +        else +                ret = br_reconfigure_signer (this, options); + +        return ret;  }  struct xlator_fops fops; diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h index 7be4398d00d..d4742f4fea4 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot.h @@ -71,7 +71,18 @@ struct br_scanfs {          struct gf_tw_timer_list *timer;  }; +/* just need four states to track child status */ +typedef enum br_child_state { +        BR_CHILD_STATE_CONNECTED = 1, +        BR_CHILD_STATE_INITIALIZING, +        BR_CHILD_STATE_CONNFAILED, +        BR_CHILD_STATE_DISCONNECTED, +} br_child_state_t; +  struct br_child { +        gf_lock_t lock; +        br_child_state_t c_state; +          char child_up;                /* Indicates whether this child is                                           up or not */          xlator_t *xl;                 /* client xlator corresponding to @@ -135,8 +146,8 @@ typedef struct br_obj_n_workers br_obj_n_workers_t;  struct br_private {          pthread_mutex_t lock; -        struct list_head bricks;          /* list of bricks from which CHILD_UP -                                             has been received */ +        struct list_head bricks;          /* list of bricks from 
which events +                                             have been received */          struct list_head signing; @@ -202,5 +213,22 @@ br_prepare_loc (xlator_t *, br_child_t *, loc_t *, gf_dirent_t *, loc_t *);  gf_boolean_t  bitd_is_bad_file (xlator_t *, br_child_t *, loc_t *, fd_t *); +static inline void +_br_set_child_state (br_child_t *child, br_child_state_t state) +{ +        child->c_state = state; +} + +static inline int +_br_is_child_connected (br_child_t *child) +{ +        return (child->c_state == BR_CHILD_STATE_CONNECTED); +} + +static inline int +_br_child_failed_conn (br_child_t *child) +{ +        return (child->c_state == BR_CHILD_STATE_CONNFAILED); +}  #endif /* __BIT_ROT_H__ */ diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h index fbb69ce7ea8..f70fafbca49 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h @@ -31,6 +31,7 @@ enum br_mem_types {          gf_br_stub_mt_br_stub_fd_t,          gf_br_stub_mt_br_scanner_freq_t,          gf_br_stub_mt_sigstub_t, +        gf_br_mt_br_child_event_t,          gf_br_stub_mt_end,  };  | 
