diff options
Diffstat (limited to 'xlators')
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 31 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-shared.c | 26 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/tier.c | 554 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/tier.h | 6 | ||||
| -rw-r--r-- | xlators/features/changetimerecorder/src/changetimerecorder.c | 146 | ||||
| -rw-r--r-- | xlators/features/changetimerecorder/src/ctr-helper.h | 4 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-messages.h | 1 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 59 | 
8 files changed, 759 insertions, 68 deletions
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 3717a68273c..da1bcb6a4a1 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -310,7 +310,7 @@ typedef struct dht_local dht_local_t;  /* du - disk-usage */  struct dht_du {          double   avail_percent; -	double   avail_inodes; +        double   avail_inodes;          uint64_t avail_space;          uint32_t log;          uint32_t chunks; @@ -325,10 +325,10 @@ enum gf_defrag_type {          GF_DEFRAG_CMD_START_FORCE = 1 + 4,          GF_DEFRAG_CMD_START_TIER = 1 + 5,          GF_DEFRAG_CMD_STATUS_TIER = 1 + 6, -	GF_DEFRAG_CMD_START_DETACH_TIER = 1 + 7, -	GF_DEFRAG_CMD_STOP_DETACH_TIER = 1 + 8, -	GF_DEFRAG_CMD_PAUSE_TIER = 1 + 9, -	GF_DEFRAG_CMD_RESUME_TIER = 1 + 10, +        GF_DEFRAG_CMD_START_DETACH_TIER = 1 + 7, +        GF_DEFRAG_CMD_STOP_DETACH_TIER = 1 + 8, +        GF_DEFRAG_CMD_PAUSE_TIER = 1 + 9, +        GF_DEFRAG_CMD_RESUME_TIER = 1 + 10,  };  typedef enum gf_defrag_type gf_defrag_type; @@ -398,9 +398,26 @@ typedef struct gf_tier_conf {          uint64_t                     max_migrate_bytes;          int                          max_migrate_files;          tier_mode_t                  mode; +        /* These flags are only used for tier-compact */ +        gf_boolean_t                 compact_active; +        /* These 3 flags are set to true when the client changes the */ +        /* compaction mode on the command line. */ +        /* When they are set, the daemon will trigger compaction as */ +        /* soon as possible to activate or deactivate compaction. */ +        /* If in the middle of a compaction, then the switches take */ +        /* effect on the next compaction, not the current one. */ +        /* If the user switches it off, we want to avoid needless */ +        /* compactions. */ +        /* If the user switches it on, they want to compact as soon */ +        /* as possible. */ +        gf_boolean_t                 compact_mode_switched; +        gf_boolean_t                 compact_mode_switched_hot; +        gf_boolean_t                 compact_mode_switched_cold;          int                          tier_max_promote_size;          int                          tier_promote_frequency;          int                          tier_demote_frequency; +        int                          tier_compact_hot_frequency; +        int                          tier_compact_cold_frequency;          uint64_t                     st_last_promoted_size;          uint64_t                     st_last_demoted_size;          tier_pause_state_t           pause_state; @@ -1023,9 +1040,9 @@ int32_t dht_setattr (call_frame_t  *frame, xlator_t *this, loc_t *loc,  int32_t dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,                        struct iatt  *stbuf, int32_t valid, dict_t *xdata);  int32_t dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, -		      int32_t mode, off_t offset, size_t len, dict_t *xdata); +                      int32_t mode, off_t offset, size_t len, dict_t *xdata);  int32_t dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, -		    off_t offset, size_t len, dict_t *xdata); +                    off_t offset, size_t len, dict_t *xdata);  int32_t dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,                      off_t offset, off_t len, dict_t *xdata);  int32_t dht_ipc (call_frame_t *frame, xlator_t *this, int32_t op, diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c index 873ced53eec..f410f71b5a6 100644 --- a/xlators/cluster/dht/src/dht-shared.c +++ b/xlators/cluster/dht/src/dht-shared.c @@ -148,7 +148,7 @@ dht_priv_dump (xlator_t *this)          gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed);          gf_proc_dump_write("gen", "%d", conf->gen);          gf_proc_dump_write("min_free_disk", "%lf", conf->min_free_disk); -	gf_proc_dump_write("min_free_inodes", "%lf", conf->min_free_inodes); +        gf_proc_dump_write("min_free_inodes", "%lf", conf->min_free_inodes);          gf_proc_dump_write("disk_unit", "%c", conf->disk_unit);          gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval);          gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit); @@ -433,14 +433,14 @@ dht_reconfigure (xlator_t *this, dict_t *options)          GF_OPTION_RECONF ("lookup-optimize", conf->lookup_optimize, options,                            bool, out); -	GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options, +        GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options,                            percent_or_size, out);          /* option can be any one of percent or bytes */          conf->disk_unit = 0;          if (conf->min_free_disk < 100.0)                  conf->disk_unit = 'p'; -	GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options, +        GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options,                            percent, out);          GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt, @@ -711,8 +711,8 @@ dht_init (xlator_t *this)          GF_OPTION_INIT ("use-readdirp", conf->use_readdirp, bool, err); -	GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size, -			err); +        GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size, +                        err);          GF_OPTION_INIT ("min-free-inodes", conf->min_free_inodes, percent,                          err); @@ -901,7 +901,7 @@ struct volume_options options[] = {            "process starts balancing out the cluster, and logs will appear "            "in log files",          }, -	{ .key  = {"min-free-inodes"}, +        { .key  = {"min-free-inodes"},            .type = GF_OPTION_TYPE_PERCENT,            .default_value = "5%",            .description = "after system has only N% of inodes, warnings " @@ -1038,6 +1038,20 @@ struct volume_options options[] = {            .type = GF_OPTION_TYPE_STR,            .default_value = "test",          }, +        { .key         = {"tier-compact"}, +          .type = GF_OPTION_TYPE_BOOL, +          .default_value = "off", +        }, +        { .key         = {"tier-hot-compact-frequency"}, +          .type = GF_OPTION_TYPE_INT, +          .default_value = "604800", +          .description = "Frequency to compact DBs on hot tier in system" +        }, +        { .key         = {"tier-cold-compact-frequency"}, +          .type = GF_OPTION_TYPE_INT, +          .default_value = "604800", +          .description = "Frequency to compact DBs on cold tier in system" +        },          { .key         = {"tier-max-mb"},            .type = GF_OPTION_TYPE_INT,            .default_value = "4000", diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c index 0d53a62d327..7e5e1004b84 100644 --- a/xlators/cluster/dht/src/tier.c +++ b/xlators/cluster/dht/src/tier.c @@ -1240,15 +1240,15 @@ tier_process_ctr_query (tier_brick_list_t *local_brick, void *args)                  gfdb_brick_info->time_stamp,                  sizeof (gfdb_time_t)); -        SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict, -                             GFDB_IPC_CTR_KEY, GFDB_IPC_CTR_QUERY_OPS, -                             ret, out); +        SET_DB_PARAM_TO_DICT (this->name, ctr_ipc_in_dict, +                              GFDB_IPC_CTR_KEY, GFDB_IPC_CTR_QUERY_OPS, +                              ret, out); -        SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict, -                             GFDB_IPC_CTR_GET_QFILE_PATH, -                             local_brick->qfile_path, -                             ret, out); +        SET_DB_PARAM_TO_DICT (this->name, ctr_ipc_in_dict, +                              GFDB_IPC_CTR_GET_QFILE_PATH, +                              local_brick->qfile_path, +                              ret, out);          ret = dict_set_bin (ctr_ipc_in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS,                                  ipc_ctr_params, sizeof (*ipc_ctr_params)); @@ -1360,7 +1360,7 @@ tier_process_brick (tier_brick_list_t *local_brick, void *args) {                          gf_msg ("tier", GF_LOG_ERROR, 0,                                  LG_MSG_SET_PARAM_FAILED, "Failed to set %s "                                  "to params dictionary", -                                GFDB_IPC_CTR_GET_DB_KEY);\ +                                GFDB_IPC_CTR_GET_DB_KEY);                          goto out;                  } @@ -1442,11 +1442,12 @@ tier_build_migration_qfile (migration_args_t *args,          }          time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec; -        /* The migration daemon may run a varrying numberof usec after the sleep */ -        /* call triggers. A file may be registered in CTR some number of usec X */ -        /* after the daemon started and missed in the subsequent cycle if the */ -        /* daemon starts Y usec after the period in seconds where Y>X. Normalize */ -        /* away this problem by always setting usec to 0. */ +        /* The migration daemon may run a varying numberof usec after the */ +        /* sleep call triggers. A file may be registered in CTR some number */ +        /* of usec X after the daemon started and missed in the subsequent */ +        /* cycle if the daemon starts Y usec after the period in seconds */ +        /* where Y>X. Normalize away this problem by always setting usec */ +        /* to 0. */          time_in_past.tv_usec = 0;          gfdb_brick_info.time_stamp = &time_in_past; @@ -1649,6 +1650,265 @@ out:          return ret;  } + +/* + * Command the CTR on a brick to compact the local database using an IPC + */ +static int +tier_process_self_compact (tier_brick_list_t *local_brick, void *args) +{ +        int ret                                         = -1; +        char *db_path                                   = NULL; +        query_cbk_args_t *query_cbk_args                = NULL; +        xlator_t *this                                  = NULL; +        gfdb_conn_node_t *conn_node                     = NULL; +        dict_t *params_dict                             = NULL; +        dict_t *ctr_ipc_dict                            = NULL; +        gfdb_brick_info_t *gfdb_brick_info              = args; +        int is_changing                                 = -1; + +        /*Init of all the essentials*/ +        GF_VALIDATE_OR_GOTO ("tier", gfdb_brick_info , out); +        query_cbk_args = gfdb_brick_info->_query_cbk_args; + +        GF_VALIDATE_OR_GOTO ("tier", query_cbk_args->this, out); +        this = query_cbk_args->this; + +        GF_VALIDATE_OR_GOTO (this->name, +                             gfdb_brick_info->_query_cbk_args, out); + +        GF_VALIDATE_OR_GOTO (this->name, local_brick, out); + +        GF_VALIDATE_OR_GOTO (this->name, local_brick->xlator, out); + +        GF_VALIDATE_OR_GOTO (this->name, local_brick->brick_db_path, out); + +        db_path = local_brick->brick_db_path; + +        /*Preparing DB parameters before init_db i.e getting db connection*/ +        params_dict = dict_new (); +        if (!params_dict) { +                gf_msg (this->name, GF_LOG_ERROR, 0, +                        DHT_MSG_LOG_TIER_ERROR, +                        "DB Params cannot initialized"); +                goto out; +        } +        SET_DB_PARAM_TO_DICT (this->name, params_dict, +                             (char *) gfdb_methods.get_db_path_key(), db_path, +                             ret, out); + +        /*Get the db connection*/ +        conn_node = gfdb_methods.init_db ((void *)params_dict, +                                          dht_tier_db_type); +        if (!conn_node) { +                gf_msg (this->name, GF_LOG_ERROR, 0, +                        DHT_MSG_LOG_TIER_ERROR, +                        "FATAL: Failed initializing db operations"); +                         goto out; +        } + +        ret = 0; + +        /*Preparing ctr_ipc_dict*/ +        ctr_ipc_dict = dict_new (); +        if (!ctr_ipc_dict) { +                gf_msg (this->name, GF_LOG_ERROR, 0, +                        DHT_MSG_LOG_TIER_ERROR, +                        "ctr_ipc_dict cannot initialized"); +                goto out; +        } + +        ret = dict_set_int32 (ctr_ipc_dict, "compact_active", +                              query_cbk_args->defrag-> +                              tier_conf.compact_active); + +        if (ret) { +                gf_msg ("tier", GF_LOG_ERROR, 0, +                        LG_MSG_SET_PARAM_FAILED, "Failed to set %s " +                        "to params dictionary", +                        "compact_active"); +                goto out; +        } + +        ret = dict_set_int32 (ctr_ipc_dict, "compact_mode_switched", +                              query_cbk_args->defrag-> +                              tier_conf.compact_mode_switched); + +        if (ret) { +                gf_msg ("tier", GF_LOG_ERROR, 0, +                        LG_MSG_SET_PARAM_FAILED, "Failed to set %s " +                        "to params dictionary", +                        "compact_mode_switched"); +                goto out; +        } + +        SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, +                             GFDB_IPC_CTR_KEY, GFDB_IPC_CTR_SET_COMPACT_PRAGMA, +                             ret, out); + +        gf_msg (this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, +                "Starting Compaction IPC"); + +        ret = syncop_ipc (local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict, +                          NULL); + +        gf_msg (this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, +                "Ending Compaction IPC"); + +        if (ret) { +                gf_msg (this->name, GF_LOG_ERROR, 0, +                        DHT_MSG_LOG_TIER_ERROR, "Failed compaction " +                        "on db %s error %d", local_brick->brick_db_path, ret); +                goto out; +        } + +        gf_msg (this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, +                "SUCCESS: %s Compaction", local_brick->brick_name); + +        ret = 0; +out: +        if (params_dict) { +                dict_unref (params_dict); +                params_dict = NULL; +        } + +        if (ctr_ipc_dict) { +                dict_unref (ctr_ipc_dict); +                ctr_ipc_dict = NULL; +        } + +        gfdb_methods.fini_db (conn_node); + +        return ret; +} + +/* + * This is the call back function for each brick from hot/cold bricklist. + * It determines the database type on each brick and calls the corresponding + * function to prepare the compaction IPC. + */ +static int +tier_compact_db_brick (tier_brick_list_t *local_brick, void *args) +{ +        int ret = -1; +        char *strval = NULL; + +        GF_VALIDATE_OR_GOTO ("tier", local_brick, out); + +        GF_VALIDATE_OR_GOTO ("tier", local_brick->xlator, out); + +        ret = tier_process_self_compact (local_brick, args); +        if (ret) { +                gf_msg ("tier", GF_LOG_INFO, 0, +                        DHT_MSG_LOG_TIER_STATUS, +                        "Brick %s did not compact", +                        local_brick->brick_name); +                goto out; +        } + +        ret = 0; + +out: + +        return ret; +} + +static int +tier_send_compact (migration_args_t *args, +                   query_cbk_args_t *query_cbk_args) +{ +        gfdb_time_t               current_time; +        gfdb_brick_info_t         gfdb_brick_info; +        gfdb_time_t               time_in_past; +        int                       ret = -1; +        tier_brick_list_t         *local_brick = NULL; + +        time_in_past.tv_sec = args->freq_time; +        time_in_past.tv_usec = 0; + +        ret = gettimeofday (¤t_time, NULL); +        if (ret == -1) { +                gf_msg (args->this->name, GF_LOG_ERROR, errno, +                        DHT_MSG_SYS_CALL_GET_TIME_FAILED, +                        "Failed to get current time"); +                goto out; +        } +        time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec; + +        /* The migration daemon may run a varying numberof usec after the sleep +           call triggers. A file may be registered in CTR some number of usec X +           after the daemon started and missed in the subsequent cycle if the +           daemon starts Y usec after the period in seconds where Y>X. Normalize +           away this problem by always setting usec to 0. */ +        time_in_past.tv_usec = 0; + +        gfdb_brick_info.time_stamp = &time_in_past; + +        /* This is meant to say we are always compacting at this point */ +        /* We simply borrow the promotion flag to do this */ +        gfdb_brick_info._gfdb_promote = 1; + +        gfdb_brick_info._query_cbk_args = query_cbk_args; + +        list_for_each_entry (local_brick, args->brick_list, list) { + +                gf_msg (args->this->name, GF_LOG_TRACE, 0, +                        DHT_MSG_LOG_TIER_STATUS, +                        "Start compaction for %s", +                        local_brick->brick_name); + +                ret = tier_compact_db_brick (local_brick, +                                             &gfdb_brick_info); +                if (ret) { +                        gf_msg (args->this->name, GF_LOG_ERROR, 0, +                                DHT_MSG_BRICK_QUERY_FAILED, +                                "Brick %s compaction failed\n", +                                local_brick->brick_db_path); +                } + +                gf_msg (args->this->name, GF_LOG_TRACE, 0, +                        DHT_MSG_LOG_TIER_STATUS, +                        "End compaction for %s", +                        local_brick->brick_name); + +        } +        ret = 0; +out: +        return ret; +} + +static int +tier_compact (void *args) +{ +        int ret = -1; +        query_cbk_args_t query_cbk_args; +        migration_args_t *compaction_args = args; + +        GF_VALIDATE_OR_GOTO ("tier", compaction_args->this, out); +        GF_VALIDATE_OR_GOTO (compaction_args->this->name, +                             compaction_args->brick_list, out); +        GF_VALIDATE_OR_GOTO (compaction_args->this->name, +                             compaction_args->defrag, out); + +        THIS = compaction_args->this; + +        query_cbk_args.this = compaction_args->this; +        query_cbk_args.defrag = compaction_args->defrag; +        query_cbk_args.is_compaction = 1; + +        /* Send the compaction pragma out to all the bricks on the bricklist. */ +        /* tier_get_bricklist ensures all bricks on the list are local to */ +        /* this node. */ +        ret = tier_send_compact (compaction_args, &query_cbk_args); +        if (ret) +                goto out; + +        ret = 0; +out: +        compaction_args->return_value = ret; +        return ret; + } +  static int  tier_get_bricklist (xlator_t *xl, struct list_head *local_bricklist_head)  { @@ -1755,6 +2015,18 @@ tier_get_freq_promote (gf_tier_conf_t *tier_conf)          return tier_conf->tier_promote_frequency;  } +int +tier_get_freq_compact_hot (gf_tier_conf_t *tier_conf) +{ +        return tier_conf->tier_compact_hot_frequency; +} + +int +tier_get_freq_compact_cold (gf_tier_conf_t *tier_conf) +{ +        return tier_conf->tier_compact_cold_frequency; +} +  static int  tier_check_demote (gfdb_time_t  current_time, int freq)  { @@ -1776,8 +2048,21 @@ tier_check_promote (gf_tier_conf_t   *tier_conf,                          _gf_true : _gf_false;  } +static gf_boolean_t +tier_check_compact (gf_tier_conf_t   *tier_conf, +                    gfdb_time_t  current_time, +                    int freq_compact) +{ + +        if (!(tier_conf->compact_active || +              tier_conf->compact_mode_switched)) +                return _gf_false; +        return ((current_time.tv_sec % freq_compact) == 0) ? +                _gf_true : _gf_false; +} +  void  clear_bricklist (struct list_head *brick_list) @@ -1824,6 +2109,72 @@ out:          return;  } +static int +tier_prepare_compact (migration_args_t *args, gfdb_time_t current_time) +{ +        xlator_t *this = NULL; +        dht_conf_t *conf                        = NULL; +        gf_defrag_info_t *defrag                = NULL; +        gf_tier_conf_t *tier_conf               = NULL; +        gf_boolean_t is_hot_tier = _gf_false; +        int freq = 0; +        int ret = -1; +        const char *tier_type = is_hot_tier ? "hot" : "cold"; + +        this = args->this; + +        conf = this->private; + +        defrag = conf->defrag; + +        tier_conf = &defrag->tier_conf; + +        is_hot_tier = args->is_hot_tier; + +        freq = is_hot_tier ? tier_get_freq_compact_hot (tier_conf) : +                tier_get_freq_compact_cold (tier_conf); + +        defrag->tier_conf.compact_mode_switched = is_hot_tier ? +                defrag->tier_conf.compact_mode_switched_hot : +                defrag->tier_conf.compact_mode_switched_cold; + +        gf_msg(this->name, GF_LOG_TRACE, 0, +               DHT_MSG_LOG_TIER_STATUS, +               "Compact mode %i", +               defrag->tier_conf.compact_mode_switched); + +        if (tier_check_compact (tier_conf, current_time, +                                freq)) { +                gf_msg (this->name, GF_LOG_INFO, 0, +                        DHT_MSG_LOG_TIER_STATUS, +                        "Start compaction on %s tier", +                        tier_type); + +                args->freq_time = freq; +                ret = tier_compact (args); +                if (ret) { +                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                DHT_MSG_LOG_TIER_ERROR, "Compaction failed on " +                                "%s tier", tier_type); +                        goto out; +                } + +                gf_msg (this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, +                        "End compaction on %s tier", tier_type); + +                if (is_hot_tier) { +                        defrag->tier_conf.compact_mode_switched_hot = +                                _gf_false; +                } else { +                        defrag->tier_conf.compact_mode_switched_cold = +                                _gf_false; +                } +        } + +out: +        return ret; +} +  /*   * Main tiering loop. This is called from the promotion and the   * demotion threads spawned in tier_start(). @@ -1846,8 +2197,9 @@ static void          int check_watermark                     = 0;          gf_defrag_info_t *defrag                = NULL;          xlator_t  *this                         = NULL; +        struct list_head *bricklist_temp        = NULL;          migration_args_t *args = in_args; - +        gf_boolean_t compacted = _gf_false;          GF_VALIDATE_OR_GOTO ("tier", args, out);          GF_VALIDATE_OR_GOTO ("tier", args->brick_list, out); @@ -1884,7 +2236,8 @@ static void                  if (xlator != this) {                          gf_msg (this->name, GF_LOG_INFO, 0,                                  DHT_MSG_LOG_TIER_STATUS, -                                "Detected graph switch. Exiting migration daemon."); +                                "Detected graph switch. Exiting migration " +                                "daemon.");                          goto out;                  } @@ -1912,10 +2265,10 @@ static void                          goto out;                  } -                if (gf_defrag_get_pause_state (&defrag->tier_conf) != TIER_RUNNING) +                if (gf_defrag_get_pause_state (&defrag->tier_conf) != +                    TIER_RUNNING)                          continue; -                  /* To have proper synchronization amongst all                   * brick holding nodes, so that promotion and demotions                   * start atomicly w.r.t promotion/demotion frequency @@ -1950,7 +2303,6 @@ static void                  }                  if (args->is_promotion) { -                          freq = tier_get_freq_promote (tier_conf);                          if (tier_check_promote (tier_conf, current_time, freq)) { @@ -1962,21 +2314,22 @@ static void                                                  "Promotion failed");                                  }                          } - +                } else if (args->is_compaction) { +                        tier_prepare_compact (args, current_time);                  } else { -                          freq = tier_get_freq_demote (tier_conf);                          if (tier_check_demote (current_time, freq)) {                                  args->freq_time = freq;                                  ret = tier_demote (args);                                  if (ret) { -                                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                        gf_msg (this->name, +                                                GF_LOG_ERROR, +                                                0,                                                  DHT_MSG_LOG_TIER_ERROR,                                                  "Demotion failed");                                  }                          } -                  }                  /* Check the statfs immediately after the processing threads @@ -1997,11 +2350,15 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)  {          pthread_t promote_thread;          pthread_t demote_thread; +        pthread_t hot_compact_thread; +        pthread_t cold_compact_thread;          int ret = -1;          struct list_head bricklist_hot          = { 0 };          struct list_head bricklist_cold         = { 0 };          migration_args_t promotion_args         = { 0 };          migration_args_t demotion_args          = { 0 }; +        migration_args_t hot_compaction_args    = { 0 }; +        migration_args_t cold_compaction_args   = { 0 };          dht_conf_t *conf                        = NULL;          INIT_LIST_HEAD ((&bricklist_hot)); @@ -2016,6 +2373,7 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)          demotion_args.brick_list = &bricklist_hot;          demotion_args.defrag = defrag;          demotion_args.is_promotion = _gf_false; +        demotion_args.is_compaction = _gf_false;          ret = pthread_create (&demote_thread,                                NULL, &tier_run, @@ -2047,6 +2405,47 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)                  goto waitforspawned;          } +        hot_compaction_args.this = this; +        hot_compaction_args.brick_list = &bricklist_hot; +        hot_compaction_args.defrag = defrag; +        hot_compaction_args.is_promotion = _gf_false; +        hot_compaction_args.is_compaction = _gf_true; +        hot_compaction_args.is_hot_tier = _gf_true; + +        ret = pthread_create (&hot_compact_thread, +                              NULL, &tier_run, +                              &hot_compaction_args); +        if (ret) { +                gf_msg (this->name, GF_LOG_ERROR, 0, +                        DHT_MSG_LOG_TIER_ERROR, +                        "Failed to start compaction thread."); +                defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; +                goto waitforspawnedpromote; +        } + +        cold_compaction_args.this = this; +        cold_compaction_args.brick_list = &bricklist_cold; +        cold_compaction_args.defrag = defrag; +        cold_compaction_args.is_promotion = _gf_false; +        cold_compaction_args.is_compaction = _gf_true; +        cold_compaction_args.is_hot_tier = _gf_false; + +        ret = pthread_create (&cold_compact_thread, +                              NULL, &tier_run, +                              &cold_compaction_args); +        if (ret) { +                gf_msg (this->name, GF_LOG_ERROR, 0, +                        DHT_MSG_LOG_TIER_ERROR, +                        "Failed to start compaction thread."); +                defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; +                goto waitforspawnedhotcompact; +        } +        pthread_join (cold_compact_thread, NULL); + +waitforspawnedhotcompact: +        pthread_join (hot_compact_thread, NULL); + +waitforspawnedpromote:          pthread_join (promote_thread, NULL);  waitforspawned: @@ -2055,7 +2454,6 @@ waitforspawned:  cleanup:          clear_bricklist (&bricklist_cold);          clear_bricklist (&bricklist_hot); -          return ret;  } @@ -2167,8 +2565,8 @@ out:          return ret;  } -static -int tier_validate_mode (char *mode) +static int +tier_validate_mode (char *mode)  {          int ret = -1; @@ -2181,6 +2579,26 @@ int tier_validate_mode (char *mode)          return ret;  } +static gf_boolean_t +tier_validate_compact_mode (char *mode) +{ +        gf_boolean_t ret = _gf_false; + +        gf_msg ("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, +                "tier_validate_compact_mode: mode = %s", mode); + +        if (!strcmp (mode, "on")) { +                ret = _gf_true; +        } else { +                ret = _gf_false; +        } + +        gf_msg ("tier", GF_LOG_ERROR, 0, +                DHT_MSG_LOG_TIER_STATUS, +                "tier_validate_compact_mode: ret = %i", ret); + +        return ret; +}  int  tier_init_methods (xlator_t *this) @@ -2205,8 +2623,6 @@ err:          return ret;  } - -  int  tier_init (xlator_t *this)  { @@ -2291,6 +2707,22 @@ tier_init (xlator_t *this)          defrag->tier_conf.tier_demote_frequency = freq;          ret = dict_get_int32 (this->options, +                              "tier-hot-compact-frequency", &freq); +        if (ret) { +                freq = DEFAULT_HOT_COMPACT_FREQ_SEC; +        } + +        defrag->tier_conf.tier_compact_hot_frequency = freq; + +        ret = dict_get_int32 (this->options, +                              "tier-cold-compact-frequency", &freq); +        if (ret) { +                freq = DEFAULT_COLD_COMPACT_FREQ_SEC; +        } + +        defrag->tier_conf.tier_compact_cold_frequency = freq; + +        ret = dict_get_int32 (this->options,                                "watermark-hi", &freq);          if (ret) {                  freq = DEFAULT_WM_HI; @@ -2339,6 +2771,29 @@ tier_init (xlator_t *this)          defrag->tier_conf.max_migrate_files = freq;          ret = dict_get_str (this->options, +                            "tier-compact", &mode); + +        if (ret) { +                defrag->tier_conf.compact_active = DEFAULT_COMP_MODE; +        } else { +                ret = tier_validate_compact_mode (mode); +                if (ret < 0) { +                        gf_msg(this->name, GF_LOG_ERROR, 0, +                               DHT_MSG_LOG_TIER_ERROR, +                               "tier_init failed - invalid compaction mode"); +                        goto out; +                } + +                /* If compaction is now active, we need to inform the bricks on +                   the hot and cold tier of this. See dht-common.h for more. */ +                defrag->tier_conf.compact_active = ret; +                if (ret) { +                        defrag->tier_conf.compact_mode_switched_hot = _gf_true; +                        defrag->tier_conf.compact_mode_switched_cold = _gf_true; +                } +        } + +        ret = dict_get_str (this->options,                              "tier-mode", &mode);          if (ret) {                  defrag->tier_conf.mode = DEFAULT_TIER_MODE; @@ -2361,7 +2816,8 @@ tier_init (xlator_t *this)                                "tier-pause", &paused);          if (paused && strcmp (paused, "on") == 0) -                gf_defrag_set_pause_state (&defrag->tier_conf, TIER_REQUEST_PAUSE); +                gf_defrag_set_pause_state (&defrag->tier_conf, +                                           TIER_REQUEST_PAUSE);          ret = gf_asprintf(&voldir, "%s/%s",                            DEFAULT_VAR_RUN_DIRECTORY, @@ -2411,7 +2867,6 @@ out:          return ret;  } -  int  tier_cli_pause_done (int op_ret, call_frame_t *sync_frame, void *data)  { @@ -2445,17 +2900,17 @@ exit:          return ret;  } -  int  tier_reconfigure (xlator_t *this, dict_t *options)  { -        dht_conf_t       *conf           = NULL; -        gf_defrag_info_t *defrag         = NULL; -        char             *mode           = NULL; -        int               migrate_mb     = 0; -        gf_boolean_t      req_pause      = _gf_false; -        int               ret            = 0; -        call_frame_t            *frame  = NULL; +        dht_conf_t       *conf            = NULL; +        gf_defrag_info_t *defrag          = NULL; +        char             *mode            = NULL; +        int               migrate_mb      = 0; +        gf_boolean_t      req_pause       = _gf_false; +        int               ret             = 0; +        call_frame_t            *frame    = NULL; +        gf_boolean_t last_compact_setting = _gf_false;          conf = this->private; @@ -2489,6 +2944,28 @@ tier_reconfigure (xlator_t *this, dict_t *options)                                    defrag->tier_conf.watermark_low, options,                                    int32, out); +                last_compact_setting = defrag->tier_conf.compact_active; + +                GF_OPTION_RECONF ("tier-compact", +                                  defrag->tier_conf.compact_active, options, +                                  bool, out); + +                if (last_compact_setting != defrag->tier_conf.compact_active) { +                        defrag->tier_conf.compact_mode_switched_hot = _gf_true; +                        defrag->tier_conf.compact_mode_switched_cold = _gf_true; +                        gf_msg (this->name, GF_LOG_INFO, 0, +                                DHT_MSG_LOG_TIER_STATUS, +                                "compact mode switched"); +                } + +                GF_OPTION_RECONF ("tier-hot-compact-frequency", +                                  defrag->tier_conf.tier_compact_hot_frequency, +                                  options, int32, out); + +                GF_OPTION_RECONF ("tier-cold-compact-frequency", +                                  defrag->tier_conf.tier_compact_cold_frequency, +                                  options, int32, out); +                  GF_OPTION_RECONF ("tier-mode",                                    mode, options,                                    str, out); @@ -2558,7 +3035,6 @@ class_methods_t class_methods = {          .notify         = dht_notify  }; -  struct xlator_fops fops = {          .lookup      = dht_lookup, @@ -2611,9 +3087,7 @@ struct xlator_fops fops = {          .zerofill    = dht_zerofill,  }; -  struct xlator_cbks cbks = {          .release    = dht_release,          .forget     = dht_forget  }; - diff --git a/xlators/cluster/dht/src/tier.h b/xlators/cluster/dht/src/tier.h index 0807608fda2..ffb04173bd5 100644 --- a/xlators/cluster/dht/src/tier.h +++ b/xlators/cluster/dht/src/tier.h @@ -54,6 +54,7 @@ typedef struct _query_cbk_args {          /* This is write */          int                     query_fd;          int                     is_promotion; +        int                     is_compaction;          /* This is for read */          tier_qfile_array_t       *qfile_array;  } query_cbk_args_t; @@ -82,6 +83,8 @@ typedef struct _dm_thread_args {          int                     freq_time;          int                     return_value;          int                     is_promotion; +        int                     is_compaction; +        gf_boolean_t            is_hot_tier;  } migration_args_t;  typedef enum tier_watermark_op_ { @@ -93,12 +96,15 @@ typedef enum tier_watermark_op_ {  #define DEFAULT_PROMOTE_FREQ_SEC       120  #define DEFAULT_DEMOTE_FREQ_SEC        120 +#define DEFAULT_HOT_COMPACT_FREQ_SEC   604800 +#define DEFAULT_COLD_COMPACT_FREQ_SEC  604800  #define DEFAULT_DEMOTE_DEGRADED        10  #define DEFAULT_WRITE_FREQ_SEC         0  #define DEFAULT_READ_FREQ_SEC          0  #define DEFAULT_WM_LOW                 75  #define DEFAULT_WM_HI                  90  #define DEFAULT_TIER_MODE              TIER_MODE_TEST +#define DEFAULT_COMP_MODE              _gf_true  #define DEFAULT_TIER_MAX_MIGRATE_MB    1000  #define DEFAULT_TIER_MAX_MIGRATE_FILES 5000 diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.c b/xlators/features/changetimerecorder/src/changetimerecorder.c index 5f3a074acd5..933f496028c 100644 --- a/xlators/features/changetimerecorder/src/changetimerecorder.c +++ b/xlators/features/changetimerecorder/src/changetimerecorder.c @@ -15,6 +15,8 @@  #include "ctr-messages.h"  #include "syscall.h" +#include "changetimerecorder.h" +  /*******************************inode forget***********************************/  int @@ -1789,6 +1791,61 @@ out:          return ret;  } +void * +ctr_compact_thread (void *args) +{ +        int ret = -1; +        void *db_conn = NULL; + +        xlator_t *this = NULL; +        gf_ctr_private_t *priv = NULL; +        gf_boolean_t compact_active = _gf_false; +        gf_boolean_t compact_mode_switched = _gf_false; + +        this = (xlator_t *)args; + +        GF_VALIDATE_OR_GOTO("ctr", this, out); + +        priv = this->private; + +        db_conn = priv->_db_conn; +        compact_active = priv->compact_active; +        compact_mode_switched = priv->compact_mode_switched; + +        gf_msg ("ctr-compact", GF_LOG_INFO, 0, CTR_MSG_SET, +                "Starting compaction"); + +        ret = compact_db(db_conn, compact_active, +                         compact_mode_switched); + +        if (ret) { +                gf_msg ("ctr-compact", GF_LOG_ERROR, 0, CTR_MSG_SET, +                        "Failed to perform the compaction"); +        } + +        ret = pthread_mutex_lock (&priv->compact_lock); + +        if (ret) { +                gf_msg (this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, +                        "Failed to acquire lock"); +                goto out; +        } + +        /* We are done compaction on this brick. Set all flags to false */ +        priv->compact_active = _gf_false; +        priv->compact_mode_switched = _gf_false; + +        ret = pthread_mutex_unlock (&priv->compact_lock); + +        if (ret) { +                gf_msg (this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, +                        "Failed to release lock"); +                goto out; +        } + +out: +        return NULL; +}  int  ctr_ipc_helper (xlator_t *this, dict_t *in_dict, @@ -1802,7 +1859,8 @@ ctr_ipc_helper (xlator_t *this, dict_t *in_dict,          char *db_param = NULL;          char *query_file = NULL;          gfdb_ipc_ctr_params_t *ipc_ctr_params = NULL; - +        int result = 0; +        pthread_t compact_thread;          GF_VALIDATE_OR_GOTO ("ctr", this, out);          GF_VALIDATE_OR_GOTO (this->name, this->private, out); @@ -1888,12 +1946,78 @@ ctr_ipc_helper (xlator_t *this, dict_t *in_dict,                  SET_DB_PARAM_TO_DICT(this->name, out_dict,                                          db_param_key,                                          db_param, ret, error); +        } /* if its an attempt to compact the database */ +        else if (strncmp (ctr_ipc_ops, GFDB_IPC_CTR_SET_COMPACT_PRAGMA, +                          strlen (GFDB_IPC_CTR_SET_COMPACT_PRAGMA)) == 0) { + +                ret = pthread_mutex_lock (&priv->compact_lock); +                if (ret) { +                        gf_msg (this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, +                                "Failed to acquire lock for compaction"); +                        goto out; +                } + +                if ((priv->compact_active || priv->compact_mode_switched)) { +                        /* Compaction in progress. LEAVE */ +                        gf_msg (this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, +                               "Compaction already in progress."); +                        pthread_mutex_unlock (&priv->compact_lock); +                        goto out; +                } +                /* At this point, we should be the only one on the brick */ +                /* compacting */ + +                /* Grab the arguments from the dictionary */ +                ret = dict_get_int32 (in_dict, "compact_active", &result); +                if (ret) { +                        gf_msg (this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, +                               "Failed to get compaction type"); +                        goto out; +                } + +                if (result) { +                        priv->compact_active = _gf_true; +                } + +                ret = dict_get_int32 (in_dict, "compact_mode_switched" +                                     , &result); +                if (ret) { +                        gf_msg (this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, +                               "Failed to see if compaction switched"); +                        goto out; +                } + +                if (result) { +                        priv->compact_mode_switched = _gf_true; +                        gf_msg ("ctr-compact", GF_LOG_TRACE, 0, CTR_MSG_SET, +                                "Pre-thread: Compact mode switch is true"); +                } else { +                        gf_msg ("ctr-compact", GF_LOG_TRACE, 0, CTR_MSG_SET, +                                "Pre-thread: Compact mode switch is false"); +                } + +                ret = pthread_mutex_unlock (&priv->compact_lock); +                if (ret) { +                        gf_msg (this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, +                                "Failed to release lock for compaction"); +                        goto out; +                } + +                ret = pthread_create (&compact_thread, NULL, ctr_compact_thread, +                                      (void *)this); + +                if (ret) { +                        gf_msg (this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, +                                "Failed to spawn compaction thread"); +                        goto out; +                } + +                goto out;          } /* default case */          else {                  goto out;          } -          ret = 0;          goto out;  error: @@ -2079,6 +2203,18 @@ init (xlator_t *this)          priv->ctr_lookupheal_inode_timeout =                                  CTR_DEFAULT_INODE_EXP_PERIOD; +        /* For compaction */ +        priv->compact_active = _gf_false; +        priv->compact_mode_switched = _gf_false; +        ret_db = pthread_mutex_init (&priv->compact_lock, NULL); + +        if (ret_db) { +                gf_msg (this->name, GF_LOG_ERROR, 0, +                        CTR_MSG_FATAL_ERROR, +                        "FATAL: Failed initializing compaction mutex"); +                goto error; +        } +          /*Extract ctr xlator options*/          ret_db = extract_ctr_options (this, priv);          if (ret_db) { @@ -2123,6 +2259,7 @@ init (xlator_t *this)                          goto error;          } +          ret_db = 0;          goto out; @@ -2185,6 +2322,11 @@ fini (xlator_t *this)                                  "db connection");                  }                  GF_FREE (priv->ctr_db_path); +                if (pthread_mutex_destroy (&priv->compact_lock)) { +                        gf_msg (this->name, GF_LOG_WARNING, 0, +                                CTR_MSG_CLOSE_DB_CONN_FAILED, "Failed to " +                                "destroy the compaction mutex"); +                }          }          GF_FREE (priv);          mem_pool_destroy (this->local_pool); diff --git a/xlators/features/changetimerecorder/src/ctr-helper.h b/xlators/features/changetimerecorder/src/ctr-helper.h index d5615270184..4fd4f745f4d 100644 --- a/xlators/features/changetimerecorder/src/ctr-helper.h +++ b/xlators/features/changetimerecorder/src/ctr-helper.h @@ -22,6 +22,7 @@  #include "common-utils.h"  #include <time.h>  #include <sys/time.h> +#include <pthread.h>  #include "gfdb_data_store.h"  #include "ctr-xlator-ctx.h" @@ -52,6 +53,9 @@ typedef struct gf_ctr_private {          gfdb_conn_node_t                *_db_conn;          uint64_t                        ctr_lookupheal_link_timeout;          uint64_t                        ctr_lookupheal_inode_timeout; +        gf_boolean_t                    compact_active; +        gf_boolean_t                    compact_mode_switched; +        pthread_mutex_t                 compact_lock;  } gf_ctr_private_t; diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h index e520c69add2..2ba1876b6ec 100644 --- a/xlators/mgmt/glusterd/src/glusterd-messages.h +++ b/xlators/mgmt/glusterd/src/glusterd-messages.h @@ -4755,4 +4755,3 @@  /*------------*/  #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"  #endif /* !_GLUSTERD_MESSAGES_H_ */ - diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index ce34ffd2b05..d87082e9e89 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -387,6 +387,14 @@ validate_tier (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,                          goto out;                  }                  goto out; +        } else if (strstr (key, "tier-compact")) { +                if (strcmp (value, "on") && +                    strcmp (value, "off")) { +                        ret = -1; +                        goto out; +                } + +                goto out;          }          /* @@ -452,7 +460,9 @@ validate_tier (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,                     strstr (key, "tier-max-mb") ||                     strstr (key, "tier-max-promote-file-size") ||                     strstr (key, "tier-max-files") || -                   strstr (key, "tier-demote-frequency")) { +                   strstr (key, "tier-demote-frequency") || +                   strstr (key, "tier-hot-compact-frequency") || +                   strstr (key, "tier-cold-compact-frequency")) {                  if (origin_val < 1) {                          snprintf (errstr, sizeof (errstr), "%s is not a "                                    " compatible value. %s expects a positive " @@ -464,7 +474,6 @@ validate_tier (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,                          ret = -1;                          goto out;                  } -          }  out:          gf_msg_debug (this->name, 0, "Returning %d", ret); @@ -1589,17 +1598,17 @@ struct volopt_map_entry glusterd_volopt_map[] = {            .flags      = OPT_FLAG_CLIENT_OPT          }, - 	/* Crypt xlator options */ +         /* Crypt xlator options */ -	{ .key         = "features.encryption", -	  .voltype     = "encryption/crypt", -	  .option      = "!feat", -	  .value       = "off", -	  .op_version  = 3, -	  .description = "enable/disable client-side encryption for " +        { .key         = "features.encryption", +          .voltype     = "encryption/crypt", +          .option      = "!feat", +          .value       = "off", +          .op_version  = 3, +          .description = "enable/disable client-side encryption for "                           "the volume.", -	  .flags       = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT -	}, +          .flags       = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT +        },          { .key         = "encryption.master-key",            .voltype     = "encryption/crypt", @@ -1968,7 +1977,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {            .flags      = OPT_FLAG_CLIENT_OPT          }, -	/* Feature translators */ +        /* Feature translators */          { .key         = "features.uss",            .voltype     = "features/snapview-server",            .op_version  = GD_OP_VERSION_3_6_0, @@ -2730,6 +2739,32 @@ struct volopt_map_entry glusterd_volopt_map[] = {            .description = "The maximum number of files that may be migrated"            " in any direction in a given cycle by a single node."          }, +        { .key         = "cluster.tier-compact", +          .voltype     = "cluster/tier", +          .option      = "tier-compact", +          .value       = "on", +          .op_version  = GD_OP_VERSION_3_9_0, +          .flags       = OPT_FLAG_CLIENT_OPT, +          .validate_fn = validate_tier, +          .description = "Activate or deactivate the compaction of the DB" +          " for the volume's metadata." +        }, +        { .key         = "cluster.tier-hot-compact-frequency", +          .voltype     = "cluster/tier", +          .value       = "604800", +          .option      = "tier-hot-compact-frequency", +          .op_version  = GD_OP_VERSION_3_9_0, +          .flags       = OPT_FLAG_CLIENT_OPT, +          .validate_fn = validate_tier, +        }, +        { .key         = "cluster.tier-cold-compact-frequency", +          .voltype     = "cluster/tier", +          .value       = "604800", +          .option      = "tier-cold-compact-frequency", +          .op_version  = GD_OP_VERSION_3_9_0, +          .flags       = OPT_FLAG_CLIENT_OPT, +          .validate_fn = validate_tier, +        },          { .key         = "features.ctr-enabled",            .voltype     = "features/changetimerecorder",            .value       = "off",  | 
