diff options
| -rw-r--r-- | libglusterfs/src/Makefile.am | 3 | ||||
| -rw-r--r-- | libglusterfs/src/gfdb/gfdb_data_store.c | 8 | ||||
| -rw-r--r-- | libglusterfs/src/gfdb/gfdb_data_store.h | 48 | ||||
| -rw-r--r-- | libglusterfs/src/gfdb/gfdb_data_store_types.h | 8 | ||||
| -rw-r--r-- | libglusterfs/src/gfdb/gfdb_sqlite3.c | 37 | ||||
| -rw-r--r-- | libglusterfs/src/gfdb/gfdb_sqlite3.h | 3 | ||||
| -rw-r--r-- | libglusterfs/src/tier-ctr-interface.h | 44 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 1 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-shared.c | 4 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/tier.c | 93 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/tier.h | 3 | ||||
| -rw-r--r-- | xlators/features/changetimerecorder/src/changetimerecorder.c | 15 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 15 | 
13 files changed, 213 insertions, 69 deletions
diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am index 20163da769c..e9e690ee4bd 100644 --- a/libglusterfs/src/Makefile.am +++ b/libglusterfs/src/Makefile.am @@ -61,7 +61,8 @@ noinst_HEADERS = unittest/unittest.h \  	$(CONTRIBDIR)/rbtree/rb.h \  	$(CONTRIBDIR)/mount/mntent_compat.h \  	$(CONTRIBDIR)/libexecinfo/execinfo_compat.h \ -	$(CONTRIBDIR)/timer-wheel/timer-wheel.h +	$(CONTRIBDIR)/timer-wheel/timer-wheel.h \ +	tier-ctr-interface.h  if !HAVE_LIBUUID  # FIXME: unbundle libuuid, see compat-uuid.h. diff --git a/libglusterfs/src/gfdb/gfdb_data_store.c b/libglusterfs/src/gfdb/gfdb_data_store.c index cb567503fa3..7074c4a51c2 100644 --- a/libglusterfs/src/gfdb/gfdb_data_store.c +++ b/libglusterfs/src/gfdb/gfdb_data_store.c @@ -481,12 +481,14 @@ compact_db (gfdb_conn_node_t *_conn_node, gf_boolean_t _compact_active,   *                        for every record found   *      _query_cbk_args : Custom argument passed for the call back   *                        function query_callback + *      query_limit     : number to limit number of rows returned by the query   * Returns : if successful return 0 or   *          -ve value in case of failure*/  int  find_all (gfdb_conn_node_t      *_conn_node,            gf_query_callback_t   query_callback, -          void                  *_query_cbk_args) +          void                  *_query_cbk_args, +          int                   query_limit)  {          int ret                                 = 0;          gfdb_db_operations_t *db_operations_t   = NULL; @@ -500,7 +502,8 @@ find_all (gfdb_conn_node_t      *_conn_node,          if (db_operations_t->find_all_op) {                  ret = db_operations_t->find_all_op (gf_db_connection,                                                      query_callback, -                                                    _query_cbk_args); +                                                    _query_cbk_args, +                                                    query_limit);                  if (ret) {                          gf_msg (GFDB_DATA_STORE, GF_LOG_ERROR, 0,                                  LG_MSG_FIND_OP_FAILED, "Find all operation " @@ -851,6 +854,7 @@ void get_gfdb_methods (gfdb_methods_t *methods)  {          methods->init_db = init_db;          methods->fini_db = fini_db; +        methods->find_all = find_all;          methods->find_unchanged_for_time = find_unchanged_for_time;          methods->find_recently_changed_files = find_recently_changed_files;          methods->find_unchanged_for_time_freq = find_unchanged_for_time_freq; diff --git a/libglusterfs/src/gfdb/gfdb_data_store.h b/libglusterfs/src/gfdb/gfdb_data_store.h index 0aac4611153..beb954c190a 100644 --- a/libglusterfs/src/gfdb/gfdb_data_store.h +++ b/libglusterfs/src/gfdb/gfdb_data_store.h @@ -20,42 +20,6 @@  #include "gfdb_data_store_types.h" -#define GFDB_IPC_CTR_KEY "gfdb.ipc-ctr-op" - -/* - * CTR IPC OPERATIONS - * - * - */ -#define GFDB_IPC_CTR_QUERY_OPS "gfdb.ipc-ctr-query-op" -#define GFDB_IPC_CTR_CLEAR_OPS "gfdb.ipc-ctr-clear-op" -#define GFDB_IPC_CTR_GET_DB_PARAM_OPS "gfdb.ipc-ctr-get-db-parm" -#define GFDB_IPC_CTR_GET_DB_VERSION_OPS "gfdb.ipc-ctr-get-db-version" -#define GFDB_IPC_CTR_SET_COMPACT_PRAGMA "gfdb.ipc-ctr-set-compact-pragma" -/* - * CTR IPC INPUT/OUTPUT - * - * - */ -#define GFDB_IPC_CTR_GET_QFILE_PATH "gfdb.ipc-ctr-get-qfile-path" -#define GFDB_IPC_CTR_GET_QUERY_PARAMS "gfdb.ipc-ctr-get-query-parms" -#define GFDB_IPC_CTR_RET_QUERY_COUNT "gfdb.ipc-ctr-ret-rec-count" -#define GFDB_IPC_CTR_GET_DB_KEY "gfdb.ipc-ctr-get-params-key" -#define GFDB_IPC_CTR_RET_DB_VERSION "gfdb.ipc-ctr-ret-db-version" - -/* - * gfdb ipc ctr params for query - * - * - */ -typedef struct gfdb_ipc_ctr_params { -        gf_boolean_t is_promote; -        int write_freq_threshold; -        int read_freq_threshold; -        gfdb_time_t time_stamp; -} gfdb_ipc_ctr_params_t; - -  /* GFDB Connection Node:   * ~~~~~~~~~~~~~~~~~~~~   * Represents the connection to the database while using libgfdb @@ -146,11 +110,20 @@ delete_record(gfdb_conn_node_t *, gfdb_db_record_t *gfdb_db_record);   *                        for every record found   *      _query_cbk_args : Custom argument passed for the call back   *                        function query_callback + *      query_limit     : 0 - umlimited, + *                        any positive value - adds the LIMIT clause + *                        to the SQL query + *   * Returns : if successful return 0 or   *          -ve value in case of failure*/  int find_all(gfdb_conn_node_t *, gf_query_callback_t query_callback, -                void *_query_cbk_args); +                void *_query_cbk_args, +                int query_limit); +typedef int (*find_all_t) (gfdb_conn_node_t *, +                           gf_query_callback_t query_callback, +                           void *_query_cbk_args, +                           int query_limit); @@ -368,6 +341,7 @@ typedef int (*compact_db_t)(gfdb_conn_node_t *db_conn,  typedef struct gfdb_methods_s {          init_db_t                       init_db;          fini_db_t                       fini_db; +        find_all_t                      find_all;          find_unchanged_for_time_t       find_unchanged_for_time;          find_recently_changed_files_t   find_recently_changed_files;          find_unchanged_for_time_freq_t  find_unchanged_for_time_freq; diff --git a/libglusterfs/src/gfdb/gfdb_data_store_types.h b/libglusterfs/src/gfdb/gfdb_data_store_types.h index d0c96370eb8..02b7aa0fd33 100644 --- a/libglusterfs/src/gfdb/gfdb_data_store_types.h +++ b/libglusterfs/src/gfdb/gfdb_data_store_types.h @@ -366,12 +366,18 @@ typedef int   *                        for every record found   *      _query_cbk_args : Custom argument passed for the call back   *                        function query_callback + *      query_limit     : 0 - list all files + *                        positive value - add the LIMIT clause to + *                        the SQL query to limit the number of records + *                        returned + *   * Returns : if successful return 0 or   *          -ve value in case of failure*/  typedef int  (*gfdb_find_all_t)(void *db_conn,                                 gf_query_callback_t query_callback, -                               void *_cbk_args); +                               void *_cbk_args, +                               int query_limit); diff --git a/libglusterfs/src/gfdb/gfdb_sqlite3.c b/libglusterfs/src/gfdb/gfdb_sqlite3.c index 094028361c5..4284ed9a69d 100644 --- a/libglusterfs/src/gfdb/gfdb_sqlite3.c +++ b/libglusterfs/src/gfdb/gfdb_sqlite3.c @@ -632,12 +632,15 @@ gf_get_basic_query_stmt (char **out_stmt)   * */  int  gf_sqlite3_find_all (void *db_conn, gf_query_callback_t query_callback, -                        void *query_cbk_args) +                        void *query_cbk_args, +                        int query_limit)  {          int ret                                 =       -1;          char *query_str                         =       NULL;          gf_sql_connection_t *sql_conn           =       db_conn;          sqlite3_stmt *prep_stmt                 =       NULL; +        char *limit_query                       =       NULL; +        char *query                             =       NULL;          CHECK_SQL_CONN (sql_conn, out);          GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, query_callback, out); @@ -647,12 +650,28 @@ gf_sqlite3_find_all (void *db_conn, gf_query_callback_t query_callback,                  goto out;          } -        ret = sqlite3_prepare (sql_conn->sqlite3_db_conn, query_str, -1, +        query = query_str; + +        if (query_limit > 0) { +                ret = gf_asprintf (&limit_query, "%s LIMIT %d", +                                   query, query_limit); +                if (ret < 0) { +                        gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, +                                LG_MSG_QUERY_FAILED, +                                "Failed creating limit query statement"); +                        limit_query = NULL; +                        goto out; +                } + +                query = limit_query; +        } + +        ret = sqlite3_prepare (sql_conn->sqlite3_db_conn, query, -1,                                  &prep_stmt, 0);          if (ret != SQLITE_OK) {                  gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, -                        LG_MSG_PREPARE_FAILED, "Failed to prepare statement %s :" -                        "%s", query_str, +                        LG_MSG_PREPARE_FAILED, +                        "Failed to prepare statement %s: %s", query,                          sqlite3_errmsg (sql_conn->sqlite3_db_conn));                  ret = -1;                  goto out; @@ -661,7 +680,7 @@ gf_sqlite3_find_all (void *db_conn, gf_query_callback_t query_callback,          ret = gf_sql_query_function (prep_stmt, query_callback, query_cbk_args);          if (ret) {                  gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED, -                        "Failed Query %s", query_str); +                        "Failed Query %s", query);                  goto out;          } @@ -669,6 +688,10 @@ gf_sqlite3_find_all (void *db_conn, gf_query_callback_t query_callback,  out:          sqlite3_finalize (prep_stmt);          GF_FREE (query_str); + +        if (limit_query) +                GF_FREE (limit_query); +          return ret;  } @@ -1070,10 +1093,10 @@ gf_sqlite3_find_unchanged_for_time_freq (void *db_conn,                  GF_COL_TB_WMSEC ") >= ? ) ) )"                  " AND "                  /*Second condition: For Reads -                 * Files that have reaASCd wind time smaller than for_time +                 * Files that have read wind time smaller than for_time                   * OR                   * File that have read wind time greater than for_time, -                 * but write_frequency less than freq_write_cnt*/ +                 * but read_frequency less than freq_read_cnt*/                  "( ((" GF_COL_TB_RWSEC " * " TOSTRING(GFDB_MICROSEC) " + "                  GF_COL_TB_RWMSEC ") < ? )"                  " OR " diff --git a/libglusterfs/src/gfdb/gfdb_sqlite3.h b/libglusterfs/src/gfdb/gfdb_sqlite3.h index 52b84da251c..5b55b0ace5e 100644 --- a/libglusterfs/src/gfdb/gfdb_sqlite3.h +++ b/libglusterfs/src/gfdb/gfdb_sqlite3.h @@ -258,7 +258,8 @@ int gf_sqlite3_delete (void *db_conn, gfdb_db_record_t *);  /*querying modules*/  int gf_sqlite3_find_all (void *db_conn, gf_query_callback_t, -                        void *_query_cbk_args); +                        void *_query_cbk_args, +                        int query_limit);  int gf_sqlite3_find_unchanged_for_time (void *db_conn,                                          gf_query_callback_t query_callback,                                          void *_query_cbk_args, diff --git a/libglusterfs/src/tier-ctr-interface.h b/libglusterfs/src/tier-ctr-interface.h new file mode 100644 index 00000000000..cfd3f8a5e5d --- /dev/null +++ b/libglusterfs/src/tier-ctr-interface.h @@ -0,0 +1,44 @@ +#ifndef _TIER_CTR_INTERFACE_H_ +#define _TIER_CTR_INTERFACE_H_ + +#include "common-utils.h" +#include "gfdb_data_store_types.h" + +#define GFDB_IPC_CTR_KEY "gfdb.ipc-ctr-op" + +/* + * CTR IPC OPERATIONS + * + * + */ +#define GFDB_IPC_CTR_QUERY_OPS "gfdb.ipc-ctr-query-op" +#define GFDB_IPC_CTR_CLEAR_OPS "gfdb.ipc-ctr-clear-op" +#define GFDB_IPC_CTR_GET_DB_PARAM_OPS "gfdb.ipc-ctr-get-db-parm" +#define GFDB_IPC_CTR_GET_DB_VERSION_OPS "gfdb.ipc-ctr-get-db-version" +#define GFDB_IPC_CTR_SET_COMPACT_PRAGMA "gfdb.ipc-ctr-set-compact-pragma" +/* + * CTR IPC INPUT/OUTPUT + * + * + */ +#define GFDB_IPC_CTR_GET_QFILE_PATH "gfdb.ipc-ctr-get-qfile-path" +#define GFDB_IPC_CTR_GET_QUERY_PARAMS "gfdb.ipc-ctr-get-query-parms" +#define GFDB_IPC_CTR_RET_QUERY_COUNT "gfdb.ipc-ctr-ret-rec-count" +#define GFDB_IPC_CTR_GET_DB_KEY "gfdb.ipc-ctr-get-params-key" +#define GFDB_IPC_CTR_RET_DB_VERSION "gfdb.ipc-ctr-ret-db-version" + +/* + * gfdb ipc ctr params for query + * + * + */ +typedef struct gfdb_ipc_ctr_params { +        gf_boolean_t is_promote; +        int write_freq_threshold; +        int read_freq_threshold; +        gfdb_time_t time_stamp; +        int query_limit; +        gf_boolean_t emergency_demote; +} gfdb_ipc_ctr_params_t; + +#endif diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 7adc849fb63..719d214f92d 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -396,6 +396,7 @@ typedef struct gf_tier_conf {          int                          percent_full;          uint64_t                     max_migrate_bytes;          int                          max_migrate_files; +        int                          query_limit;          tier_mode_t                  mode;          /* These flags are only used for tier-compact */          gf_boolean_t                 compact_active; diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c index f410f71b5a6..46bf461cf63 100644 --- a/xlators/cluster/dht/src/dht-shared.c +++ b/xlators/cluster/dht/src/dht-shared.c @@ -1064,6 +1064,10 @@ struct volume_options options[] = {            .type = GF_OPTION_TYPE_INT,            .default_value = "10000",          }, +        { .key         = {"tier-query-limit"}, +          .type = GF_OPTION_TYPE_INT, +          .default_value = "100", +        },          /* switch option */          { .key  = {"pattern.switch.case"},            .type = GF_OPTION_TYPE_ANY diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c index 2f8eddbc07d..60e967a88a2 100644 --- a/xlators/cluster/dht/src/tier.c +++ b/xlators/cluster/dht/src/tier.c @@ -15,6 +15,7 @@  #include "tier-common.h"  #include "syscall.h"  #include "events.h" +#include "tier-ctr-interface.h"  /*Hard coded DB info*/  static gfdb_db_type_t dht_tier_db_type = GFDB_SQLITE3; @@ -193,6 +194,7 @@ out:  /* Check and update the watermark every WM_INTERVAL seconds */  #define WM_INTERVAL            5 +#define WM_INTERVAL_EMERG      1  static int  tier_check_same_node (xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag) @@ -523,7 +525,7 @@ tier_can_promote_file (xlator_t *this, char const *file_name,                                  defrag->tier_conf.blocks_used;          /* test if the estimated block usage goes above HI watermark */ -        if (GF_PERCENTAGE (estimated_usage, defrag->tier_conf.blocks_total) > +        if (GF_PERCENTAGE (estimated_usage, defrag->tier_conf.blocks_total) >=                          defrag->tier_conf.watermark_hi) {                  gf_msg (this->name, GF_LOG_INFO, 0,                          DHT_MSG_LOG_TIER_STATUS, @@ -575,6 +577,7 @@ tier_migrate_using_query_file (void *_args)          gfdb_time_t  current_time               = { 0 };          int total_time                          = 0;          int max_time                            = 0; +        gf_boolean_t emergency_demote_mode      = _gf_false;          GF_VALIDATE_OR_GOTO ("tier", query_cbk_args, out); @@ -591,6 +594,9 @@ tier_migrate_using_query_file (void *_args)          if (!migrate_data)                  goto out; +        emergency_demote_mode = (!query_cbk_args->is_promotion && +                                 is_hot_tier_full(&defrag->tier_conf)); +          xdata_request = dict_new ();          if (!xdata_request) {                  gf_msg (this->name, GF_LOG_ERROR, 0, @@ -1013,6 +1019,18 @@ per_file_out:                  gfdb_methods.gfdb_query_record_free (query_record);                  query_record = NULL; + +                /* If we are demoting and the entry watermark was HI, then +                 * we are done with emergency demotions if the current +                 * watermark has fallen below hi-watermark level +                 */ +                if (emergency_demote_mode) { +                        if (tier_check_watermark (this) == 0) { +                                if (!is_hot_tier_full (&defrag->tier_conf)) { +                                        break; +                                } +                        } +                }          }  out: @@ -1126,14 +1144,23 @@ tier_process_self_query (tier_brick_list_t *local_brick, void *args)                  goto out;          }          if (!gfdb_brick_info->_gfdb_promote) { -                if (query_cbk_args->defrag->write_freq_threshold == 0 && -                        query_cbk_args->defrag->read_freq_threshold == 0) { -                                ret = gfdb_methods.find_unchanged_for_time ( -                                        conn_node, -                                        tier_gf_query_callback, -                                        (void *)query_cbk_args, -                                        gfdb_brick_info->time_stamp); +                if (query_cbk_args->defrag->tier_conf.watermark_last == +                        TIER_WM_HI) { +                        /* emergency demotion mode */ +                        ret = gfdb_methods.find_all (conn_node, +                                tier_gf_query_callback, +                                (void *)query_cbk_args, +                                query_cbk_args->defrag->tier_conf. +                                        query_limit);                  } else { +                        if (query_cbk_args->defrag->write_freq_threshold == 0 && +                            query_cbk_args->defrag->read_freq_threshold == 0) { +                                ret = gfdb_methods.find_unchanged_for_time ( +                                                conn_node, +                                                tier_gf_query_callback, +                                                (void *)query_cbk_args, +                                                gfdb_brick_info->time_stamp); +                        } else {                                  ret = gfdb_methods.find_unchanged_for_time_freq (                                          conn_node,                                          tier_gf_query_callback, @@ -1144,6 +1171,7 @@ tier_process_self_query (tier_brick_list_t *local_brick, void *args)                                          query_cbk_args->defrag->                                                          read_freq_threshold,                                          _gf_false); +                        }                  }          } else {                  if (query_cbk_args->defrag->write_freq_threshold == 0 && @@ -1159,8 +1187,7 @@ tier_process_self_query (tier_brick_list_t *local_brick, void *args)                                  tier_gf_query_callback,                                  (void *)query_cbk_args,                                  gfdb_brick_info->time_stamp, -                                query_cbk_args->defrag-> -                                write_freq_threshold, +                                query_cbk_args->defrag->write_freq_threshold,                                  query_cbk_args->defrag->read_freq_threshold,                                  _gf_false);                  } @@ -1267,10 +1294,21 @@ tier_process_ctr_query (tier_brick_list_t *local_brick, void *args)          /* set all the query params*/          ipc_ctr_params->is_promote = gfdb_brick_info->_gfdb_promote; -        ipc_ctr_params->write_freq_threshold = query_cbk_args-> -                                                defrag->write_freq_threshold; -        ipc_ctr_params->read_freq_threshold = query_cbk_args-> -                                                defrag->read_freq_threshold; + +        ipc_ctr_params->write_freq_threshold = +                query_cbk_args->defrag->write_freq_threshold; + +        ipc_ctr_params->read_freq_threshold = +                query_cbk_args->defrag->read_freq_threshold; + +        ipc_ctr_params->query_limit = +                query_cbk_args->defrag->tier_conf.query_limit; + +        ipc_ctr_params->emergency_demote = +                (!gfdb_brick_info->_gfdb_promote && +                 query_cbk_args->defrag->tier_conf.watermark_last == +                        TIER_WM_HI); +          memcpy (&ipc_ctr_params->time_stamp,                  gfdb_brick_info->time_stamp,                  sizeof (gfdb_time_t)); @@ -2208,6 +2246,15 @@ out:          return ret;  } +static int +tier_get_wm_interval(tier_mode_t mode, tier_watermark_op_t wm) +{ +        if (mode == TIER_MODE_WM && wm == TIER_WM_HI) +                return WM_INTERVAL_EMERG; + +        return WM_INTERVAL; +} +  /*   * Main tiering loop. This is called from the promotion and the   * demotion threads spawned in tier_start(). @@ -2316,7 +2363,10 @@ static void                  check_watermark++; -                if (check_watermark >= WM_INTERVAL) { +                /* emergency demotion requires frequent watermark monitoring */ +                if (check_watermark >= +                        tier_get_wm_interval(tier_conf->mode, +                                             tier_conf->watermark_last)) {                          check_watermark = 0;                          if (tier_conf->mode == TIER_MODE_WM) {                                  ret = tier_get_fs_stat (this, &root_loc); @@ -2828,6 +2878,15 @@ tier_init (xlator_t *this)          defrag->tier_conf.max_migrate_files = freq; + +        ret = dict_get_int32 (this->options, +                              "tier-query-limit", +                              &(defrag->tier_conf.query_limit)); +        if (ret) { +                defrag->tier_conf.query_limit = +                        DEFAULT_TIER_QUERY_LIMIT; +        } +          ret = dict_get_str (this->options,                              "tier-compact", &mode); @@ -3041,6 +3100,10 @@ tier_reconfigure (xlator_t *this, dict_t *options)                                    defrag->tier_conf.max_migrate_files, options,                                    int32, out); +                GF_OPTION_RECONF ("tier-query-limit", +                                  defrag->tier_conf.query_limit, +                                  options, int32, out); +                  GF_OPTION_RECONF ("tier-pause",                                    req_pause, options,                                    bool, out); diff --git a/xlators/cluster/dht/src/tier.h b/xlators/cluster/dht/src/tier.h index ffb04173bd5..764860e6884 100644 --- a/xlators/cluster/dht/src/tier.h +++ b/xlators/cluster/dht/src/tier.h @@ -98,7 +98,7 @@ typedef enum tier_watermark_op_ {  #define DEFAULT_DEMOTE_FREQ_SEC        120  #define DEFAULT_HOT_COMPACT_FREQ_SEC   604800  #define DEFAULT_COLD_COMPACT_FREQ_SEC  604800 -#define DEFAULT_DEMOTE_DEGRADED        10 +#define DEFAULT_DEMOTE_DEGRADED        1  #define DEFAULT_WRITE_FREQ_SEC         0  #define DEFAULT_READ_FREQ_SEC          0  #define DEFAULT_WM_LOW                 75 @@ -107,5 +107,6 @@ typedef enum tier_watermark_op_ {  #define DEFAULT_COMP_MODE              _gf_true  #define DEFAULT_TIER_MAX_MIGRATE_MB    1000  #define DEFAULT_TIER_MAX_MIGRATE_FILES 5000 +#define DEFAULT_TIER_QUERY_LIMIT       100  #endif diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.c b/xlators/features/changetimerecorder/src/changetimerecorder.c index 4e4ea851a86..ffc4625626b 100644 --- a/xlators/features/changetimerecorder/src/changetimerecorder.c +++ b/xlators/features/changetimerecorder/src/changetimerecorder.c @@ -16,6 +16,7 @@  #include "syscall.h"  #include "changetimerecorder.h" +#include "tier-ctr-interface.h"  /*******************************inode forget***********************************/ @@ -1726,14 +1727,21 @@ ctr_db_query (xlator_t *this,                  goto out;          }          if (!ipc_ctr_params->is_promote) { -                if (ipc_ctr_params->write_freq_threshold == 0 && -                        ipc_ctr_params->read_freq_threshold == 0) { +                if (ipc_ctr_params->emergency_demote) { +                        /* emergency demotion mode */ +                        ret = find_all (conn_node, +                                ctr_db_query_callback, +                                (void *)&query_cbk_args, +                                ipc_ctr_params->query_limit); +                } else { +                        if (ipc_ctr_params->write_freq_threshold == 0 && +                                ipc_ctr_params->read_freq_threshold == 0) {                                  ret = find_unchanged_for_time (                                          conn_node,                                          ctr_db_query_callback,                                          (void *)&query_cbk_args,                                          &ipc_ctr_params->time_stamp); -                } else { +                        } else {                                  ret = find_unchanged_for_time_freq (                                          conn_node,                                          ctr_db_query_callback, @@ -1742,6 +1750,7 @@ ctr_db_query (xlator_t *this,                                          ipc_ctr_params->write_freq_threshold,                                          ipc_ctr_params->read_freq_threshold,                                          _gf_false); +                        }                  }          } else {                  if (ipc_ctr_params->write_freq_threshold == 0 && diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 0c67135db3f..931f99ba5af 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -462,7 +462,8 @@ validate_tier (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,                     strstr (key, "tier-max-files") ||                     strstr (key, "tier-demote-frequency") ||                     strstr (key, "tier-hot-compact-frequency") || -                   strstr (key, "tier-cold-compact-frequency")) { +                   strstr (key, "tier-cold-compact-frequency") || +                   strstr (key, "tier-query-limit")) {                  if (origin_val < 1) {                          snprintf (errstr, sizeof (errstr), "%s is not a "                                    " compatible value. %s expects a positive " @@ -2739,6 +2740,18 @@ struct volopt_map_entry glusterd_volopt_map[] = {            .description = "The maximum number of files that may be migrated"            " in any direction in a given cycle by a single node."          }, +        { .key         = "cluster.tier-query-limit", +          .voltype     = "cluster/tier", +          .option      = "tier-query-limit", +          .value       = "100", +          .op_version  = GD_OP_VERSION_3_9_0, +          .flags       = OPT_FLAG_CLIENT_OPT, +          .validate_fn = validate_tier, +          .type        = NO_DOC, +          .description = "The maximum number of files that may be migrated " +                         "during an emergency demote. An emergency condition " +                         "is flagged when writes breach the hi-watermark." +        },          { .key         = "cluster.tier-compact",            .voltype     = "cluster/tier",            .option      = "tier-compact",  | 
