diff options
author | Joseph Fernandes <josferna@redhat.com> | 2015-08-27 17:23:07 +0530 |
---|---|---|
committer | Dan Lambright <dlambrig@redhat.com> | 2015-09-08 05:13:00 -0700 |
commit | 96af474045c9ba5ab74ca76daa823d91a0a0c610 (patch) | |
tree | b9f8991807f1d5a41eee82ff69788161af815e72 /xlators/cluster/dht/src | |
parent | 9efce73fb31d520706a6d47de4daa4fb3366e6a6 (diff) |
tier/ctr: Solving DB Lock issue due to write contention from db connections
Problem: The DB on the brick is being accessed by CTR, for write, and by the
tier migrator, for read and write. The write from the tier migrator resets
the heat counters after a cycle. Since we are using sqlite, two connections
trying to write would cause db lock contention. As a result, CTR used to fail
to update the db.
Solution: Use CTR's own db connection for resetting the heat counters.
1) Introduced a new IPC FOP for CTR
2) After the query, do an IPC syncop to the underlying client xlator associated
with the brick.
3) CTR in the brick will catch the IPC FOP and clear the heat counters.
Change-Id: I53306bfc08dcdba479deb4ccc154896521336150
BUG: 1260730
Signed-off-by: Joseph Fernandes <josferna@redhat.com>
Reviewed-on: http://review.gluster.org/12031
Tested-by: NetBSD Build System <jenkins@build.gluster.org>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Dan Lambright <dlambrig@redhat.com>
Tested-by: Dan Lambright <dlambrig@redhat.com>
Diffstat (limited to 'xlators/cluster/dht/src')
-rw-r--r-- | xlators/cluster/dht/src/dht-mem-types.h | 1 | ||||
-rw-r--r-- | xlators/cluster/dht/src/tier.c | 139 | ||||
-rw-r--r-- | xlators/cluster/dht/src/tier.h | 9 |
3 files changed, 105 insertions, 44 deletions
diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h index 85e5baed62c..e3a38ed7e03 100644 --- a/xlators/cluster/dht/src/dht-mem-types.h +++ b/xlators/cluster/dht/src/dht-mem-types.h @@ -34,6 +34,7 @@ enum gf_dht_mem_types_ { gf_dht_mt_container_t, gf_dht_mt_octx_t, gf_dht_mt_miginfo_t, + gf_tier_mt_bricklist_t, gf_dht_mt_end }; #endif diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c index 7563ec7f20b..ff31c17c7e2 100644 --- a/xlators/cluster/dht/src/tier.c +++ b/xlators/cluster/dht/src/tier.c @@ -27,7 +27,7 @@ static void *libhandle; static gfdb_methods_t gfdb_methods; #define DB_QUERY_RECORD_SIZE 4096 - +#define PROMOTION_CYCLE_CNT 4 static int @@ -436,8 +436,7 @@ out: * It picks up each bricks db and queries for eligible files for migration. * The list of eligible files are populated in appropriate query files*/ static int -tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value, - void *args) { +tier_process_brick_cbk (brick_list_t *local_brick, void *args) { int ret = -1; char *db_path = NULL; query_cbk_args_t *query_cbk_args = NULL; @@ -456,8 +455,12 @@ tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value, GF_VALIDATE_OR_GOTO (this->name, gfdb_brick_dict_info->_query_cbk_args, out); - GF_VALIDATE_OR_GOTO (this->name, value, out); - db_path = data_to_str(value); + GF_VALIDATE_OR_GOTO (this->name, local_brick, out); + + GF_VALIDATE_OR_GOTO (this->name, local_brick->xlator, out); + + GF_VALIDATE_OR_GOTO (this->name, local_brick->brick_db_path, out); + db_path = local_brick->brick_db_path; /*Preparing DB parameters before init_db i.e getting db connection*/ params_dict = dict_new (); @@ -467,7 +470,7 @@ tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value, "DB Params cannot initialized!"); goto out; } - SET_DB_PARAM_TO_DICT(this->name, params_dict, gfdb_methods.dbpath, + SET_DB_PARAM_TO_DICT(this->name, params_dict, GFDB_SQL_PARAM_DBPATH, db_path, 
ret, out); /*Get the db connection*/ @@ -508,7 +511,7 @@ tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value, write_freq_threshold, query_cbk_args->defrag-> read_freq_threshold, - _gf_true); + _gf_false); } } else { if (query_cbk_args->defrag->write_freq_threshold == 0 && @@ -527,7 +530,7 @@ tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value, query_cbk_args->defrag-> write_freq_threshold, query_cbk_args->defrag->read_freq_threshold, - _gf_true); + _gf_false); } } if (ret) { @@ -535,7 +538,17 @@ tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value, DHT_MSG_LOG_TIER_ERROR, "FATAL: query from db failed"); goto out; - } + } + + /*Clear the heat on the DB entries*/ + ret = syncop_ipc (local_brick->xlator, GF_IPC_TARGET_CTR, NULL, NULL); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + DHT_MSG_LOG_TIER_ERROR, "Failed clearing the heat " + "on db %s", local_brick->brick_db_path); + goto out; + } + ret = 0; out: if (query_cbk_args && query_cbk_args->queryFILE) { @@ -555,6 +568,7 @@ tier_build_migration_qfile (demotion_args_t *args, _gfdb_brick_dict_info_t gfdb_brick_dict_info; gfdb_time_t time_in_past; int ret = -1; + brick_list_t *local_brick = NULL; /* * The first time this function is called, query file will @@ -585,14 +599,18 @@ tier_build_migration_qfile (demotion_args_t *args, gfdb_brick_dict_info.time_stamp = &time_in_past; gfdb_brick_dict_info._gfdb_promote = is_promotion; gfdb_brick_dict_info._query_cbk_args = query_cbk_args; - ret = dict_foreach (args->brick_list, tier_process_brick_cbk, - &gfdb_brick_dict_info); - if (ret) { - gf_msg (args->this->name, GF_LOG_ERROR, 0, - DHT_MSG_BRICK_QUERY_FAILED, - "Brick query failed\n"); - goto out; + + list_for_each_entry (local_brick, args->brick_list, list) { + ret = tier_process_brick_cbk (local_brick, + &gfdb_brick_dict_info); + if (ret) { + gf_msg (args->this->name, GF_LOG_ERROR, 0, + DHT_MSG_BRICK_QUERY_FAILED, + "Brick query failed\n"); + goto out; + } } + ret = 
0; out: return ret; } @@ -697,19 +715,19 @@ out: } static int -tier_get_bricklist (xlator_t *xl, dict_t *bricklist) +tier_get_bricklist (xlator_t *xl, struct list_head *local_bricklist_head) { xlator_list_t *child = NULL; char *rv = NULL; char *rh = NULL; char localhost[256] = {0}; - char *db_path = NULL; char *brickname = NULL; char db_name[PATH_MAX] = ""; int ret = 0; + brick_list_t *local_brick = NULL; GF_VALIDATE_OR_GOTO ("tier", xl, out); - GF_VALIDATE_OR_GOTO ("tier", bricklist, out); + GF_VALIDATE_OR_GOTO ("tier", local_bricklist_head, out); gethostname (localhost, sizeof (localhost)); @@ -724,27 +742,38 @@ tier_get_bricklist (xlator_t *xl, dict_t *bricklist) if (gf_is_local_addr (rh)) { + local_brick = GF_CALLOC (1, sizeof(brick_list_t), + gf_tier_mt_bricklist_t); + if (!local_brick) { + goto out; + } + ret = dict_get_str(xl->options, "remote-subvolume", &rv); if (ret < 0) goto out; + brickname = strrchr(rv, '/') + 1; snprintf(db_name, sizeof(db_name), "%s.db", brickname); - db_path = GF_CALLOC (PATH_MAX, 1, gf_common_mt_char); - if (!db_path) { + + local_brick->brick_db_path = + GF_CALLOC (PATH_MAX, 1, gf_common_mt_char); + if (!local_brick->brick_db_path) { gf_msg ("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, "Faile. 
to allocate memory for bricklist"); goto out; } - sprintf(db_path, "%s/%s/%s", rv, + sprintf(local_brick->brick_db_path, "%s/%s/%s", rv, GF_HIDDEN_PATH, db_name); - if (dict_add_dynstr_with_alloc(bricklist, "brick", - db_path)) - goto out; + + local_brick->xlator = xl; + + list_add_tail (&(local_brick->list), + local_bricklist_head); ret = 0; goto out; @@ -752,19 +781,48 @@ tier_get_bricklist (xlator_t *xl, dict_t *bricklist) } for (child = xl->children; child; child = child->next) { - ret = tier_get_bricklist(child->xlator, bricklist); + ret = tier_get_bricklist(child->xlator, local_bricklist_head); + if (ret) { + goto out; + } } + + ret = 0; out: - GF_FREE (db_path); + + if (ret) { + if (local_brick) { + GF_FREE (local_brick->brick_db_path); + } + GF_FREE (local_brick); + } return ret; } +void +clear_bricklist (struct list_head *brick_list) +{ + brick_list_t *local_brick = NULL; + brick_list_t *temp = NULL; + + if (list_empty(brick_list)) { + return; + } + + list_for_each_entry_safe (local_brick, temp, brick_list, list) { + list_del (&local_brick->list); + GF_FREE (local_brick->brick_db_path); + GF_FREE (local_brick); + } +} + + int tier_start (xlator_t *this, gf_defrag_info_t *defrag) { - dict_t *bricklist_cold = NULL; - dict_t *bricklist_hot = NULL; + struct list_head bricklist_hot = { 0 }; + struct list_head bricklist_cold = { 0 }; dht_conf_t *conf = NULL; gfdb_time_t current_time; int freq_promote = 0; @@ -783,16 +841,11 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag) conf = this->private; - bricklist_cold = dict_new(); - if (!bricklist_cold) - return -1; - - bricklist_hot = dict_new(); - if (!bricklist_hot) - return -1; + INIT_LIST_HEAD ((&bricklist_hot)); + INIT_LIST_HEAD ((&bricklist_cold)); - tier_get_bricklist (conf->subvolumes[0], bricklist_cold); - tier_get_bricklist (conf->subvolumes[1], bricklist_hot); + tier_get_bricklist (conf->subvolumes[0], &bricklist_cold); + tier_get_bricklist (conf->subvolumes[1], &bricklist_hot); gf_msg (this->name, 
GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, "Begin run tier promote %d" @@ -873,7 +926,7 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag) if (is_demotion_triggered) { demotion_args.this = this; - demotion_args.brick_list = bricklist_hot; + demotion_args.brick_list = &bricklist_hot; demotion_args.defrag = defrag; demotion_args.freq_time = freq_demote; ret_demotion = pthread_create (&demote_thread, @@ -889,9 +942,9 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag) if (is_promotion_triggered) { promotion_args.this = this; - promotion_args.brick_list = bricklist_cold; + promotion_args.brick_list = &bricklist_cold; promotion_args.defrag = defrag; - promotion_args.freq_time = freq_promote; + promotion_args.freq_time = freq_promote * PROMOTION_CYCLE_CNT; ret_promotion = pthread_create (&promote_thread, NULL, &tier_promote, &promotion_args); @@ -940,8 +993,8 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag) ret = 0; out: - dict_unref(bricklist_cold); - dict_unref(bricklist_hot); + clear_bricklist (&bricklist_cold); + clear_bricklist (&bricklist_hot); return ret; } diff --git a/xlators/cluster/dht/src/tier.h b/xlators/cluster/dht/src/tier.h index 88fb7844dad..76ff27d5932 100644 --- a/xlators/cluster/dht/src/tier.h +++ b/xlators/cluster/dht/src/tier.h @@ -31,6 +31,7 @@ #define TIMER_SECS 3600 #include "gfdb_data_store.h" +#include "gfdb_sqlite3.h" #include <ctype.h> #include <sys/stat.h> @@ -56,10 +57,16 @@ typedef struct _gfdb_brick_dict_info { query_cbk_args_t *_query_cbk_args; } _gfdb_brick_dict_info_t; +typedef struct brick_list { + xlator_t *xlator; + char *brick_db_path; + struct list_head list; +} brick_list_t; + typedef struct _dm_thread_args { xlator_t *this; gf_defrag_info_t *defrag; - dict_t *brick_list; + struct list_head *brick_list; int freq_time; int return_value; } promotion_args_t, demotion_args_t; |