summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/dht/src
diff options
context:
space:
mode:
authorJoseph Fernandes <josferna@redhat.com>2015-08-27 17:23:07 +0530
committerDan Lambright <dlambrig@redhat.com>2015-09-08 05:13:00 -0700
commit96af474045c9ba5ab74ca76daa823d91a0a0c610 (patch)
treeb9f8991807f1d5a41eee82ff69788161af815e72 /xlators/cluster/dht/src
parent9efce73fb31d520706a6d47de4daa4fb3366e6a6 (diff)
tier/ctr: Solving DB Lock issue due to write contention from db connections
Problem: The DB on the brick is accessed by CTR for writes, and by the tier migrator for reads and writes. The write from the tier migrator resets the heat counters after a cycle. Since we are using sqlite, two connections trying to write at the same time cause db lock contention. As a result, CTR used to fail to update the db. Solution: Use the same db connection as CTR for resetting the heat counters. 1) Introduced a new IPC FOP for CTR. 2) After the query, do an IPC syncop to the underlying client xlator associated with the brick. 3) CTR in the brick will catch the IPC FOP and clear the heat counters. Change-Id: I53306bfc08dcdba479deb4ccc154896521336150 BUG: 1260730 Signed-off-by: Joseph Fernandes <josferna@redhat.com> Reviewed-on: http://review.gluster.org/12031 Tested-by: NetBSD Build System <jenkins@build.gluster.org> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Dan Lambright <dlambrig@redhat.com> Tested-by: Dan Lambright <dlambrig@redhat.com>
Diffstat (limited to 'xlators/cluster/dht/src')
-rw-r--r--xlators/cluster/dht/src/dht-mem-types.h1
-rw-r--r--xlators/cluster/dht/src/tier.c139
-rw-r--r--xlators/cluster/dht/src/tier.h9
3 files changed, 105 insertions, 44 deletions
diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h
index 85e5baed62c..e3a38ed7e03 100644
--- a/xlators/cluster/dht/src/dht-mem-types.h
+++ b/xlators/cluster/dht/src/dht-mem-types.h
@@ -34,6 +34,7 @@ enum gf_dht_mem_types_ {
gf_dht_mt_container_t,
gf_dht_mt_octx_t,
gf_dht_mt_miginfo_t,
+ gf_tier_mt_bricklist_t,
gf_dht_mt_end
};
#endif
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
index 7563ec7f20b..ff31c17c7e2 100644
--- a/xlators/cluster/dht/src/tier.c
+++ b/xlators/cluster/dht/src/tier.c
@@ -27,7 +27,7 @@ static void *libhandle;
static gfdb_methods_t gfdb_methods;
#define DB_QUERY_RECORD_SIZE 4096
-
+#define PROMOTION_CYCLE_CNT 4
static int
@@ -436,8 +436,7 @@ out:
* It picks up each bricks db and queries for eligible files for migration.
* The list of eligible files are populated in appropriate query files*/
static int
-tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value,
- void *args) {
+tier_process_brick_cbk (brick_list_t *local_brick, void *args) {
int ret = -1;
char *db_path = NULL;
query_cbk_args_t *query_cbk_args = NULL;
@@ -456,8 +455,12 @@ tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value,
GF_VALIDATE_OR_GOTO (this->name,
gfdb_brick_dict_info->_query_cbk_args, out);
- GF_VALIDATE_OR_GOTO (this->name, value, out);
- db_path = data_to_str(value);
+ GF_VALIDATE_OR_GOTO (this->name, local_brick, out);
+
+ GF_VALIDATE_OR_GOTO (this->name, local_brick->xlator, out);
+
+ GF_VALIDATE_OR_GOTO (this->name, local_brick->brick_db_path, out);
+ db_path = local_brick->brick_db_path;
/*Preparing DB parameters before init_db i.e getting db connection*/
params_dict = dict_new ();
@@ -467,7 +470,7 @@ tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value,
"DB Params cannot initialized!");
goto out;
}
- SET_DB_PARAM_TO_DICT(this->name, params_dict, gfdb_methods.dbpath,
+ SET_DB_PARAM_TO_DICT(this->name, params_dict, GFDB_SQL_PARAM_DBPATH,
db_path, ret, out);
/*Get the db connection*/
@@ -508,7 +511,7 @@ tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value,
write_freq_threshold,
query_cbk_args->defrag->
read_freq_threshold,
- _gf_true);
+ _gf_false);
}
} else {
if (query_cbk_args->defrag->write_freq_threshold == 0 &&
@@ -527,7 +530,7 @@ tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value,
query_cbk_args->defrag->
write_freq_threshold,
query_cbk_args->defrag->read_freq_threshold,
- _gf_true);
+ _gf_false);
}
}
if (ret) {
@@ -535,7 +538,17 @@ tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value,
DHT_MSG_LOG_TIER_ERROR,
"FATAL: query from db failed");
goto out;
- }
+ }
+
+ /*Clear the heat on the DB entries*/
+ ret = syncop_ipc (local_brick->xlator, GF_IPC_TARGET_CTR, NULL, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_LOG_TIER_ERROR, "Failed clearing the heat "
+ "on db %s", local_brick->brick_db_path);
+ goto out;
+ }
+
ret = 0;
out:
if (query_cbk_args && query_cbk_args->queryFILE) {
@@ -555,6 +568,7 @@ tier_build_migration_qfile (demotion_args_t *args,
_gfdb_brick_dict_info_t gfdb_brick_dict_info;
gfdb_time_t time_in_past;
int ret = -1;
+ brick_list_t *local_brick = NULL;
/*
* The first time this function is called, query file will
@@ -585,14 +599,18 @@ tier_build_migration_qfile (demotion_args_t *args,
gfdb_brick_dict_info.time_stamp = &time_in_past;
gfdb_brick_dict_info._gfdb_promote = is_promotion;
gfdb_brick_dict_info._query_cbk_args = query_cbk_args;
- ret = dict_foreach (args->brick_list, tier_process_brick_cbk,
- &gfdb_brick_dict_info);
- if (ret) {
- gf_msg (args->this->name, GF_LOG_ERROR, 0,
- DHT_MSG_BRICK_QUERY_FAILED,
- "Brick query failed\n");
- goto out;
+
+ list_for_each_entry (local_brick, args->brick_list, list) {
+ ret = tier_process_brick_cbk (local_brick,
+ &gfdb_brick_dict_info);
+ if (ret) {
+ gf_msg (args->this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_BRICK_QUERY_FAILED,
+ "Brick query failed\n");
+ goto out;
+ }
}
+ ret = 0;
out:
return ret;
}
@@ -697,19 +715,19 @@ out:
}
static int
-tier_get_bricklist (xlator_t *xl, dict_t *bricklist)
+tier_get_bricklist (xlator_t *xl, struct list_head *local_bricklist_head)
{
xlator_list_t *child = NULL;
char *rv = NULL;
char *rh = NULL;
char localhost[256] = {0};
- char *db_path = NULL;
char *brickname = NULL;
char db_name[PATH_MAX] = "";
int ret = 0;
+ brick_list_t *local_brick = NULL;
GF_VALIDATE_OR_GOTO ("tier", xl, out);
- GF_VALIDATE_OR_GOTO ("tier", bricklist, out);
+ GF_VALIDATE_OR_GOTO ("tier", local_bricklist_head, out);
gethostname (localhost, sizeof (localhost));
@@ -724,27 +742,38 @@ tier_get_bricklist (xlator_t *xl, dict_t *bricklist)
if (gf_is_local_addr (rh)) {
+ local_brick = GF_CALLOC (1, sizeof(brick_list_t),
+ gf_tier_mt_bricklist_t);
+ if (!local_brick) {
+ goto out;
+ }
+
ret = dict_get_str(xl->options, "remote-subvolume",
&rv);
if (ret < 0)
goto out;
+
brickname = strrchr(rv, '/') + 1;
snprintf(db_name, sizeof(db_name), "%s.db",
brickname);
- db_path = GF_CALLOC (PATH_MAX, 1, gf_common_mt_char);
- if (!db_path) {
+
+ local_brick->brick_db_path =
+ GF_CALLOC (PATH_MAX, 1, gf_common_mt_char);
+ if (!local_brick->brick_db_path) {
gf_msg ("tier", GF_LOG_ERROR, 0,
DHT_MSG_LOG_TIER_STATUS,
"Faile. to allocate memory for bricklist");
goto out;
}
- sprintf(db_path, "%s/%s/%s", rv,
+ sprintf(local_brick->brick_db_path, "%s/%s/%s", rv,
GF_HIDDEN_PATH,
db_name);
- if (dict_add_dynstr_with_alloc(bricklist, "brick",
- db_path))
- goto out;
+
+ local_brick->xlator = xl;
+
+ list_add_tail (&(local_brick->list),
+ local_bricklist_head);
ret = 0;
goto out;
@@ -752,19 +781,48 @@ tier_get_bricklist (xlator_t *xl, dict_t *bricklist)
}
for (child = xl->children; child; child = child->next) {
- ret = tier_get_bricklist(child->xlator, bricklist);
+ ret = tier_get_bricklist(child->xlator, local_bricklist_head);
+ if (ret) {
+ goto out;
+ }
}
+
+ ret = 0;
out:
- GF_FREE (db_path);
+
+ if (ret) {
+ if (local_brick) {
+ GF_FREE (local_brick->brick_db_path);
+ }
+ GF_FREE (local_brick);
+ }
return ret;
}
+void
+clear_bricklist (struct list_head *brick_list) /* unlinks and frees every brick_list_t on brick_list; the head itself is not freed here */
+{
+ brick_list_t *local_brick = NULL;
+ brick_list_t *temp = NULL; /* second cursor required by list_for_each_entry_safe */
+ /* Nothing to release when the list holds no entries. */
+ if (list_empty(brick_list)) {
+ return;
+ }
+ /* Each entry is unlinked first, then its db path string and the entry itself are freed. */
+ list_for_each_entry_safe (local_brick, temp, brick_list, list) {
+ list_del (&local_brick->list);
+ GF_FREE (local_brick->brick_db_path);
+ GF_FREE (local_brick);
+ }
+}
+
+
int
tier_start (xlator_t *this, gf_defrag_info_t *defrag)
{
- dict_t *bricklist_cold = NULL;
- dict_t *bricklist_hot = NULL;
+ struct list_head bricklist_hot = { 0 };
+ struct list_head bricklist_cold = { 0 };
dht_conf_t *conf = NULL;
gfdb_time_t current_time;
int freq_promote = 0;
@@ -783,16 +841,11 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
conf = this->private;
- bricklist_cold = dict_new();
- if (!bricklist_cold)
- return -1;
-
- bricklist_hot = dict_new();
- if (!bricklist_hot)
- return -1;
+ INIT_LIST_HEAD ((&bricklist_hot));
+ INIT_LIST_HEAD ((&bricklist_cold));
- tier_get_bricklist (conf->subvolumes[0], bricklist_cold);
- tier_get_bricklist (conf->subvolumes[1], bricklist_hot);
+ tier_get_bricklist (conf->subvolumes[0], &bricklist_cold);
+ tier_get_bricklist (conf->subvolumes[1], &bricklist_hot);
gf_msg (this->name, GF_LOG_INFO, 0,
DHT_MSG_LOG_TIER_STATUS, "Begin run tier promote %d"
@@ -873,7 +926,7 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
if (is_demotion_triggered) {
demotion_args.this = this;
- demotion_args.brick_list = bricklist_hot;
+ demotion_args.brick_list = &bricklist_hot;
demotion_args.defrag = defrag;
demotion_args.freq_time = freq_demote;
ret_demotion = pthread_create (&demote_thread,
@@ -889,9 +942,9 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
if (is_promotion_triggered) {
promotion_args.this = this;
- promotion_args.brick_list = bricklist_cold;
+ promotion_args.brick_list = &bricklist_cold;
promotion_args.defrag = defrag;
- promotion_args.freq_time = freq_promote;
+ promotion_args.freq_time = freq_promote * PROMOTION_CYCLE_CNT;
ret_promotion = pthread_create (&promote_thread,
NULL, &tier_promote,
&promotion_args);
@@ -940,8 +993,8 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
ret = 0;
out:
- dict_unref(bricklist_cold);
- dict_unref(bricklist_hot);
+ clear_bricklist (&bricklist_cold);
+ clear_bricklist (&bricklist_hot);
return ret;
}
diff --git a/xlators/cluster/dht/src/tier.h b/xlators/cluster/dht/src/tier.h
index 88fb7844dad..76ff27d5932 100644
--- a/xlators/cluster/dht/src/tier.h
+++ b/xlators/cluster/dht/src/tier.h
@@ -31,6 +31,7 @@
#define TIMER_SECS 3600
#include "gfdb_data_store.h"
+#include "gfdb_sqlite3.h"
#include <ctype.h>
#include <sys/stat.h>
@@ -56,10 +57,16 @@ typedef struct _gfdb_brick_dict_info {
query_cbk_args_t *_query_cbk_args;
} _gfdb_brick_dict_info_t;
+typedef struct brick_list { /* one local brick found by tier_get_bricklist() */
+ xlator_t *xlator; /* xlator the brick was discovered on; target of the syncop_ipc() heat-clear call */
+ char *brick_db_path; /* GF_CALLOC'ed "<remote-subvolume>/<GF_HIDDEN_PATH>/<brick>.db" path; released with GF_FREE */
+ struct list_head list; /* linkage into the hot or cold brick list built in tier_start() */
+} brick_list_t;
+
typedef struct _dm_thread_args {
xlator_t *this;
gf_defrag_info_t *defrag;
- dict_t *brick_list;
+ struct list_head *brick_list;
int freq_time;
int return_value;
} promotion_args_t, demotion_args_t;