diff options
author | Diogenes Nunez <dnunez@redhat.com> | 2016-07-27 11:09:47 -0400 |
---|---|---|
committer | Dan Lambright <dlambrig@redhat.com> | 2016-09-04 18:37:57 -0700 |
commit | 261c035c7d0cd1639cc8bd0ead82c30efcc0e93f (patch) | |
tree | af3a2e498023e7ad8af417312b83ce2f969ef738 /libglusterfs | |
parent | 6459fc812219551291e4be426ed8ecf2c90813a4 (diff) |
cluster/tier: Adding compaction option for metadata databases
Problem: As metadata in the database fills up, querying the database
takes a long time. As a result, tier migration slows down. To
counteract this, we added a way to enable the compaction methods of
the underlying database. The goal is to reduce the size of the
underlying file by eliminating database fragmentation.
NOTE: There is currently a bug where a brick will sometimes
attempt to activate compaction even when compaction is already
turned on.
The cause has been narrowed down to the compact_mode_switched flag flipping its value.
Changes: libglusterfs/src/gfdb - Added a gfdb function to compact the
underlying database, compact_db(). This is a no-op if the database has
no such option.
- Added a compaction function for SQLite3 that does the following
1) Changes the auto_vacuum pragma of the database
2) Compacts the database according to the type of compaction requested
- The compaction type can be changed by setting the macro
GF_SQL_COMPACT_DEF to one of the 4 compaction types defined in
gfdb_sqlite3.h.
It is currently set to GF_SQL_COMPACT_INCR, or incremental
vacuuming.
xlators/cluster/dht/src - Added the following command-line option to
enable SQLite3 compaction.
gluster volume set <vol-name> tier-compact <off|on>
- Added the following command-line options to change how frequently the
hot and cold tiers are ordered to compact.
gluster volume set <vol-name> tier-hot-compact-frequency <int>
gluster volume set <vol-name> tier-cold-compact-frequency <int>
- tier daemon periodically sends the (new)
GFDB_IPC_CTR_SET_COMPACT_PRAGMA IPC to the CTR xlator. The IPC
triggers compaction of the database.
The inputs are both gf_boolean_t.
IPC Input:
compact_active: Is compaction currently on for the db.
compact_mode_switched: Did we flip the compaction switch recently?
IPC Output:
0 if the compaction succeeds.
Non-zero otherwise.
xlators/features/changetimerecorder/src/ - When the CTR gets the
compaction IPC, it launches a thread that will perform the
compaction. The IPC ends after the thread is launched. To avoid extra
allocations, the parameters are passed using static variables.
Change-Id: I5e1433becb9eeff2afe8dcb4a5798977bf5ba0dd
Signed-off-by: Diogenes Nunez <dnunez@redhat.com>
Reviewed-on: http://review.gluster.org/15031
Reviewed-by: Milind Changire <mchangir@redhat.com>
Reviewed-by: Dan Lambright <dlambrig@redhat.com>
Tested-by: Dan Lambright <dlambrig@redhat.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Smoke: Gluster Build System <jenkins@build.gluster.org>
Diffstat (limited to 'libglusterfs')
-rw-r--r-- | libglusterfs/src/gfdb/gfdb_data_store.c | 40 | ||||
-rw-r--r-- | libglusterfs/src/gfdb/gfdb_data_store.h | 19 | ||||
-rw-r--r-- | libglusterfs/src/gfdb/gfdb_data_store_types.h | 50 | ||||
-rw-r--r-- | libglusterfs/src/gfdb/gfdb_sqlite3.c | 187 | ||||
-rw-r--r-- | libglusterfs/src/gfdb/gfdb_sqlite3.h | 21 | ||||
-rw-r--r-- | libglusterfs/src/libglusterfs-messages.h | 23 |
6 files changed, 298 insertions, 42 deletions
diff --git a/libglusterfs/src/gfdb/gfdb_data_store.c b/libglusterfs/src/gfdb/gfdb_data_store.c index 9c042f9e82e..cb567503fa3 100644 --- a/libglusterfs/src/gfdb/gfdb_data_store.c +++ b/libglusterfs/src/gfdb/gfdb_data_store.c @@ -433,6 +433,43 @@ delete_record (gfdb_conn_node_t *_conn_node, return ret; } +/*Libgfdb API Function: Compact the database. + * + * Arguments: + * _conn_node : GFDB Connection node + * _compact_active : Is compaction currently on? + * _compact_mode_switched : Was the compaction switch flipped? + * Returns : if successful return 0 or + * -ve value in case of failure*/ +int +compact_db (gfdb_conn_node_t *_conn_node, gf_boolean_t _compact_active, + gf_boolean_t _compact_mode_switched) +{ + int ret = 0; + gfdb_db_operations_t *db_operations_t = NULL; + void *gf_db_connection = NULL; + + CHECK_CONN_NODE(_conn_node); + + db_operations_t = &_conn_node->gfdb_connection.gfdb_db_operations; + gf_db_connection = _conn_node->gfdb_connection.gf_db_connection; + + if (db_operations_t->compact_db_op) { + + ret = db_operations_t->compact_db_op (gf_db_connection, + _compact_active, + _compact_mode_switched); + if (ret) { + gf_msg (GFDB_DATA_STORE, GF_LOG_ERROR, 0, + LG_MSG_COMPACT_FAILED, "Compaction operation " + "failed"); + } + + } + + return ret; +} + @@ -835,5 +872,8 @@ void get_gfdb_methods (gfdb_methods_t *methods) /* Link info related functions */ methods->gfdb_link_info_new = gfdb_link_info_new; methods->gfdb_link_info_free = gfdb_link_info_free; + + /* Compaction related functions */ + methods->compact_db = compact_db; } diff --git a/libglusterfs/src/gfdb/gfdb_data_store.h b/libglusterfs/src/gfdb/gfdb_data_store.h index eacb8527034..0aac4611153 100644 --- a/libglusterfs/src/gfdb/gfdb_data_store.h +++ b/libglusterfs/src/gfdb/gfdb_data_store.h @@ -31,7 +31,7 @@ #define GFDB_IPC_CTR_CLEAR_OPS "gfdb.ipc-ctr-clear-op" #define GFDB_IPC_CTR_GET_DB_PARAM_OPS "gfdb.ipc-ctr-get-db-parm" #define GFDB_IPC_CTR_GET_DB_VERSION_OPS "gfdb.ipc-ctr-get-db-version" - 
+#define GFDB_IPC_CTR_SET_COMPACT_PRAGMA "gfdb.ipc-ctr-set-compact-pragma" /* * CTR IPC INPUT/OUTPUT * @@ -348,6 +348,21 @@ typedef int (*set_db_params_t)(gfdb_conn_node_t *db_conn, char *param_key, char *param_value); +/*Libgfdb API Function: Compact the database. + * + * Arguments: + * _conn_node : GFDB Connection node + * _compact_active : Is compaction currently on? + * _compact_mode_switched : Was the compaction switch flipped? + * Returns : if successful return 0 or + * -ve value in case of failure*/ +int +compact_db (gfdb_conn_node_t *_conn_node, gf_boolean_t _compact_active, + gf_boolean_t _compact_mode_switched); + +typedef int (*compact_db_t)(gfdb_conn_node_t *db_conn, + gf_boolean_t compact_active, + gf_boolean_t compact_mode_switched); typedef struct gfdb_methods_s { @@ -377,6 +392,8 @@ typedef struct gfdb_methods_s { gfdb_link_info_new_t gfdb_link_info_new; gfdb_link_info_free_t gfdb_link_info_free; + /* Compaction related functions */ + compact_db_t compact_db; } gfdb_methods_t; void get_gfdb_methods (gfdb_methods_t *methods); diff --git a/libglusterfs/src/gfdb/gfdb_data_store_types.h b/libglusterfs/src/gfdb/gfdb_data_store_types.h index 1acbdf2f99f..d0c96370eb8 100644 --- a/libglusterfs/src/gfdb/gfdb_data_store_types.h +++ b/libglusterfs/src/gfdb/gfdb_data_store_types.h @@ -40,7 +40,8 @@ typedef enum gf_db_operation { GFDB_W_DELETE_DB_OP, GFDB_UW_DELETE_DB_OP, GFDB_WFC_UPDATE_DB_OP, - GFDB_RFC_UPDATE_DB_OP + GFDB_RFC_UPDATE_DB_OP, + GFDB_DB_COMPACT_DB_OP /* Added for VACUUM/manual compaction support */ } gf_db_operation_t; @@ -81,19 +82,12 @@ gfdb_time_2_usec(gfdb_time_t *gfdb_time) return ((uint64_t) gfdb_time->tv_sec * GFDB_MICROSEC) + gfdb_time->tv_usec; } - - - - /****************************************************************************** * * Insert/Update Record related data structures/functions * * ****************************************************************************/ - - - /*Indicated a generic synchronous write to the db * This may 
or may not be implemented*/ typedef enum gfdb_sync_type { @@ -123,11 +117,6 @@ out: return ret; } - - - - - /*Indicated different types of db*/ typedef enum gfdb_db_type { GFDB_INVALID_DB = -1, @@ -165,12 +154,6 @@ out: return ret; } - - - - - - /*Tells the path of the fop*/ typedef enum gfdb_fop_path { GFDB_FOP_INVALID = -1, @@ -206,12 +189,6 @@ isunwindpath(gfdb_fop_path_t gfdb_fop_path) return (gfdb_fop_path >= GFDB_FOP_UNWIND) ? _gf_true : _gf_false; } - - - - - - /*Tell what type of fop it was * Like whether a dentry fop or a inode fop * Read fop or a write fop etc*/ @@ -258,12 +235,6 @@ isdentrycreatefop(gfdb_fop_type_t fop_type) _gf_true : _gf_false; } - - - - - - /*The structure that is used to send insert/update the databases * using insert_db api*/ typedef struct gfdb_db_record { @@ -374,6 +345,20 @@ typedef int +/*Used to compact the database + * Arguments: + * db_conn : GFDB Connection node + * compact_active : Is compaction currently on? + * compact_mode_switched : Was the compaction switch flipped? 
+ * Returns : if successful return 0 or + * -ve value in case of failure*/ +typedef int +(*gfdb_compact_db_t)(void *db_conn, gf_boolean_t compact_active, + gf_boolean_t compact_mode_switched); + + + + /* Query all the records from the database * Arguments: * db_conn : plugin specific data base connection @@ -502,6 +487,7 @@ typedef struct gfdb_db_operations { gfdb_fini_db_t fini_db_op; gfdb_insert_record_t insert_record_op; gfdb_delete_record_t delete_record_op; + gfdb_compact_db_t compact_db_op; gfdb_find_all_t find_all_op; gfdb_find_unchanged_for_time_t find_unchanged_for_time_op; gfdb_find_recently_changed_files_t find_recently_changed_files_op; @@ -598,5 +584,3 @@ typedef struct gfdb_connection { #endif - - diff --git a/libglusterfs/src/gfdb/gfdb_sqlite3.c b/libglusterfs/src/gfdb/gfdb_sqlite3.c index 04781be562a..094028361c5 100644 --- a/libglusterfs/src/gfdb/gfdb_sqlite3.c +++ b/libglusterfs/src/gfdb/gfdb_sqlite3.c @@ -239,6 +239,7 @@ gf_sqlite3_fill_db_operations(gfdb_db_operations_t *gfdb_db_ops) gfdb_db_ops->insert_record_op = gf_sqlite3_insert; gfdb_db_ops->delete_record_op = gf_sqlite3_delete; + gfdb_db_ops->compact_db_op = gf_sqlite3_vacuum; gfdb_db_ops->find_all_op = gf_sqlite3_find_all; gfdb_db_ops->find_unchanged_for_time_op = @@ -1327,10 +1328,14 @@ gf_sqlite3_pragma (void *db_conn, char *pragma_key, char **pragma_value) goto out; } - ret = gf_asprintf (pragma_value, "%s", sqlite3_column_text (pre_stmt, 0)); - if (ret <= 0) { - gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED, - "Failed to get %s from db", pragma_key); + if (pragma_value) { + ret = gf_asprintf (pragma_value, "%s", + sqlite3_column_text (pre_stmt, 0)); + if (ret <= 0) { + gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, + LG_MSG_QUERY_FAILED, "Failed to get %s from db", + pragma_key); + } } ret = 0; @@ -1382,3 +1387,177 @@ out: return ret; } + +/* Function to vacuum of sqlite db + * Input: + * void *db_conn : Sqlite connection + * gf_boolean_t compact_active : Is compaction 
on? + * gf_boolean_t compact_mode_switched : Did we just flip the compaction swtich? + * Return: + * On success return 0 + * On failure return -1 + * */ +int +gf_sqlite3_vacuum (void *db_conn, gf_boolean_t compact_active, + gf_boolean_t compact_mode_switched) +{ + int ret = -1; + gf_sql_connection_t *sql_conn = db_conn; + char *sqlstring = NULL; + char *sql_strerror = NULL; + gf_boolean_t changing_pragma = _gf_true; + + CHECK_SQL_CONN (sql_conn, out); + + if (GF_SQL_COMPACT_DEF == GF_SQL_COMPACT_NONE) { + gf_msg (GFDB_STR_SQLITE3, GF_LOG_INFO, 0, + LG_MSG_COMPACT_STATUS, + "VACUUM type is off: no VACUUM to do"); + goto out; + } + + if (compact_mode_switched) { + if (compact_active) { /* Then it was OFF before. + So turn everything on */ + ret = 0; + switch (GF_SQL_COMPACT_DEF) { + case GF_SQL_COMPACT_FULL: + ret = gf_sqlite3_set_pragma (db_conn, + "auto_vacuum", + GF_SQL_AV_FULL); + break; + case GF_SQL_COMPACT_INCR: + ret = gf_sqlite3_set_pragma (db_conn, + "auto_vacuum", + GF_SQL_AV_INCR); + break; + case GF_SQL_COMPACT_MANUAL: + changing_pragma = _gf_false; + default: + ret = -1; + gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, + LG_MSG_COMPACT_FAILED, + "VACUUM type undefined"); + goto out; + break; + } + + } else { /* Then it was ON before, so turn it all off */ + if (GF_SQL_COMPACT_DEF == GF_SQL_COMPACT_FULL || + GF_SQL_COMPACT_DEF == GF_SQL_COMPACT_INCR) { + ret = gf_sqlite3_set_pragma (db_conn, + "auto_vacuum", + GF_SQL_AV_NONE); + } else { + changing_pragma = _gf_false; + } + } + + if (ret) { + gf_msg (GFDB_STR_SQLITE3, GF_LOG_TRACE, 0, + LG_MSG_PREPARE_FAILED, + "Failed to set the pragma"); + goto out; + } + + gf_msg (GFDB_STR_SQLITE3, GF_LOG_INFO, 0, + LG_MSG_COMPACT_STATUS, "Turning compaction %i", + GF_SQL_COMPACT_DEF); + + /* If we move from an auto_vacuum scheme to off, */ + /* or vice-versa, we must VACUUM to save the change. 
*/ + /* In the case of a manual VACUUM scheme, we might as well */ + /* run a manual VACUUM now if we */ + if (changing_pragma || compact_active) { + ret = gf_asprintf (&sqlstring, "VACUUM;"); + if (ret <= 0) { + gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, + LG_MSG_PREPARE_FAILED, + "Failed allocating memory"); + goto out; + } + gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0, + LG_MSG_COMPACT_STATUS, "Sealed with a VACUUM"); + } + } else { /* We are active, so it's time to VACUUM */ + if (!compact_active) { /* Did we somehow enter an inconsistent + state? */ + ret = -1; + gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, + LG_MSG_PREPARE_FAILED, + "Tried to VACUUM when compaction inactive"); + goto out; + } + + gf_msg(GFDB_STR_SQLITE3, GF_LOG_TRACE, 0, + LG_MSG_COMPACT_STATUS, + "Doing regular vacuum of type %i", GF_SQL_COMPACT_DEF); + + switch (GF_SQL_COMPACT_DEF) { + case GF_SQL_COMPACT_INCR: /* INCR auto_vacuum */ + ret = gf_asprintf(&sqlstring, + "PRAGMA incremental_vacuum;"); + if (ret <= 0) { + gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, + LG_MSG_PREPARE_FAILED, + "Failed allocating memory"); + goto out; + } + gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0, + LG_MSG_COMPACT_STATUS, + "Will commence an incremental VACUUM"); + break; + /* (MANUAL) Invoke the VACUUM command */ + case GF_SQL_COMPACT_MANUAL: + ret = gf_asprintf(&sqlstring, "VACUUM;"); + if (ret <= 0) { + gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, + LG_MSG_PREPARE_FAILED, + "Failed allocating memory"); + goto out; + } + gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0, + LG_MSG_COMPACT_STATUS, + "Will commence a VACUUM"); + break; + /* (FULL) The database does the compaction itself. */ + /* We cannot do anything else, so we can leave */ + /* without sending anything to the database */ + case GF_SQL_COMPACT_FULL: + ret = 0; + goto success; + /* Any other state must be an error. 
Note that OFF */ + /* cannot hit this statement since we immediately leave */ + /* in that case */ + default: + ret = -1; + gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, + LG_MSG_COMPACT_FAILED, + "VACUUM type undefined"); + goto out; + break; + } + } + + gf_msg(GFDB_STR_SQLITE3, GF_LOG_TRACE, 0, LG_MSG_COMPACT_STATUS, + "SQLString == %s", sqlstring); + + ret = sqlite3_exec(sql_conn->sqlite3_db_conn, sqlstring, NULL, NULL, + &sql_strerror); + + if (ret != SQLITE_OK) { + gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, + LG_MSG_GET_RECORD_FAILED, "Failed to vacuum " + "the db : %s", sqlite3_errmsg (db_conn)); + ret = -1; + goto out; + } +success: + gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0, LG_MSG_COMPACT_STATUS, + compact_mode_switched ? "Successfully changed VACUUM on/off" + : "DB successfully VACUUM"); +out: + GF_FREE(sqlstring); + + return ret; +} diff --git a/libglusterfs/src/gfdb/gfdb_sqlite3.h b/libglusterfs/src/gfdb/gfdb_sqlite3.h index 9d0d996a322..4d70a60e431 100644 --- a/libglusterfs/src/gfdb/gfdb_sqlite3.h +++ b/libglusterfs/src/gfdb/gfdb_sqlite3.h @@ -73,8 +73,7 @@ do {\ #define GF_SQL_AV_NONE "none" #define GF_SQL_AV_FULL "full" -#define GF_SQL_AV_INCR "incr" - +#define GF_SQL_AV_INCR "incremental" #define GF_SQL_SYNC_OFF "off" #define GF_SQL_SYNC_NORMAL "normal" @@ -87,7 +86,12 @@ do {\ #define GF_SQL_JM_WAL "wal" #define GF_SQL_JM_OFF "off" +#define GF_SQL_COMPACT_NONE 0 +#define GF_SQL_COMPACT_FULL 1 +#define GF_SQL_COMPACT_INCR 2 +#define GF_SQL_COMPACT_MANUAL 3 +#define GF_SQL_COMPACT_DEF GF_SQL_COMPACT_INCR typedef enum gf_sql_auto_vacuum { gf_sql_av_none = 0, gf_sql_av_full, @@ -319,7 +323,18 @@ int gf_sqlite3_pragma (void *db_conn, char *pragma_key, char **pragma_value); int gf_sqlite3_set_pragma (void *db_conn, char *pragma_key, char *pragma_value); - +/* Function to vacuum of sqlite db + * Input: + * void *db_conn : Sqlite connection + * gf_boolean_t compact_active : Is compaction on? 
+ * gf_boolean_t compact_mode_switched : Did we just flip the compaction swtich? + * Return: + * On success return 0 + * On failure return -1 + * */ +int +gf_sqlite3_vacuum (void *db_conn, gf_boolean_t compact_active, + gf_boolean_t compact_mode_switched); void gf_sqlite3_fill_db_operations (gfdb_db_operations_t *gfdb_db_ops); diff --git a/libglusterfs/src/libglusterfs-messages.h b/libglusterfs/src/libglusterfs-messages.h index d2ad44e470e..29196929eb3 100644 --- a/libglusterfs/src/libglusterfs-messages.h +++ b/libglusterfs/src/libglusterfs-messages.h @@ -36,7 +36,9 @@ */ #define GLFS_LG_BASE GLFS_MSGID_COMP_LIBGLUSTERFS -#define GLFS_LG_NUM_MESSAGES 207 + +#define GLFS_LG_NUM_MESSAGES 209 + #define GLFS_LG_MSGID_END (GLFS_LG_BASE + GLFS_LG_NUM_MESSAGES + 1) /* Messaged with message IDs */ #define glfs_msg_start_lg GLFS_LG_BASE, "Invalid: Start of messages" @@ -1762,6 +1764,7 @@ * @recommendedaction * */ + #define LG_MSG_INVALID_INODE_LIST (GLFS_LG_BASE + 207) /*! @@ -1770,6 +1773,24 @@ * @recommendedaction * */ + +#define LG_MSG_COMPACT_FAILED (GLFS_LG_BASE + 208) + +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ + +#define LG_MSG_COMPACT_STATUS (GLFS_LG_BASE + 209) + +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ /*------------*/ #define glfs_msg_end_lg GLFS_LG_MSGID_END, "Invalid: End of messages" |