Diffstat (limited to 'xlators/mgmt/glusterd/src')
35 files changed, 21718 insertions, 3913 deletions
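Nearly every CLI handler in this patch undergoes the same conversion: the old glusterd_handle_FOO body is renamed to __glusterd_handle_FOO, and the exported symbol becomes a thin wrapper that routes it through glusterd_big_locked_handler, i.e. runs it under glusterd's global "big" lock. A minimal standalone sketch of that wrapper shape follows; it uses a plain pthread mutex as a stand-in for glusterd's synclock, and the handler and types are illustrative, not glusterd's actual API.

#include <pthread.h>
#include <stddef.h>

/* Stand-ins: the request type is opaque here, and the big lock is a
 * plain mutex rather than glusterd's synclock. */
typedef struct rpcsvc_request rpcsvc_request_t;
typedef int (*rpc_actor_t) (rpcsvc_request_t *req);

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

static int
big_locked_handler (rpcsvc_request_t *req, rpc_actor_t actor)
{
        int ret = -1;

        pthread_mutex_lock (&big_lock);
        ret = actor (req);
        pthread_mutex_unlock (&big_lock);

        return ret;
}

/* The real work keeps its old body under the double-underscore name. */
static int
__handle_add_brick (rpcsvc_request_t *req)
{
        (void) req;
        return 0;
}

/* The exported handler is now only a locking shim. */
int
handle_add_brick (rpcsvc_request_t *req)
{
        return big_locked_handler (req, __handle_add_brick);
}

int
main (void)
{
        return handle_add_brick (NULL);
}

Serializing whole handlers this way trades concurrency for simplicity: no per-structure locking is needed inside each handler body.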
diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am index df15453dc..933c44019 100644 --- a/xlators/mgmt/glusterd/src/Makefile.am +++ b/xlators/mgmt/glusterd/src/Makefile.am @@ -11,17 +11,20 @@ glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \ glusterd-volgen.c glusterd-rebalance.c glusterd-quota.c \ glusterd-geo-rep.c glusterd-replace-brick.c glusterd-log-ops.c \ glusterd-volume-ops.c glusterd-brick-ops.c glusterd-mountbroker.c \ - glusterd-syncop.c glusterd-hooks.c glusterd-volume-set.c + glusterd-syncop.c glusterd-hooks.c glusterd-volume-set.c \ + glusterd-locks.c glusterd-snapshot.c glusterd-mgmt-handler.c \ + glusterd-mgmt.c glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ $(top_builddir)/rpc/xdr/src/libgfxdr.la \ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \ - $(LIBXML2_LIBS) -lcrypto + $(XML_LIBS) -lcrypto noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \ glusterd-sm.h glusterd-store.h glusterd-mem-types.h \ glusterd-pmap.h glusterd-volgen.h glusterd-mountbroker.h \ - glusterd-syncop.h glusterd-hooks.h + glusterd-syncop.h glusterd-hooks.h glusterd-locks.h \ + glusterd-mgmt.h AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ -I$(rpclibdir) -I$(CONTRIBDIR)/rbtree \ @@ -29,7 +32,7 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ -I$(CONTRIBDIR)/uuid \ -DSBIN_DIR=\"$(sbindir)\" -DDATADIR=\"$(localstatedir)\" \ -DGSYNCD_PREFIX=\"$(libexecdir)/glusterfs\"\ - -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) $(LIBXML2_CFLAGS) + -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) $(XML_CPPFLAGS) AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index 2ac2699cf..596503c21 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -335,7 +335,7 @@ out: /* Handler functions */ int -glusterd_handle_add_brick (rpcsvc_request_t *req) +__glusterd_handle_add_brick (rpcsvc_request_t *req) { int32_t ret = -1; gf_cli_req cli_req = {{0,}}; @@ -423,6 +423,10 @@ glusterd_handle_add_brick (rpcsvc_request_t *req) stripe_count); } + if (!dict_get (dict, "force")) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get flag"); + goto out; + } ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { @@ -536,9 +540,80 @@ out: return ret; } +int +glusterd_handle_add_brick (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_add_brick); +} + +static int +subvol_matcher_init (int **subvols, int count) +{ + int ret = -1; + + *subvols = GF_CALLOC (count, sizeof(int), gf_gld_mt_int); + if (*subvols) + ret = 0; + + return ret; +} + +static void +subvol_matcher_update (int *subvols, glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo) +{ + glusterd_brickinfo_t *tmp = NULL; + int32_t sub_volume = 0; + int pos = 0; + + list_for_each_entry (tmp, &volinfo->bricks, brick_list) { + + if (strcmp (tmp->hostname, brickinfo->hostname) || + strcmp (tmp->path, brickinfo->path)) { + pos++; + continue; + } + gf_log (THIS->name, GF_LOG_DEBUG, LOGSTR_FOUND_BRICK, + brickinfo->hostname, brickinfo->path, + volinfo->volname); + sub_volume = (pos / volinfo->dist_leaf_count); + subvols[sub_volume]++; + break; + } + +} + +static int +subvol_matcher_verify (int *subvols, glusterd_volinfo_t *volinfo, char *err_str, + size_t err_len, char *vol_type) +{ + int i = 0; + int ret = 0; + + do { + + if (subvols[i] % volinfo->dist_leaf_count == 0) { + 
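/*
 * A minimal standalone sketch of the counting scheme behind the
 * subvol_matcher_* helpers above: one counter per subvolume, each
 * brick named in the remove-brick request bumps the counter of the
 * subvolume it falls in (position / dist_leaf_count), and the request
 * is accepted only if every counter is a multiple of dist_leaf_count,
 * i.e. only whole subvolumes are being removed. The flat array of
 * brick positions below is an illustrative stand-in for walking
 * volinfo->bricks.
 */
#include <stdio.h>
#include <stdlib.h>

static int
whole_subvols_only (const int *removed, int nremoved,
                    int dist_leaf_count, int subvol_count)
{
        int *subvols = calloc (subvol_count, sizeof (int));
        int  ok = 1;
        int  i = 0;

        if (!subvols)
                return -1;

        for (i = 0; i < nremoved; i++)
                subvols[removed[i] / dist_leaf_count]++;

        for (i = 0; i < subvol_count; i++)
                if (subvols[i] % dist_leaf_count != 0)
                        ok = 0;

        free (subvols);
        return ok;
}

int
main (void)
{
        /* 2x3 distributed-replicate layout: bricks 0-2 are subvol 0,
         * bricks 3-5 are subvol 1. Removing 3,4,5 keeps subvolumes
         * whole; removing 2,3,4 does not. */
        int good[] = {3, 4, 5};
        int bad[]  = {2, 3, 4};

        printf ("%d %d\n",
                whole_subvols_only (good, 3, 3, 2),
                whole_subvols_only (bad,  3, 3, 2));
        return 0;
}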
continue; + } else { + ret = -1; + snprintf (err_str, err_len, + "Bricks not from same subvol for %s", vol_type); + gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str); + break; + } + } while (++i < volinfo->subvol_count); + + return ret; +} + +static void +subvol_matcher_destroy (int *subvols) +{ + GF_FREE (subvols); +} int -glusterd_handle_remove_brick (rpcsvc_request_t *req) +__glusterd_handle_remove_brick (rpcsvc_request_t *req) { int32_t ret = -1; gf_cli_req cli_req = {{0,}}; @@ -550,10 +625,7 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req) int i = 1; glusterd_volinfo_t *volinfo = NULL; glusterd_brickinfo_t *brickinfo = NULL; - int32_t pos = 0; - int32_t sub_volume = 0; - int32_t sub_volume_start = 0; - int32_t sub_volume_end = 0; + int *subvols = NULL; glusterd_brickinfo_t *tmp = NULL; char err_str[2048] = {0}; gf_cli_rsp rsp = {0,}; @@ -662,11 +734,11 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req) strcpy (vol_type, "distribute"); } - /* Do not allow remove-brick if the volume is plain stripe */ + /* Do not allow remove-brick if the volume is a stripe volume*/ if ((volinfo->type == GF_CLUSTER_TYPE_STRIPE) && (volinfo->brick_count == volinfo->stripe_count)) { snprintf (err_str, sizeof (err_str), - "Removing brick from a plain stripe is not allowed"); + "Removing brick from a stripe volume is not allowed"); gf_log (this->name, GF_LOG_ERROR, "%s", err_str); ret = -1; goto out; @@ -718,6 +790,14 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req) } strcpy (brick_list, " "); + + if ((volinfo->type != GF_CLUSTER_TYPE_NONE) && + (volinfo->subvol_count > 1)) { + ret = subvol_matcher_init (&subvols, volinfo->subvol_count); + if (ret) + goto out; + } + while ( i <= count) { snprintf (key, sizeof (key), "brick%d", i); ret = dict_get_str (dict, key, &brick); @@ -785,38 +865,18 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req) goto out; } - pos = 0; - list_for_each_entry (tmp, &volinfo->bricks, brick_list) { - - if (strcmp (tmp->hostname,brickinfo->hostname) || - strcmp (tmp->path, brickinfo->path)) { - pos++; - continue; - } + /* Find which subvolume the brick belongs to */ + subvol_matcher_update (subvols, volinfo, brickinfo); + } - gf_log (this->name, GF_LOG_DEBUG, LOGSTR_FOUND_BRICK, - brickinfo->hostname, brickinfo->path, - volinfo->volname); - if (!sub_volume && (volinfo->dist_leaf_count > 1)) { - sub_volume = (pos / volinfo->dist_leaf_count) + 1; - sub_volume_start = (volinfo->dist_leaf_count * - (sub_volume - 1)); - sub_volume_end = (volinfo->dist_leaf_count * - sub_volume) - 1; - } else { - if (pos < sub_volume_start || - pos >sub_volume_end) { - ret = -1; - snprintf (err_str, sizeof (err_str), - "Bricks not from same subvol " - "for %s", vol_type); - gf_log (this->name, GF_LOG_ERROR, - "%s", err_str); - goto out; - } - } - break; - } + /* Check if the bricks belong to the same subvolumes.*/ + if ((volinfo->type != GF_CLUSTER_TYPE_NONE) && + (volinfo->subvol_count > 1)) { + ret = subvol_matcher_verify (subvols, volinfo, + err_str, sizeof(err_str), + vol_type); + if (ret) + goto out; } ret = glusterd_op_begin_synctask (req, GD_OP_REMOVE_BRICK, dict); @@ -839,11 +899,112 @@ out: } GF_FREE (brick_list); + subvol_matcher_destroy (subvols); free (cli_req.dict.dict_val); //its malloced by xdr return ret; } +int +glusterd_handle_remove_brick (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_remove_brick); +} + +static int +_glusterd_restart_gsync_session (dict_t *this, char *key, + data_t *value, void *data) +{ + char *slave = 
NULL; + char *slave_buf = NULL; + char *path_list = NULL; + char *slave_vol = NULL; + char *slave_ip = NULL; + char *conf_path = NULL; + char **errmsg = NULL; + int ret = -1; + glusterd_gsync_status_temp_t *param = NULL; + gf_boolean_t is_running = _gf_false; + + param = (glusterd_gsync_status_temp_t *)data; + + GF_ASSERT (param); + GF_ASSERT (param->volinfo); + + slave = strchr(value->data, ':'); + if (slave) { + slave++; + slave_buf = gf_strdup (slave); + if (!slave_buf) { + gf_log ("", GF_LOG_ERROR, + "Failed to gf_strdup"); + ret = -1; + goto out; + } + } + else + return 0; + + ret = dict_set_dynstr (param->rsp_dict, "slave", slave_buf); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to store slave"); + if (slave_buf) + GF_FREE(slave_buf); + goto out; + } + + ret = glusterd_get_slave_details_confpath (param->volinfo, + param->rsp_dict, + &slave_ip, &slave_vol, + &conf_path, errmsg); + if (ret) { + if (*errmsg) + gf_log ("", GF_LOG_ERROR, "%s", *errmsg); + else + gf_log ("", GF_LOG_ERROR, + "Unable to fetch slave or confpath details."); + goto out; + } + + /* In cases that gsyncd is not running, we will not invoke it + * because of add-brick. */ + ret = glusterd_check_gsync_running_local (param->volinfo->volname, + slave, conf_path, + &is_running); + if (ret) { + gf_log ("", GF_LOG_ERROR, "gsync running validation failed."); + goto out; + } + if (_gf_false == is_running) { + gf_log ("", GF_LOG_DEBUG, "gsync session for %s and %s is" + " not running on this node. Hence not restarting.", + param->volinfo->volname, slave); + ret = 0; + goto out; + } + + ret = glusterd_get_local_brickpaths (param->volinfo, &path_list); + if (!path_list) { + gf_log ("", GF_LOG_DEBUG, "This node not being part of" + " volume should not be running gsyncd. Hence" + " no gsyncd process to restart."); + ret = 0; + goto out; + } + + ret = glusterd_check_restart_gsync_session (param->volinfo, slave, + param->rsp_dict, path_list, + conf_path, 0); + if (ret) + gf_log ("", GF_LOG_ERROR, + "Unable to restart gsync session."); + +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d.", ret); + return ret; +} /* op-sm */ @@ -851,17 +1012,21 @@ int glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count, char *bricks, dict_t *dict) { - glusterd_brickinfo_t *brickinfo = NULL; - char *brick = NULL; - int32_t i = 1; - char *brick_list = NULL; - char *free_ptr1 = NULL; - char *free_ptr2 = NULL; - char *saveptr = NULL; - int32_t ret = -1; - int32_t stripe_count = 0; - int32_t replica_count = 0; - int32_t type = 0; + char *brick = NULL; + int32_t i = 1; + char *brick_list = NULL; + char *free_ptr1 = NULL; + char *free_ptr2 = NULL; + char *saveptr = NULL; + int32_t ret = -1; + int32_t stripe_count = 0; + int32_t replica_count = 0; + int32_t type = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_gsync_status_temp_t param = {0, }; + gf_boolean_t restart_needed = 0; + char msg[1024] __attribute__((unused)) = {0, }; + int caps = 0; GF_ASSERT (volinfo); @@ -942,13 +1107,40 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count, if (count) brick = strtok_r (brick_list+1, " \n", &saveptr); +#ifdef HAVE_BD_XLATOR + if (brickinfo->vg[0]) + caps = CAPS_BD | CAPS_THIN | + CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT; +#endif while (i <= count) { - ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, &brickinfo); if (ret) goto out; +#ifdef HAVE_BD_XLATOR + /* Check for VG/thin pool if its BD volume */ + if (brickinfo->vg[0]) { + ret = glusterd_is_valid_vg (brickinfo, 0, msg); + if (ret) { 
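/*
 * The capability handling in this hunk ANDs each brick's caps into the
 * volume-wide set, so a single brick without a feature clears that
 * feature for the whole volume. A standalone sketch of the
 * intersection idiom; the flag values are illustrative, not
 * glusterd's CAPS_* constants.
 */
#include <stdio.h>

enum { CAP_BD = 1 << 0, CAP_THIN = 1 << 1, CAP_OFFLOAD_COPY = 1 << 2 };

static int
volume_caps (const int *brick_caps, int nbricks)
{
        int caps = CAP_BD | CAP_THIN | CAP_OFFLOAD_COPY;
        int i = 0;

        for (i = 0; i < nbricks; i++)
                caps &= brick_caps[i];  /* a brick can only narrow the set */

        return caps;
}

int
main (void)
{
        /* The second brick lacks thin support, so the volume loses it. */
        int bricks[] = { CAP_BD | CAP_THIN, CAP_BD };

        printf ("%#x\n", volume_caps (bricks, 2));  /* prints 0x1 */
        return 0;
}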
+ gf_log (THIS->name, GF_LOG_CRITICAL, "%s", msg); + goto out; + } + /* if anyone of the brick does not have thin support, + disable it for entire volume */ + caps &= brickinfo->caps; + } else + caps = 0; +#endif + + if (uuid_is_null (brickinfo->uuid)) { + ret = glusterd_resolve_brick (brickinfo); + if (ret) { + gf_log ("", GF_LOG_ERROR, FMTSTR_RESOLVE_BRICK, + brickinfo->hostname, brickinfo->path); + goto out; + } + } ret = glusterd_brick_start (volinfo, brickinfo, _gf_true); @@ -956,8 +1148,27 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count, goto out; i++; brick = strtok_r (NULL, " \n", &saveptr); + + /* Check if the brick is added in this node, and set + * the restart_needed flag. */ + if ((!uuid_compare (brickinfo->uuid, MY_UUID)) && + !restart_needed) { + restart_needed = 1; + gf_log ("", GF_LOG_DEBUG, + "Restart gsyncd session, if it's already " + "running."); + } } + /* If the restart_needed flag is set, restart gsyncd sessions for that + * particular master with all the slaves. */ + if (restart_needed) { + param.rsp_dict = dict; + param.volinfo = volinfo; + dict_foreach (volinfo->gsync_slaves, + _glusterd_restart_gsync_session, ¶m); + } + volinfo->caps = caps; out: GF_FREE (free_ptr1); GF_FREE (free_ptr2); @@ -1034,6 +1245,7 @@ glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr) gf_boolean_t brick_alloc = _gf_false; char *all_bricks = NULL; char *str_ret = NULL; + gf_boolean_t is_force = _gf_false; priv = THIS->private; if (!priv) @@ -1053,15 +1265,6 @@ glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr) goto out; } - if (volinfo->backend == GD_VOL_BK_BD) { - snprintf (msg, sizeof (msg), "Add brick is not supported for " - "Block backend volume %s.", volname); - gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - ret = -1; - goto out; - } - ret = glusterd_validate_volume_id (dict, volinfo); if (ret) goto out; @@ -1096,6 +1299,8 @@ glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr) goto out; } + is_force = dict_get_str_boolean (dict, "force", _gf_false); + if (bricks) { brick_list = gf_strdup (bricks); all_bricks = gf_strdup (bricks); @@ -1137,10 +1342,21 @@ glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr) } if (!uuid_compare (brickinfo->uuid, MY_UUID)) { - ret = glusterd_brick_create_path (brickinfo->hostname, - brickinfo->path, +#ifdef HAVE_BD_XLATOR + if (brickinfo->vg[0]) { + ret = glusterd_is_valid_vg (brickinfo, 1, msg); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "%s", + msg); + *op_errstr = gf_strdup (msg); + goto out; + } + } +#endif + + ret = glusterd_validate_and_create_brickpath (brickinfo, volinfo->volume_id, - op_errstr); + op_errstr, is_force); if (ret) goto out; } @@ -1227,6 +1443,16 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) case GF_OP_CMD_START: { + if ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) && + dict_get (dict, "replica-count")) { + snprintf (msg, sizeof(msg), "Migration of data is not " + "needed when reducing replica count. 
Use the" + " 'force' option"); + errstr = gf_strdup (msg); + gf_log (this->name, GF_LOG_ERROR, "%s", errstr); + goto out; + } + if (GLUSTERD_STATUS_STARTED != volinfo->status) { snprintf (msg, sizeof (msg), "Volume %s needs to be " "started before remove-brick (you can use " @@ -1236,6 +1462,17 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) gf_log (this->name, GF_LOG_ERROR, "%s", errstr); goto out; } + if (!gd_is_remove_brick_committed (volinfo)) { + snprintf (msg, sizeof (msg), "An earlier remove-brick " + "task exists for volume %s. Either commit it" + " or stop it before starting a new task.", + volinfo->volname); + errstr = gf_strdup (msg); + gf_log (this->name, GF_LOG_ERROR, "Earlier remove-brick" + " task exists for volume %s.", + volinfo->volname); + goto out; + } if (glusterd_is_defrag_on(volinfo)) { errstr = gf_strdup("Rebalance is in progress. Please " "retry after completion"); @@ -1243,7 +1480,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) goto out; } - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { ret = glusterd_generate_and_set_task_id (dict, GF_REMOVE_BRICK_TID_KEY); if (ret) { @@ -1430,15 +1667,6 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr) goto out; } - /* Need to reset the defrag/rebalance status accordingly */ - switch (volinfo->rebal.defrag_status) { - case GF_DEFRAG_STATUS_FAILED: - case GF_DEFRAG_STATUS_COMPLETE: - volinfo->rebal.defrag_status = 0; - default: - break; - } - ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) goto out; @@ -1470,6 +1698,8 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) glusterd_brickinfo_t *tmp = NULL; char *task_id_str = NULL; xlator_t *this = NULL; + dict_t *bricks_dict = NULL; + char *brick_tmpstr = NULL; this = THIS; GF_ASSERT (this); @@ -1497,7 +1727,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) /* Set task-id, if available, in ctx dict for operations other than * start */ - if (is_origin_glusterd () && (cmd != GF_OP_CMD_START)) { + if (is_origin_glusterd (dict) && (cmd != GF_OP_CMD_START)) { if (!uuid_is_null (volinfo->rebal.rebalance_id)) { ret = glusterd_copy_uuid_to_dict (volinfo->rebal.rebalance_id, dict, @@ -1510,9 +1740,14 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) } } - /* Clear task-id on commmitting/stopping of remove-brick operation */ - if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS)) + /* Clear task-id, rebal.op and stored bricks on commmitting/stopping + * remove-brick */ + if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS)) { uuid_clear (volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_NONE; + dict_unref (volinfo->rebal.dict); + volinfo->rebal.dict = NULL; + } ret = -1; switch (cmd) { @@ -1559,6 +1794,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) ret = 0; } else { uuid_parse (task_id_str, volinfo->rebal.rebalance_id) ; + volinfo->rebal.op = GD_OP_REMOVE_BRICK; } force = 0; break; @@ -1595,7 +1831,20 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) goto out; } - + /* Save the list of bricks for later usage. Right now this is required + * for displaying the task parameters with task status in volume status. 
+ */ + bricks_dict = dict_new (); + if (!bricks_dict) { + ret = -1; + goto out; + } + ret = dict_set_int32 (bricks_dict, "count", count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to save remove-brick count"); + goto out; + } while ( i <= count) { snprintf (key, 256, "brick%d", i); ret = dict_get_str (dict, key, &brick); @@ -1605,6 +1854,21 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) goto out; } + brick_tmpstr = gf_strdup (brick); + if (!brick_tmpstr) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Failed to duplicate brick name"); + goto out; + } + ret = dict_set_dynstr (bricks_dict, key, brick_tmpstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add brick to dict"); + goto out; + } + brick_tmpstr = NULL; + ret = glusterd_op_perform_remove_brick (volinfo, brick, force, &need_rebalance); if (ret) @@ -1618,6 +1882,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) volinfo->replica_count, replica_count, volinfo->volname); volinfo->replica_count = replica_count; + volinfo->sub_count = replica_count; volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo); volinfo->subvol_count = (volinfo->brick_count / volinfo->dist_leaf_count); @@ -1634,6 +1899,8 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) } } } + volinfo->rebal.dict = bricks_dict; + bricks_dict = NULL; ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { @@ -1678,5 +1945,9 @@ out: if (ret && err_str[0] && op_errstr) *op_errstr = gf_strdup (err_str); + GF_FREE (brick_tmpstr); + if (bricks_dict) + dict_unref (bricks_dict); + return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c index 758ab1a14..5786694bd 100644 --- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c @@ -25,18 +25,199 @@ #include <signal.h> +static int +dict_get_param (dict_t *dict, char *key, char **param); + +static int +glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave, + char *conf_path, char **statefile); + +static int +glusterd_get_slave_info (char *slave, char **slave_ip, + char **slave_vol, char **op_errstr); + +static int +glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen); + +struct gsync_config_opt_vals_ gsync_confopt_vals[] = { + {.op_name = "change_detector", + .no_of_pos_vals = 2, + .case_sensitive = _gf_true, + .values = {"xsync", "changelog"}, + }, + {.op_name = "special_sync_mode", + .no_of_pos_vals = 2, + .case_sensitive = _gf_true, + .values = {"partial", "recover"} + }, + {.op_name = "log-level", + .no_of_pos_vals = 5, + .case_sensitive = _gf_false, + .values = {"critical", "error", "warning", "info", "debug"} + }, + {.op_name = NULL, + }, +}; + static char *gsync_reserved_opts[] = { "gluster-command-dir", "pid-file", + "remote-gsyncd" "state-file", "session-owner", "state-socket-unencoded", "socketdir", + "ignore-deletes", + "local-id", + "local-path", + "slave-id", NULL }; int -glusterd_handle_gsync_set (rpcsvc_request_t *req) +__glusterd_handle_sys_exec (rpcsvc_request_t *req) +{ + int32_t ret = 0; + dict_t *dict = NULL; + gf_cli_req cli_req = {{0},}; + glusterd_op_t cli_op = GD_OP_SYS_EXEC; + glusterd_conf_t *priv = NULL; + char *host_uuid = NULL; + char err_str[2048] = {0,}; + xlator_t *this = NULL; + + GF_ASSERT (req); + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + ret = xdr_to_generic (req->msg[0], &cli_req, + (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { 
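/*
 * __glusterd_handle_sys_exec here and __glusterd_handle_copy_file
 * below share one decode skeleton: XDR-decode the request, unserialize
 * the payload into a dict, tag the dict with the receiving node's
 * uuid, then hand off to the synctask-based op machinery. A compressed
 * sketch of that control flow; request_t, decode(), unserialize() and
 * begin_op() are illustrative stand-ins, not glusterd's API.
 */
#include <stdio.h>

typedef struct { const char *payload; } request_t;
typedef struct { char host_uuid[64]; } mini_dict_t;

static int decode (const request_t *r) { return r->payload ? 0 : -1; }
static int unserialize (const request_t *r, mini_dict_t *d)
{ (void) r; (void) d; return 0; }
static int begin_op (const mini_dict_t *d)
{ printf ("op from %s\n", d->host_uuid); return 0; }

static int
handle_request (const request_t *req, const char *my_uuid)
{
        mini_dict_t dict = { "" };
        int         ret = -1;

        if (decode (req) < 0)           /* GARBAGE_ARGS in the real code */
                goto out;
        if (unserialize (req, &dict) < 0)
                goto out;               /* "Unable to decode the command" */

        /* Record which node received the CLI request. */
        snprintf (dict.host_uuid, sizeof (dict.host_uuid), "%s", my_uuid);

        ret = begin_op (&dict);
out:
        return ret;                     /* caller sends the CLI response */
}

int
main (void)
{
        request_t req = { "payload" };

        return handle_request (&req, "local-node-uuid") != 0;
}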
+ req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len) { + dict = dict_new (); + if (!dict) + goto out; + + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "unserialize req-buffer to dictionary"); + snprintf (err_str, sizeof (err_str), "Unable to decode " + "the command"); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; + } + + host_uuid = gf_strdup (uuid_utoa(MY_UUID)); + if (host_uuid == NULL) { + snprintf (err_str, sizeof (err_str), "Failed to get " + "the uuid of local glusterd"); + ret = -1; + goto out; + } + + ret = dict_set_dynstr (dict, "host-uuid", host_uuid); + if (ret) + goto out; + } + + ret = glusterd_op_begin_synctask (req, cli_op, dict); + +out: + if (ret) { + if (err_str[0] == '\0') + snprintf (err_str, sizeof (err_str), + "Operation failed"); + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + dict, err_str); + } + return ret; +} + +int +__glusterd_handle_copy_file (rpcsvc_request_t *req) +{ + int32_t ret = 0; + dict_t *dict = NULL; + gf_cli_req cli_req = {{0},}; + glusterd_op_t cli_op = GD_OP_COPY_FILE; + glusterd_conf_t *priv = NULL; + char *host_uuid = NULL; + char err_str[2048] = {0,}; + xlator_t *this = NULL; + + GF_ASSERT (req); + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + ret = xdr_to_generic (req->msg[0], &cli_req, + (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len) { + dict = dict_new (); + if (!dict) + goto out; + + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "unserialize req-buffer to dictionary"); + snprintf (err_str, sizeof (err_str), "Unable to decode " + "the command"); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; + } + + host_uuid = gf_strdup (uuid_utoa(MY_UUID)); + if (host_uuid == NULL) { + snprintf (err_str, sizeof (err_str), "Failed to get " + "the uuid of local glusterd"); + ret = -1; + goto out; + } + + ret = dict_set_dynstr (dict, "host-uuid", host_uuid); + if (ret) + goto out; + } + + ret = glusterd_op_begin_synctask (req, cli_op, dict); + +out: + if (ret) { + if (err_str[0] == '\0') + snprintf (err_str, sizeof (err_str), + "Operation failed"); + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + dict, err_str); + } + return ret; +} + +int +__glusterd_handle_gsync_set (rpcsvc_request_t *req) { int32_t ret = 0; dict_t *dict = NULL; @@ -99,13 +280,13 @@ glusterd_handle_gsync_set (rpcsvc_request_t *req) ret = dict_get_str (dict, "master", &master); if (ret < 0) { gf_log (this->name, GF_LOG_INFO, "master not found, while " - "handling"GEOREP" options"); + "handling "GEOREP" options"); master = "(No Master)"; } ret = dict_get_str (dict, "slave", &slave); if (ret < 0) { - gf_log (this->name, GF_LOG_INFO, "slave not not found, while" + gf_log (this->name, GF_LOG_INFO, "slave not found, while " "handling "GEOREP" options"); slave = "(No Slave)"; } @@ -119,6 +300,10 @@ glusterd_handle_gsync_set (rpcsvc_request_t *req) } switch (type) { + case GF_GSYNC_OPTION_TYPE_CREATE: + strncpy (operation, "create", sizeof (operation)); + cli_op = GD_OP_GSYNC_CREATE; + break; case GF_GSYNC_OPTION_TYPE_START: strncpy (operation, "start", sizeof (operation)); @@ -135,12 +320,9 @@ glusterd_handle_gsync_set (rpcsvc_request_t *req) case GF_GSYNC_OPTION_TYPE_STATUS: 
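/*
 * The gsync_reserved_opts table earlier in this file is a
 * NULL-terminated list scanned to reject user configuration of
 * gsyncd-internal options. One C pitfall worth noting in such tables:
 * adjacent string literals concatenate, so the missing comma after
 * "remote-gsyncd" above silently fuses it with "state-file" into a
 * single entry, and neither intended option then matches. A minimal
 * lookup over such a table:
 */
#include <stdio.h>
#include <string.h>

static const char *reserved[] = {
        "pid-file",
        "state-file",
        "session-owner",
        NULL            /* terminator, as in gsync_reserved_opts */
};

static int
is_reserved (const char *opt)
{
        int i = 0;

        for (i = 0; reserved[i]; i++)
                if (strcmp (reserved[i], opt) == 0)
                        return 1;
        return 0;
}

int
main (void)
{
        printf ("%d %d\n", is_reserved ("pid-file"),
                is_reserved ("log-level"));     /* prints 1 0 */
        return 0;
}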
strncpy (operation, "status", sizeof (operation)); break; - case GF_GSYNC_OPTION_TYPE_ROTATE: - strncpy (operation, "rotate", sizeof(operation)); - break; } - ret = glusterd_op_begin_synctask (req, GD_OP_GSYNC_SET, dict); + ret = glusterd_op_begin_synctask (req, cli_op, dict); out: if (ret) { @@ -153,6 +335,23 @@ out: return ret; } +int +glusterd_handle_sys_exec (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_sys_exec); +} + +int +glusterd_handle_copy_file (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_copy_file); +} + +int +glusterd_handle_gsync_set (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_gsync_set); +} /***** * @@ -437,7 +636,7 @@ _fcbk_conftodict (char *resbuf, size_t blen, FILE *fp, void *data) } static int -glusterd_gsync_get_config (char *master, char *slave, char *gl_workdir, dict_t *dict) +glusterd_gsync_get_config (char *master, char *slave, char *conf_path, dict_t *dict) { /* key + value, where value must be able to accommodate a path */ char resbuf[256 + PATH_MAX] = {0,}; @@ -445,7 +644,7 @@ glusterd_gsync_get_config (char *master, char *slave, char *gl_workdir, dict_t * runinit (&runner); runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL); - runner_argprintf (&runner, "%s/"GSYNC_CONF, gl_workdir); + runner_argprintf (&runner, "%s", conf_path); runner_argprintf (&runner, ":%s", master); runner_add_args (&runner, slave, "--config-get-all", NULL); @@ -455,13 +654,13 @@ glusterd_gsync_get_config (char *master, char *slave, char *gl_workdir, dict_t * static int glusterd_gsync_get_param_file (char *prmfile, const char *param, char *master, - char *slave, char *gl_workdir) + char *slave, char *conf_path) { runner_t runner = {0,}; runinit (&runner); runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL); - runner_argprintf (&runner, "%s/"GSYNC_CONF, gl_workdir); + runner_argprintf (&runner, "%s", conf_path); runner_argprintf (&runner, ":%s", master); runner_add_args (&runner, slave, "--config-get", NULL); runner_argprintf (&runner, "%s-file", param); @@ -470,83 +669,14 @@ glusterd_gsync_get_param_file (char *prmfile, const char *param, char *master, } static int -glusterd_gsync_get_session_owner (char *master, char *slave, char *session_owner, - char *gl_workdir) -{ - runner_t runner = {0,}; - - runinit(&runner); - runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL); - runner_argprintf (&runner, "%s/"GSYNC_CONF, gl_workdir); - runner_argprintf (&runner, ":%s", master); - runner_add_args (&runner, slave, "--config-get", "session-owner", - NULL); - - return glusterd_query_extutil (session_owner, &runner); -} - -/* check whether @slave is local or remote. 
normalized - * urls starting with ssh are considered to be remote - * @returns - * 1 if slave is remote - * 0 is slave is local - */ -static int -glusterd_gsync_slave_is_remote (char *slave) -{ - int ret = 0; - char *ssh_pos = NULL; - - ssh_pos = strstr(slave, "ssh://"); - if ( ssh_pos && ((ssh_pos - slave) == 0) ) - ret = 1; - - return ret; -} - -static int -glusterd_gsync_get_slave_log_file (char *master, char *slave, char *log_file) -{ - int ret = -1; - runner_t runner = {0,}; - char uuid_str[64] = {0,}; - glusterd_conf_t *priv = NULL; - char *gl_workdir = NULL; - - GF_ASSERT(THIS); - GF_ASSERT(THIS->private); - - priv = THIS->private; - - GF_VALIDATE_OR_GOTO("gsyncd", master, out); - GF_VALIDATE_OR_GOTO("gsyncd", slave, out); - - gl_workdir = priv->workdir; - - /* get the session owner for the master-slave session */ - ret = glusterd_gsync_get_session_owner (master, slave, uuid_str, - gl_workdir); - if (ret) - goto out; - - /* get the log file for the slave */ - runinit(&runner); - runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL); - runner_argprintf (&runner, "%s/"GSYNC_CONF, gl_workdir); - runner_argprintf (&runner, "--session-owner=%s", uuid_str); - runner_add_args (&runner, slave, "--config-get", "log-file", NULL); - - ret = glusterd_query_extutil (log_file, &runner); - - out: - return ret; -} - -static int -gsyncd_getpidfile (char *master, char *slave, char *pidfile) +gsyncd_getpidfile (char *master, char *slave, char *pidfile, char *conf_path) { int ret = -1; glusterd_conf_t *priv = NULL; + char *confpath = NULL; + char conf_buf[PATH_MAX] = ""; + struct stat stbuf = {0,}; + GF_ASSERT (THIS); GF_ASSERT (THIS->private); @@ -556,8 +686,22 @@ gsyncd_getpidfile (char *master, char *slave, char *pidfile) GF_VALIDATE_OR_GOTO ("gsync", master, out); GF_VALIDATE_OR_GOTO ("gsync", slave, out); + ret = lstat (conf_path, &stbuf); + if (!ret) { + gf_log ("", GF_LOG_DEBUG, "Using passed config template(%s).", + conf_path); + confpath = conf_path; + } else { + ret = snprintf (conf_buf, sizeof(conf_buf) - 1, + "%s/"GSYNC_CONF_TEMPLATE, priv->workdir); + conf_buf[ret] = '\0'; + confpath = conf_buf; + gf_log ("", GF_LOG_DEBUG, "Using default config template(%s).", + confpath); + } + ret = glusterd_gsync_get_param_file (pidfile, "pid", master, - slave, priv->workdir); + slave, confpath); if (ret == -1) { ret = -2; gf_log ("", GF_LOG_WARNING, "failed to create the pidfile string"); @@ -571,32 +715,6 @@ gsyncd_getpidfile (char *master, char *slave, char *pidfile) } static int -glusterd_gsyncd_getlogfile (char *master, char *slave, char *log_file) -{ - int ret = -1; - glusterd_conf_t *priv = NULL; - - GF_ASSERT (THIS); - GF_ASSERT (THIS->private); - - priv = THIS->private; - - GF_VALIDATE_OR_GOTO ("gsync", master, out); - GF_VALIDATE_OR_GOTO ("gsync", slave, out); - - ret = glusterd_gsync_get_param_file (log_file, "log", master, - slave, priv->workdir); - if (ret == -1) { - ret = -2; - gf_log ("", GF_LOG_WARNING, "failed to gsyncd logfile"); - goto out; - } - - out: - return ret; -} - -static int gsync_status_byfd (int fd) { GF_ASSERT (fd >= -1); @@ -613,12 +731,12 @@ gsync_status_byfd (int fd) * return -1 when not running */ int -gsync_status (char *master, char *slave, int *status) +gsync_status (char *master, char *slave, char *conf_path, int *status) { char pidfile[PATH_MAX] = {0,}; int fd = -1; - fd = gsyncd_getpidfile (master, slave, pidfile); + fd = gsyncd_getpidfile (master, slave, pidfile, conf_path); if (fd == -2) return -1; @@ -655,16 +773,48 @@ out: } static int 
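/*
 * gsyncd_getpidfile above now takes a per-session conf_path and falls
 * back to the shared template when that file is absent: lstat() the
 * candidate, use it on success, otherwise build the default template
 * path under the glusterd working directory. A standalone sketch of
 * that selection; the template file name here is illustrative rather
 * than the GSYNC_CONF_TEMPLATE macro.
 */
#include <stdio.h>
#include <sys/stat.h>

static const char *
pick_config (const char *session_conf, char *buf, size_t blen,
             const char *workdir)
{
        struct stat st = {0,};

        if (lstat (session_conf, &st) == 0)
                return session_conf;    /* per-session config exists */

        /* Fall back to the default template under the work dir. */
        snprintf (buf, blen, "%s/geo-replication/gsyncd_template.conf",
                  workdir);
        return buf;
}

int
main (void)
{
        char buf[4096] = "";

        puts (pick_config ("/var/lib/glusterd/geo-replication/"
                           "vol_host_slave/gsyncd.conf",
                           buf, sizeof (buf), "/var/lib/glusterd"));
        return 0;
}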
-gsync_verify_config_options (dict_t *dict, char **op_errstr) +glusterd_verify_gsyncd_spawn (char *master, char *slave) { - char **resopt = NULL; - int i = 0; - char *subop = NULL; - char *slave = NULL; - char *op_name = NULL; - char *op_value = NULL; - char *t = NULL; - gf_boolean_t banned = _gf_true; + int ret = 0; + runner_t runner = {0,}; + + runinit (&runner); + runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", + "--verify", "spawning", NULL); + runner_argprintf (&runner, ":%s", master); + runner_add_args (&runner, slave, NULL); + runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); + ret = runner_start (&runner); + if (ret) { + gf_log ("", GF_LOG_ERROR, "spawning child failed"); + ret = -1; + goto out; + } + + if (runner_end (&runner) != 0) + ret = -1; + +out: + gf_log ("", GF_LOG_DEBUG, "returning %d", ret); + return ret; +} + +static int +gsync_verify_config_options (dict_t *dict, char **op_errstr, char *volname) +{ + char **resopt = NULL; + int i = 0; + int ret = -1; + char *subop = NULL; + char *slave = NULL; + char *op_name = NULL; + char *op_value = NULL; + char *t = NULL; + char errmsg[PATH_MAX] = ""; + gf_boolean_t banned = _gf_true; + gf_boolean_t op_match = _gf_true; + gf_boolean_t val_match = _gf_true; + struct gsync_config_opt_vals_ *conf_vals = NULL; if (dict_get_str (dict, "subop", &subop) != 0) { gf_log ("", GF_LOG_WARNING, "missing subop"); @@ -688,6 +838,12 @@ gsync_verify_config_options (dict_t *dict, char **op_errstr) } if (runcmd (GSYNCD_PREFIX"/gsyncd", "--config-check", op_name, NULL)) { + ret = glusterd_verify_gsyncd_spawn (volname, slave); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to spawn gsyncd"); + return 0; + } + gf_log ("", GF_LOG_WARNING, "Invalid option %s", op_name); *op_errstr = gf_strdup ("Invalid option"); @@ -732,35 +888,109 @@ gsync_verify_config_options (dict_t *dict, char **op_errstr) } } + /* Check options in gsync_confopt_vals for invalid values */ + for (conf_vals = gsync_confopt_vals; conf_vals->op_name; conf_vals++) { + op_match = _gf_true; + for (i = 0; conf_vals->op_name[i] && op_name[i]; i++) { + if (conf_vals->op_name[i] == op_name[i] || + (conf_vals->op_name[i] == '_' && op_name[i] == '-')) + continue; + op_match = _gf_false; + } + + if (op_match) { + val_match = _gf_false; + for (i = 0; i < conf_vals->no_of_pos_vals; i++) { + if(conf_vals->case_sensitive){ + if (!strcmp (conf_vals->values[i], op_value)) + val_match = _gf_true; + } else { + if (!strcasecmp (conf_vals->values[i], op_value)) + val_match = _gf_true; + } + } + + if (!val_match) { + ret = snprintf (errmsg, sizeof(errmsg) - 1, + "Invalid values (%s) for" + " option %s", op_value, + op_name); + errmsg[ret] = '\0'; + + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + *op_errstr = gf_strdup (errmsg); + return -1; + } + } + } + return 0; } static int glusterd_get_gsync_status_mst_slv (glusterd_volinfo_t *volinfo, - char *slave, dict_t *rsp_dict, char *node); + char *slave, char *conf_path, + dict_t *rsp_dict, char *node); static int _get_status_mst_slv (dict_t *this, char *key, data_t *value, void *data) { glusterd_gsync_status_temp_t *param = NULL; char *slave = NULL; - int ret = 0; + char *slave_buf = NULL; + char *slave_ip = NULL; + char *slave_vol = NULL; + char *errmsg = NULL; + char conf_path[PATH_MAX] = ""; + int ret = -1; + glusterd_conf_t *priv = NULL; param = (glusterd_gsync_status_temp_t *)data; GF_ASSERT (param); GF_ASSERT (param->volinfo); + if (THIS) + priv = THIS->private; + if (priv == NULL) { + gf_log ("", GF_LOG_ERROR, "priv of glusterd not present"); + goto out; 
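/*
 * _get_status_mst_slv in this hunk walks volinfo->gsync_slaves, whose
 * values carry a uuid-prefixed slave URL of the form "<uuid>:<slave>".
 * It strips the uuid at the first ':' and then composes the
 * per-session config path
 * "<workdir>/geo-replication/<master>_<slave_ip>_<slave_vol>/gsyncd.conf".
 * A standalone sketch of those two steps; the sample entry and the
 * literal workdir/volume names are illustrative.
 */
#include <stdio.h>
#include <string.h>

int
main (void)
{
        const char *entry = "0b0b4e4c-uuid:ssh://root@backup::slavevol";
        const char *slave = strchr (entry, ':');
        char        conf_path[4096] = "";

        if (!slave)
                return 1;
        slave++;                        /* skip the "<uuid>:" prefix */

        /* slave_ip and slave_vol come from glusterd_get_slave_info()
         * in the real code; constants stand in for them here. */
        snprintf (conf_path, sizeof (conf_path),
                  "%s/geo-replication/%s_%s_%s/gsyncd.conf",
                  "/var/lib/glusterd", "mastervol", "backup", "slavevol");

        printf ("slave = %s\nconf  = %s\n", slave, conf_path);
        return 0;
}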
+ } + slave = strchr(value->data, ':'); - if (slave) - slave ++; - else + if (!slave) return 0; + slave++; + + ret = glusterd_get_slave_info (slave, &slave_ip, &slave_vol, &errmsg); + if (ret) { + if (errmsg) + gf_log ("", GF_LOG_ERROR, "Unable to fetch " + "slave details. Error: %s", errmsg); + else + gf_log ("", GF_LOG_ERROR, + "Unable to fetch slave details."); + ret = -1; + goto out; + } + + ret = snprintf (conf_path, sizeof(conf_path) - 1, + "%s/"GEOREP"/%s_%s_%s/gsyncd.conf", + priv->workdir, param->volinfo->volname, + slave_ip, slave_vol); + conf_path[ret] = '\0'; ret = glusterd_get_gsync_status_mst_slv(param->volinfo, - slave, param->rsp_dict, + slave, conf_path, + param->rsp_dict, param->node); - return 0; +out: + + if (slave_buf) + GF_FREE(slave_buf); + + gf_log ("", GF_LOG_DEBUG, "Returning %d.", ret); + return ret; } @@ -781,19 +1011,22 @@ static int glusterd_remove_slave_in_info (glusterd_volinfo_t *volinfo, char *slave, char **op_errstr) { + int zero_slave_entries = _gf_true; int ret = 0; char *slavekey = NULL; GF_ASSERT (volinfo); GF_ASSERT (slave); - ret = glusterd_get_slave (volinfo, slave, &slavekey); - if (ret < 0) { - ret++; - goto out; - } - - dict_del (volinfo->gsync_slaves, slavekey); + do { + ret = glusterd_get_slave (volinfo, slave, &slavekey); + if (ret < 0 && zero_slave_entries) { + ret++; + goto out; + } + zero_slave_entries = _gf_false; + dict_del (volinfo->gsync_slaves, slavekey); + } while (ret >= 0); ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); @@ -846,8 +1079,9 @@ glusterd_gsync_get_uuid (char *slave, glusterd_volinfo_t *vol, return ret; } -static int +int glusterd_check_gsync_running_local (char *master, char *slave, + char *conf_path, gf_boolean_t *is_run) { int ret = -1; @@ -858,7 +1092,7 @@ glusterd_check_gsync_running_local (char *master, char *slave, GF_ASSERT (is_run); *is_run = _gf_false; - ret = gsync_status (master, slave, &ret_status); + ret = gsync_status (master, slave, conf_path, &ret_status); if (ret == 0 && ret_status == 0) { *is_run = _gf_true; } else if (ret == -1) { @@ -875,7 +1109,8 @@ glusterd_check_gsync_running_local (char *master, char *slave, static int glusterd_store_slave_in_info (glusterd_volinfo_t *volinfo, char *slave, - char *host_uuid, char **op_errstr) + char *host_uuid, char **op_errstr, + gf_boolean_t is_force) { int ret = 0; int maxslv = 0; @@ -898,7 +1133,8 @@ glusterd_store_slave_in_info (glusterd_volinfo_t *volinfo, char *slave, case -1: break; default: - GF_ASSERT (ret > 0); + if (!is_force) + GF_ASSERT (ret > 0); ret = dict_get_str (volinfo->gsync_slaves, slavekey, &slaveentry); GF_ASSERT (ret == 0); @@ -907,13 +1143,23 @@ glusterd_store_slave_in_info (glusterd_volinfo_t *volinfo, char *slave, * assert an uuid mismatch */ t = strtail (slaveentry, host_uuid); - GF_ASSERT (!t || *t != ':'); + if (!is_force) + GF_ASSERT (!t || *t != ':'); + + if (is_force) { + gf_log ("", GF_LOG_DEBUG, GEOREP" has already been " + "invoked for the %s (master) and %s (slave)." 
+ " Allowing without saving info again due to" + " force command.", volinfo->volname, slave); + ret = 0; + goto out; + } gf_log ("", GF_LOG_ERROR, GEOREP" has already been invoked for " "the %s (master) and %s (slave) " "from a different machine", volinfo->volname, slave); - *op_errstr = gf_strdup (GEOREP" already running in an an" + *op_errstr = gf_strdup (GEOREP" already running in " "another machine"); ret = -1; goto out; @@ -946,23 +1192,26 @@ glusterd_store_slave_in_info (glusterd_volinfo_t *volinfo, char *slave, return ret; } - static int glusterd_op_verify_gsync_start_options (glusterd_volinfo_t *volinfo, - char *slave, char **op_errstr) + char *slave, char *conf_path, + char *statefile, char **op_errstr, + gf_boolean_t is_force) { int ret = -1; gf_boolean_t is_running = _gf_false; char msg[2048] = {0}; uuid_t uuid = {0}; - glusterd_conf_t *priv = NULL; - xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + struct stat stbuf = {0,}; this = THIS; GF_ASSERT (volinfo); GF_ASSERT (slave); GF_ASSERT (op_errstr); + GF_ASSERT (conf_path); GF_ASSERT (this && this->private); priv = this->private; @@ -972,26 +1221,56 @@ glusterd_op_verify_gsync_start_options (glusterd_volinfo_t *volinfo, "before "GEOREP" start", volinfo->volname); goto out; } + + ret = lstat (statefile, &stbuf); + if (ret) { + snprintf (msg, sizeof (msg), "Session between %s and %s has" + " not been created. Please create session and retry.", + volinfo->volname, slave); + gf_log ("", GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + goto out; + } + + /* Check if the gsync slave info is stored. If not + * session has not been created */ + ret = glusterd_gsync_get_uuid (slave, volinfo, uuid); + if (ret) { + snprintf (msg, sizeof (msg), "Session between %s and %s has" + " not been created. Please create session and retry.", + volinfo->volname, slave); + gf_log ("", GF_LOG_ERROR, "%s", msg); + goto out; + } + + if (is_force) { + ret = 0; + goto out; + } + /*Check if the gsync is already started in cmd. 
inited host * If so initiate add it into the glusterd's priv*/ - ret = glusterd_gsync_get_uuid (slave, volinfo, uuid); - if ((ret == 0) && (uuid_compare (MY_UUID, uuid) == 0)) { - ret = glusterd_check_gsync_running_local (volinfo->volname, - slave, &is_running); - if (ret) { - snprintf (msg, sizeof (msg), GEOREP" start option " - "validation failed "); - goto out; - } - if (_gf_true == is_running) { - snprintf (msg, sizeof (msg), GEOREP " session between" - " %s & %s already started", volinfo->volname, - slave); - ret = -1; - goto out; - } + ret = glusterd_check_gsync_running_local (volinfo->volname, + slave, conf_path, + &is_running); + if (ret) { + snprintf (msg, sizeof (msg), GEOREP" start option " + "validation failed "); + goto out; + } + if (_gf_true == is_running) { + snprintf (msg, sizeof (msg), GEOREP " session between" + " %s & %s already started", volinfo->volname, + slave); + ret = -1; + goto out; + } + + ret = glusterd_verify_gsyncd_spawn (volinfo->volname, slave); + if (ret) { + snprintf (msg, sizeof (msg), "Unable to spawn gsyncd"); + gf_log ("", GF_LOG_ERROR, "%s", msg); } - ret = 0; out: if (ret && (msg[0] != '\0')) { *op_errstr = gf_strdup (msg); @@ -1017,11 +1296,13 @@ glusterd_check_gsync_running (glusterd_volinfo_t *volinfo, gf_boolean_t *flag) static int glusterd_op_verify_gsync_running (glusterd_volinfo_t *volinfo, - char *slave, char **op_errstr) + char *slave, char *conf_path, + char **op_errstr) { - int ret = -1; - char msg[2048] = {0}; - uuid_t uuid = {0}; + int pfd = -1; + int ret = -1; + char msg[2048] = {0}; + char pidfile[PATH_MAX] = {0,}; GF_ASSERT (THIS && THIS->private); GF_ASSERT (volinfo); @@ -1034,13 +1315,26 @@ glusterd_op_verify_gsync_running (glusterd_volinfo_t *volinfo, goto out; } - ret = glusterd_gsync_get_uuid (slave, volinfo, uuid); - if (ret == -1) { - snprintf (msg, sizeof (msg), GEOREP" session between %s & %s" - " not active", volinfo->volname, slave); + + pfd = gsyncd_getpidfile (volinfo->volname, slave, pidfile, conf_path); + if (pfd == -2) { + gf_log ("", GF_LOG_ERROR, GEOREP" stop validation " + "failed for %s & %s", volinfo->volname, slave); + ret = -1; + goto out; + } + if (gsync_status_byfd (pfd) == -1) { + snprintf (msg, sizeof (msg), GEOREP" session b/w %s & %s is not" + " running on this node.", volinfo->volname, slave); + gf_log ("", GF_LOG_ERROR, "%s", msg); + ret = -1; + /* monitor gsyncd already dead */ goto out; } + if (pfd < 0) + goto out; + ret = 0; out: if (ret && (msg[0] != '\0')) { @@ -1059,6 +1353,18 @@ glusterd_verify_gsync_status_opts (dict_t *dict, char **op_errstr) gf_boolean_t exists = _gf_false; glusterd_volinfo_t *volinfo = NULL; int ret = 0; + char *conf_path = NULL; + char *slave_ip = NULL; + char *slave_vol = NULL; + glusterd_conf_t *priv = NULL; + + if (THIS) + priv = THIS->private; + if (priv == NULL) { + gf_log ("", GF_LOG_ERROR, "priv of glusterd not present"); + *op_errstr = gf_strdup ("glusterd defunct"); + goto out; + } ret = dict_get_str (dict, "master", &volname); if (ret < 0) { @@ -1083,16 +1389,25 @@ glusterd_verify_gsync_status_opts (dict_t *dict, char **op_errstr) goto out; } - out: + ret = glusterd_get_slave_details_confpath (volinfo, dict, &slave_ip, + &slave_vol, &conf_path, + op_errstr); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to fetch slave or confpath details."); + ret = -1; + goto out; + } + +out: gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; - } int glusterd_op_gsync_args_get (dict_t *dict, char **op_errstr, - char **master, char **slave) + char **master, char 
**slave, char **host_uuid) { int ret = -1; @@ -1117,6 +1432,14 @@ glusterd_op_gsync_args_get (dict_t *dict, char **op_errstr, } } + if (host_uuid) { + ret = dict_get_str (dict, "host-uuid", host_uuid); + if (ret < 0) { + gf_log ("", GF_LOG_WARNING, "host_uuid not found"); + *op_errstr = gf_strdup ("host_uuid not found"); + goto out; + } + } ret = 0; out: @@ -1125,17 +1448,647 @@ out: } int +glusterd_op_stage_sys_exec (dict_t *dict, char **op_errstr) +{ + char errmsg[PATH_MAX] = ""; + char *command = NULL; + char command_path[PATH_MAX] = ""; + struct stat st = {0,}; + int ret = -1; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + + if (conf->op_version < 2) { + gf_log ("", GF_LOG_ERROR, "Op Version not supported."); + snprintf (errmsg, sizeof(errmsg), "One or more nodes do not" + " support the required op version."); + *op_errstr = gf_strdup (errmsg); + ret = -1; + goto out; + } + + ret = dict_get_str (dict, "command", &command); + if (ret) { + strcpy (errmsg, "internal error"); + gf_log ("", GF_LOG_ERROR, + "Unable to get command from dict"); + goto out; + } + + /* enforce local occurrence of the command */ + if (strchr (command, '/')) { + strcpy (errmsg, "invalid command name"); + ret = -1; + goto out; + } + + sprintf (command_path, GSYNCD_PREFIX"/peer_%s", command); + /* check if it's executable */ + ret = access (command_path, X_OK); + if (!ret) + /* check if it's a regular file */ + ret = stat (command_path, &st); + if (!ret && !S_ISREG (st.st_mode)) + ret = -1; + +out: + if (ret) { + if (errmsg[0] == '\0') + snprintf (errmsg, sizeof (errmsg), "%s not found.", + command); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + } + + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_op_stage_copy_file (dict_t *dict, char **op_errstr) +{ + char abs_filename[PATH_MAX] = ""; + char errmsg[PATH_MAX] = ""; + char *filename = NULL; + char *host_uuid = NULL; + char uuid_str [64] = {0}; + int ret = -1; + glusterd_conf_t *priv = NULL; + struct stat stbuf = {0,}; + + if (THIS) + priv = THIS->private; + if (priv == NULL) { + gf_log ("", GF_LOG_ERROR, "priv of glusterd not present"); + *op_errstr = gf_strdup ("glusterd defunct"); + goto out; + } + + if (priv->op_version < 2) { + gf_log ("", GF_LOG_ERROR, "Op Version not supported."); + snprintf (errmsg, sizeof(errmsg), "One or more nodes do not" + " support the required op version."); + *op_errstr = gf_strdup (errmsg); + ret = -1; + goto out; + } + + ret = dict_get_str (dict, "host-uuid", &host_uuid); + if (ret < 0) { + gf_log ("", GF_LOG_ERROR, "Unable to fetch" + " host-uuid from dict."); + goto out; + } + + uuid_utoa_r (MY_UUID, uuid_str); + if (!strcmp (uuid_str, host_uuid)) { + ret = dict_get_str (dict, "source", &filename); + if (ret < 0) { + gf_log ("", GF_LOG_ERROR, "Unable to fetch" + " filename from dict."); + *op_errstr = gf_strdup ("command unsuccessful"); + goto out; + } + snprintf (abs_filename, sizeof(abs_filename), + "%s/%s", priv->workdir, filename); + + ret = lstat (abs_filename, &stbuf); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Source file" + " does not exist in %s", priv->workdir); + *op_errstr = gf_strdup (errmsg); + goto out; + } + + if (!S_ISREG(stbuf.st_mode)) { + snprintf (errmsg, sizeof (errmsg), "Source file" + " is not a regular file."); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } + } + + ret = 0; +out: + 
gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +static int +glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave, + char *conf_path, char **statefile) +{ + glusterd_conf_t *priv = NULL; + int ret = -1; + char *master = NULL; + char *buf = NULL; + dict_t *confd = NULL; + char *confpath = NULL; + char conf_buf[PATH_MAX] = ""; + struct stat stbuf = {0,}; + + GF_ASSERT (THIS); + GF_ASSERT (THIS->private); + GF_ASSERT (volinfo); + + master = volinfo->volname; + + confd = dict_new (); + if (!confd) { + gf_log ("", GF_LOG_ERROR, "Unable to create new dict"); + goto out; + } + + priv = THIS->private; + + ret = lstat (conf_path, &stbuf); + if (!ret) { + gf_log ("", GF_LOG_INFO, "Using passed config template(%s).", + conf_path); + confpath = conf_path; + } else { + ret = snprintf (conf_buf, sizeof(conf_buf) - 1, + "%s/"GSYNC_CONF_TEMPLATE, priv->workdir); + conf_buf[ret] = '\0'; + confpath = conf_buf; + gf_log ("", GF_LOG_INFO, "Using default config template(%s).", + confpath); + } + + ret = glusterd_gsync_get_config (master, slave, confpath, + confd); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get configuration data" + "for %s(master), %s(slave)", master, slave); + goto out; + + } + + ret = dict_get_param (confd, "state_file", &buf); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get state_file's name."); + goto out; + } + + *statefile = gf_strdup(buf); + if (!*statefile) { + gf_log ("", GF_LOG_ERROR, "Unable to gf_strdup."); + ret = -1; + goto out; + } + + ret = 0; + out: + if (confd) + dict_destroy (confd); + + gf_log ("", GF_LOG_DEBUG, "Returning %d ", ret); + return ret; +} + +static int +glusterd_create_status_file (char *master, char *slave, char *slave_ip, + char *slave_vol, char *status) +{ + int ret = -1; + runner_t runner = {0,}; + glusterd_conf_t *priv = NULL; + + if (THIS) + priv = THIS->private; + if (priv == NULL) { + gf_log ("", GF_LOG_ERROR, "priv of glusterd not present"); + goto out; + } + + if (!status) { + gf_log ("", GF_LOG_ERROR, "Status Empty"); + goto out; + } + gf_log ("", GF_LOG_DEBUG, "slave = %s", slave); + + runinit (&runner); + runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "--create", + status, "-c", NULL); + runner_argprintf (&runner, "%s/"GEOREP"/%s_%s_%s/gsyncd.conf", + priv->workdir, master, slave_ip, slave_vol); + runner_argprintf (&runner, ":%s", master); + runner_add_args (&runner, slave, NULL); + synclock_unlock (&priv->big_lock); + ret = runner_run (&runner); + synclock_lock (&priv->big_lock); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Creating status file failed."); + ret = -1; + goto out; + } + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "returning %d", ret); + return ret; +} + +static int +glusterd_verify_slave (char *volname, char *slave_ip, char *slave, + char **op_errstr, gf_boolean_t *is_force_blocker) +{ + int32_t ret = -1; + runner_t runner = {0,}; + char log_file_path[PATH_MAX] = ""; + char buf[PATH_MAX] = ""; + char *tmp = NULL; + char *save_ptr = NULL; + glusterd_conf_t *priv = NULL; + + GF_ASSERT (volname); + GF_ASSERT (slave_ip); + GF_ASSERT (slave); + + if (THIS) + priv = THIS->private; + if (priv == NULL) { + gf_log ("", GF_LOG_ERROR, "priv of glusterd not present"); + goto out; + } + + snprintf (log_file_path, sizeof(log_file_path), + DEFAULT_LOG_FILE_DIRECTORY"/create_verify_log"); + + runinit (&runner); + runner_add_args (&runner, GSYNCD_PREFIX"/gverify.sh", NULL); + runner_argprintf (&runner, "%s", volname); + runner_argprintf (&runner, "%s", slave_ip); + runner_argprintf (&runner, 
"%s", slave); + runner_argprintf (&runner, "%s", log_file_path); + runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); + synclock_unlock (&priv->big_lock); + ret = runner_run (&runner); + synclock_lock (&priv->big_lock); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Not a valid slave"); + ret = glusterd_gsync_read_frm_status (log_file_path, + buf, sizeof(buf)); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to read from %s", + log_file_path); + goto out; + } + + /* Tokenize the error message from gverify.sh to figure out + * if the error is a force blocker or not. */ + tmp = strtok_r (buf, "|", &save_ptr); + if (!strcmp (tmp, "FORCE_BLOCKER")) + *is_force_blocker = 1; + else { + /* No FORCE_BLOCKER flag present so all that is + * present is the error message. */ + *is_force_blocker = 0; + if (tmp) + *op_errstr = gf_strdup (tmp); + ret = -1; + goto out; + } + + /* Copy rest of the error message to op_errstr */ + tmp = strtok_r (NULL, "|", &save_ptr); + if (tmp) + *op_errstr = gf_strdup (tmp); + ret = -1; + goto out; + } + ret = 0; +out: + unlink (log_file_path); + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_mountbroker_check (char **slave_ip, char **op_errstr) +{ + int ret = -1; + char *tmp = NULL; + char *save_ptr = NULL; + char *username = NULL; + char *host = NULL; + char errmsg[PATH_MAX] = ""; + + GF_ASSERT (slave_ip); + GF_ASSERT (*slave_ip); + + /* Checking if hostname has user specified */ + host = strstr (*slave_ip, "@"); + if (!host) { + gf_log ("", GF_LOG_DEBUG, "No username provided."); + ret = 0; + goto out; + } else { + /* Moving the host past the '@' and checking if the + * actual hostname also has '@' */ + host++; + if (strstr (host, "@")) { + gf_log ("", GF_LOG_DEBUG, "host = %s", host); + ret = snprintf (errmsg, sizeof(errmsg) - 1, + "Invalid Hostname (%s).", host); + errmsg[ret] = '\0'; + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + if (op_errstr) + *op_errstr = gf_strdup (errmsg); + goto out; + } + + /* Fetching the username and hostname + * and checking if the username is non-root */ + username = strtok_r (*slave_ip, "@", &save_ptr); + tmp = strtok_r (NULL, "@", &save_ptr); + if (strcmp (username, "root")) { + ret = snprintf (errmsg, sizeof(errmsg) - 1, + "Non-root username (%s@%s) not allowed.", + username, tmp); + errmsg[ret] = '\0'; + if (op_errstr) + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, + "Non-Root username not allowed."); + ret = -1; + goto out; + } + + *slave_ip = gf_strdup (tmp); + if (!*slave_ip) { + gf_log ("", GF_LOG_ERROR, "Out of memory"); + ret = -1; + goto out; + } + } + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_op_stage_gsync_create (dict_t *dict, char **op_errstr) +{ + char *down_peerstr = NULL; + char *slave = NULL; + char *volname = NULL; + char *host_uuid = NULL; + char *statefile = NULL; + char *slave_ip = NULL; + char *slave_vol = NULL; + char *conf_path = NULL; + char errmsg[PATH_MAX] = ""; + char common_pem_file[PATH_MAX] = ""; + char hook_script[PATH_MAX] = ""; + char uuid_str [64] = ""; + int ret = -1; + int is_pem_push = -1; + gf_boolean_t is_force = -1; + gf_boolean_t is_force_blocker = -1; + gf_boolean_t exists = _gf_false; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + struct stat stbuf = {0,}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + + ret = glusterd_op_gsync_args_get (dict, op_errstr, &volname, + &slave, &host_uuid); + if 
(ret) { + gf_log ("", GF_LOG_ERROR, "Unable to fetch arguments"); + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return -1; + } + + if (conf->op_version < 2) { + gf_log ("", GF_LOG_ERROR, "Op Version not supported."); + snprintf (errmsg, sizeof(errmsg), "One or more nodes do not" + " support the required op version."); + *op_errstr = gf_strdup (errmsg); + ret = -1; + goto out; + } + + exists = glusterd_check_volume_exists (volname); + ret = glusterd_volinfo_find (volname, &volinfo); + if ((ret) || (!exists)) { + gf_log ("", GF_LOG_WARNING, "volume name does not exist"); + snprintf (errmsg, sizeof(errmsg), "Volume name %s does not" + " exist", volname); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return -1; + } + + ret = glusterd_get_slave_details_confpath (volinfo, dict, &slave_ip, + &slave_vol, &conf_path, + op_errstr); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to fetch slave or confpath details."); + ret = -1; + goto out; + } + + is_force = dict_get_str_boolean (dict, "force", _gf_false); + + uuid_utoa_r (MY_UUID, uuid_str); + if (!strcmp (uuid_str, host_uuid)) { + ret = glusterd_are_vol_all_peers_up (volinfo, + &conf->peers, + &down_peerstr); + if ((ret == _gf_false) && !is_force) { + snprintf (errmsg, sizeof (errmsg), "Peer %s," + " which is a part of %s volume, is" + " down. Please bring up the peer and" + " retry.", down_peerstr, + volinfo->volname); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + *op_errstr = gf_strdup (errmsg); + GF_FREE (down_peerstr); + down_peerstr = NULL; + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return -1; + } else if (ret == _gf_false) { + gf_log ("", GF_LOG_INFO, "Peer %s," + " which is a part of %s volume, is" + " down. Force creating geo-rep session." + " On bringing up the peer, re-run" + " \"gluster system:: execute" + " gsec_create\" and \"gluster volume" + " geo-replication %s %s create push-pem" + " force\"", down_peerstr, volinfo->volname, + volinfo->volname, slave); + } + + /* Checking if slave host is pingable, has proper passwordless + * ssh login setup, slave volume is created, slave vol is empty, + * and if it has enough memory and bypass in case of force if + * the error is not a force blocker */ + ret = glusterd_verify_slave (volname, slave_ip, slave_vol, + op_errstr, &is_force_blocker); + if (ret) { + if (is_force && !is_force_blocker) { + gf_log ("", GF_LOG_INFO, "%s is not a valid slave" + " volume. Error: %s. Force creating geo-rep" + " session.", slave, *op_errstr); + } else { + gf_log ("", GF_LOG_ERROR, + "%s is not a valid slave volume. Error: %s", + slave, *op_errstr); + ret = -1; + goto out; + } + } + + ret = dict_get_int32 (dict, "push_pem", &is_pem_push); + if (!ret && is_pem_push) { + ret = snprintf (common_pem_file, + sizeof(common_pem_file) - 1, + "%s"GLUSTERD_COMMON_PEM_PUB_FILE, + conf->workdir); + common_pem_file[ret] = '\0'; + + ret = snprintf (hook_script, sizeof(hook_script) - 1, + "%s"GLUSTERD_CREATE_HOOK_SCRIPT, + conf->workdir); + hook_script[ret] = '\0'; + + ret = lstat (common_pem_file, &stbuf); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "%s" + " required for push-pem is" + " not present. Please run" + " \"gluster system:: execute" + " gsec_create\"", common_pem_file); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + *op_errstr = gf_strdup (errmsg); + ret = -1; + goto out; + } + + ret = lstat (hook_script, &stbuf); + if (ret) { + snprintf (errmsg, sizeof (errmsg), + "The hook-script (%s) required " + "for push-pem is not present. 
" + "Please install the hook-script " + "and retry", hook_script); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + *op_errstr = gf_strdup (errmsg); + ret = -1; + goto out; + } + + if (!S_ISREG(stbuf.st_mode)) { + snprintf (errmsg, sizeof (errmsg), "%s" + " required for push-pem is" + " not a regular file. Please run" + " \"gluster system:: execute" + " gsec_create\"", common_pem_file); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } + } + } + + ret = glusterd_get_statefile_name (volinfo, slave, conf_path, &statefile); + if (ret) { + if (!strstr(slave, "::")) + snprintf (errmsg, sizeof (errmsg), + "%s is not a valid slave url.", slave); + else + snprintf (errmsg, sizeof (errmsg), "Please check gsync " + "config file. Unable to get statefile's name"); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } + + ret = dict_set_str (dict, "statefile", statefile); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to store statefile path"); + goto out; + } + + ret = lstat (statefile, &stbuf); + if (!ret && !is_force) { + snprintf (errmsg, sizeof (errmsg), "Session between %s" + " and %s is already created.", + volinfo->volname, slave); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } else if (!ret) + gf_log ("", GF_LOG_INFO, "Session between %s" + " and %s is already created. Force" + " creating again.", volinfo->volname, slave); + + ret = glusterd_verify_gsyncd_spawn (volinfo->volname, slave); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Unable to spawn gsyncd."); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } + + ret = 0; +out: + + if (ret && errmsg[0] != '\0') + *op_errstr = gf_strdup (errmsg); + + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr) { int ret = 0; int type = 0; char *volname = NULL; char *slave = NULL; + char *slave_ip = NULL; + char *slave_vol = NULL; + char *down_peerstr = NULL; + char *statefile = NULL; + char *path_list = NULL; + char *conf_path = NULL; gf_boolean_t exists = _gf_false; glusterd_volinfo_t *volinfo = NULL; char errmsg[PATH_MAX] = {0,}; dict_t *ctx = NULL; + gf_boolean_t is_force = 0; + gf_boolean_t is_force_blocker = -1; + gf_boolean_t is_running = _gf_false; + uuid_t uuid = {0}; + char uuid_str [64] = {0}; + char *host_uuid = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + struct stat stbuf = {0,}; + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); ret = dict_get_int32 (dict, "type", &type); if (ret < 0) { @@ -1144,25 +2097,26 @@ glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr) goto out; } - switch (type) { - case GF_GSYNC_OPTION_TYPE_STATUS: + if (type == GF_GSYNC_OPTION_TYPE_STATUS) { ret = glusterd_verify_gsync_status_opts (dict, op_errstr); - goto out; - case GF_GSYNC_OPTION_TYPE_CONFIG: - ret = gsync_verify_config_options (dict, op_errstr); + } + ret = glusterd_op_gsync_args_get (dict, op_errstr, + &volname, &slave, &host_uuid); + if (ret) goto out; - case GF_GSYNC_OPTION_TYPE_ROTATE: - /* checks same as status mode */ - ret = glusterd_verify_gsync_status_opts(dict, op_errstr); - goto out; - } + uuid_utoa_r (MY_UUID, uuid_str); - ret = glusterd_op_gsync_args_get (dict, op_errstr, &volname, &slave); - if (ret) + if (conf->op_version < 2) { + gf_log ("", GF_LOG_ERROR, "Op Version not supported."); + snprintf (errmsg, sizeof(errmsg), "One or more nodes do not" + " support the required op version."); + *op_errstr = gf_strdup (errmsg); + ret = -1; 
goto out; + } exists = glusterd_check_volume_exists (volname); ret = glusterd_volinfo_find (volname, &volinfo); @@ -1175,12 +2129,96 @@ glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr) goto out; } + ret = glusterd_get_slave_details_confpath (volinfo, dict, &slave_ip, + &slave_vol, &conf_path, + op_errstr); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to fetch slave or confpath details."); + ret = -1; + goto out; + } + + ret = glusterd_get_statefile_name (volinfo, slave, conf_path, &statefile); + if (ret) { + /* Checking if slave host is pingable, has proper passwordless + * ssh login setup */ + ret = glusterd_verify_slave (volname, slave_ip, slave_vol, + op_errstr, &is_force_blocker); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "%s is not a valid slave volume. Error: %s", + slave, *op_errstr); + goto out; + } + + if (!strstr(slave, "::")) + snprintf (errmsg, sizeof (errmsg), + "%s is not a valid slave url.", slave); + else + snprintf (errmsg, sizeof (errmsg), + "Unable to get statefile's name"); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } + + ret = dict_set_str (dict, "statefile", statefile); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to store statefile path"); + goto out; + } + + is_force = dict_get_str_boolean (dict, "force", _gf_false); + + /* Allowing stop force to bypass the statefile check + * as this command acts as a fail safe method to stop geo-rep + * session. */ + if ((type == GF_GSYNC_OPTION_TYPE_CONFIG) || + ((type == GF_GSYNC_OPTION_TYPE_STOP) && !is_force) || + (type == GF_GSYNC_OPTION_TYPE_DELETE)) { + ret = lstat (statefile, &stbuf); + if (ret) { + snprintf (errmsg, sizeof(errmsg), "Geo-replication" + " session between %s and %s does not exist.", + volinfo->volname, slave); + gf_log ("", GF_LOG_ERROR, "%s. statefile = %s", + errmsg, statefile); + *op_errstr = gf_strdup (errmsg); + ret = -1; + goto out; + } + } + + /* Check if all peers that are a part of the volume are up or not */ + if ((type == GF_GSYNC_OPTION_TYPE_DELETE) || + ((type == GF_GSYNC_OPTION_TYPE_STOP) && !is_force)) { + if (!strcmp (uuid_str, host_uuid)) { + ret = glusterd_are_vol_all_peers_up (volinfo, + &conf->peers, + &down_peerstr); + if (ret == _gf_false) { + snprintf (errmsg, sizeof (errmsg), "Peer %s," + " which is a part of %s volume, is" + " down. 
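The staging above deliberately lets "stop force" bypass the statefile check, so a session whose statefile is already gone can still be torn down. The gate condensed into one predicate, with simplified types standing in for the dict flags and glusterd enums:

#include <stdbool.h>
#include <sys/stat.h>

enum gsync_op { OP_START, OP_STOP, OP_CONFIG, OP_DELETE };

static bool
needs_statefile (enum gsync_op op, bool is_force)
{
        return op == OP_CONFIG ||
               op == OP_DELETE ||
               (op == OP_STOP && !is_force);   /* force skips the gate */
}

static int
gate_on_statefile (const char *statefile, enum gsync_op op, bool is_force)
{
        struct stat stbuf;

        if (needs_statefile (op, is_force) && lstat (statefile, &stbuf) != 0)
                return -1;   /* "session ... does not exist" */
        return 0;
}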
Please bring up the peer and" + " retry.", down_peerstr, + volinfo->volname); + *op_errstr = gf_strdup (errmsg); + ret = -1; + GF_FREE (down_peerstr); + down_peerstr = NULL; + goto out; + } + } + } + switch (type) { case GF_GSYNC_OPTION_TYPE_START: /* don't attempt to start gsync if replace-brick is * in progress */ if (glusterd_is_rb_ongoing (volinfo)) { - snprintf (errmsg, sizeof(errmsg),"replace-brick is in" + snprintf (errmsg, sizeof(errmsg), "replace-brick is in" " progress, not starting geo-replication"); *op_errstr = gf_strdup (errmsg); ret = -1; @@ -1188,16 +2226,18 @@ glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr) } ret = glusterd_op_verify_gsync_start_options (volinfo, slave, - op_errstr); + conf_path, statefile, + op_errstr, is_force); if (ret) goto out; ctx = glusterd_op_get_ctx(); if (ctx) { - /*gsyncd does a fuse mount to start the geo-rep session*/ + /* gsyncd does a fuse mount to start + * the geo-rep session */ if (!glusterd_is_fuse_available ()) { - gf_log ("glusterd", GF_LOG_ERROR, "Unable to open" - " /dev/fuse (%s), geo-replication start" - " failed", strerror (errno)); + gf_log ("glusterd", GF_LOG_ERROR, "Unable to " + "open /dev/fuse (%s), geo-replication " + "start failed", strerror (errno)); snprintf (errmsg, sizeof(errmsg), "fuse unvailable"); *op_errstr = gf_strdup (errmsg); @@ -1208,17 +2248,72 @@ glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr) break; case GF_GSYNC_OPTION_TYPE_STOP: - ret = glusterd_op_verify_gsync_running (volinfo, slave, - op_errstr); + if (!is_force) { + ret = glusterd_op_verify_gsync_running (volinfo, slave, + conf_path, + op_errstr); + if (ret) { + ret = glusterd_get_local_brickpaths (volinfo, + &path_list); + if (path_list) + ret = -1; + } + } + break; + + case GF_GSYNC_OPTION_TYPE_CONFIG: + ret = gsync_verify_config_options (dict, op_errstr, volname); + goto out; + break; + + case GF_GSYNC_OPTION_TYPE_DELETE: + /* Check if the gsync session is still running + * If so ask the user to stop geo-replication first.*/ + ret = glusterd_gsync_get_uuid (slave, volinfo, uuid); + if (ret) { + snprintf (errmsg, sizeof(errmsg), "Geo-replication" + " session between %s and %s does not exist.", + volinfo->volname, slave); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + *op_errstr = gf_strdup (errmsg); + ret = -1; + goto out; + } else { + ret = glusterd_check_gsync_running_local (volinfo->volname, + slave, conf_path, + &is_running); + if (_gf_true == is_running) { + snprintf (errmsg, sizeof (errmsg), GEOREP + " session between %s & %s is " + "still active. 
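Start staging checks that fuse is usable because gsyncd mounts the master volume over fuse. glusterd_is_fuse_available() itself is outside this hunk; one plausible probe, shown only as an assumption about its shape, is opening the device node:

#include <fcntl.h>
#include <unistd.h>

/* Hypothetical probe: succeed iff /dev/fuse can be opened read-write. */
static int
fuse_available (void)
{
        int fd = open ("/dev/fuse", O_RDWR);

        if (fd < 0)
                return 0;    /* geo-replication start must fail */
        close (fd);
        return 1;
}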
Please stop the " + "session and retry.", + volinfo->volname, slave); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + *op_errstr = gf_strdup (errmsg); + ret = -1; + goto out; + } + } + + ret = glusterd_verify_gsyncd_spawn (volinfo->volname, slave); + if (ret) { + snprintf (errmsg, sizeof (errmsg), + "Unable to spawn gsyncd"); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + } + break; } out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } static int -stop_gsync (char *master, char *slave, char **msg) +stop_gsync (char *master, char *slave, char **msg, + char *conf_path, gf_boolean_t is_force) { int32_t ret = 0; int pfd = -1; @@ -1230,19 +2325,16 @@ stop_gsync (char *master, char *slave, char **msg) GF_ASSERT (THIS); GF_ASSERT (THIS->private); - pfd = gsyncd_getpidfile (master, slave, pidfile); - if (pfd == -2) { + pfd = gsyncd_getpidfile (master, slave, pidfile, conf_path); + if (pfd == -2 && !is_force) { gf_log ("", GF_LOG_ERROR, GEOREP" stop validation " " failed for %s & %s", master, slave); ret = -1; goto out; } - if (gsync_status_byfd (pfd) == -1) { + if (gsync_status_byfd (pfd) == -1 && !is_force) { gf_log ("", GF_LOG_ERROR, "gsyncd b/w %s & %s is not" " running", master, slave); - if (msg) - *msg = gf_strdup ("Warning: "GEOREP" session was " - "defunct at stop time"); /* monitor gsyncd already dead */ goto out; } @@ -1277,16 +2369,16 @@ stop_gsync (char *master, char *slave, char **msg) out: sys_close (pfd); + + if (is_force) + ret = 0; return ret; } static int -glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave, - dict_t *resp_dict); - -static int glusterd_gsync_configure (glusterd_volinfo_t *volinfo, char *slave, - dict_t *dict, dict_t *resp_dict, char **op_errstr) + char *path_list, dict_t *dict, + dict_t *resp_dict, char **op_errstr) { int32_t ret = -1; char *op_name = NULL; @@ -1295,6 +2387,10 @@ glusterd_gsync_configure (glusterd_volinfo_t *volinfo, char *slave, glusterd_conf_t *priv = NULL; char *subop = NULL; char *master = NULL; + char *conf_path = NULL; + char *slave_ip = NULL; + char *slave_vol = NULL; + struct stat stbuf = {0, }; GF_ASSERT (slave); GF_ASSERT (op_errstr); @@ -1329,10 +2425,17 @@ glusterd_gsync_configure (glusterd_volinfo_t *volinfo, char *slave, goto out; } + ret = dict_get_str (dict, "conf_path", &conf_path); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to fetch conf file path."); + goto out; + } + master = ""; runinit (&runner); runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL); - runner_argprintf (&runner, "%s/"GSYNC_CONF, priv->workdir); + runner_argprintf (&runner, "%s", conf_path); if (volinfo) { master = volinfo->volname; runner_argprintf (&runner, ":%s", master); @@ -1342,7 +2445,9 @@ glusterd_gsync_configure (glusterd_volinfo_t *volinfo, char *slave, runner_add_arg (&runner, op_name); if (op_value) runner_add_arg (&runner, op_value); + synclock_unlock (&priv->big_lock); ret = runner_run (&runner); + synclock_lock (&priv->big_lock); if (ret) { gf_log ("", GF_LOG_WARNING, "gsyncd failed to " "%s %s option for %s %s peers", @@ -1353,13 +2458,46 @@ glusterd_gsync_configure (glusterd_volinfo_t *volinfo, char *slave, goto out; } + + if (!strcmp (op_name, "state_file")) { + + ret = lstat (op_value, &stbuf); + if (ret) { + ret = dict_get_str (dict, "slave_ip", &slave_ip); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to fetch slave IP."); + goto out; + } + + ret = dict_get_str (dict, "slave_vol", &slave_vol); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable 
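Note the synclock_unlock()/synclock_lock() pair around runner_run() above: the big lock is dropped while the external gsyncd process runs, so other glusterd synctasks are not starved behind an exec, and it is re-acquired before any shared state is touched again. The generic shape of that pattern, with a pthread mutex standing in for the synclock:

#include <pthread.h>

static int
run_unlocked (pthread_mutex_t *big_lock,
              int (*blocking_call) (void *), void *arg)
{
        int ret;

        pthread_mutex_unlock (big_lock);   /* let other tasks progress */
        ret = blocking_call (arg);         /* potentially long exec */
        pthread_mutex_lock (big_lock);     /* re-acquire before shared state */
        return ret;
}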
to fetch slave volume name."); + goto out; + } + + ret = glusterd_create_status_file (volinfo->volname, slave, + slave_ip, slave_vol, + "Switching Status File"); + if (ret || lstat (op_value, &stbuf)) { + gf_log ("", GF_LOG_ERROR, "Unable to create %s" + ". Error : %s", op_value, + strerror (errno)); + ret = -1; + goto out; + } + } + } + ret = 0; gf_asprintf (op_errstr, "config-%s successful", subop); out: if (!ret && volinfo) { ret = glusterd_check_restart_gsync_session (volinfo, slave, - resp_dict); + resp_dict, path_list, + conf_path, 0); if (ret) *op_errstr = gf_strdup ("internal error"); } @@ -1485,13 +2623,14 @@ dict_get_param (dict_t *dict, char *key, char **param) } static int -glusterd_read_status_file (char *master, char *slave, - dict_t *dict, char *node) +glusterd_read_status_file (glusterd_volinfo_t *volinfo, char *slave, + char *conf_path, dict_t *dict, char *node) { glusterd_conf_t *priv = NULL; int ret = 0; char *statefile = NULL; - char buf[1024] = {0, }; + char *master = NULL; + char buf[1024] = "defunct"; char nds[1024] = {0, }; char mst[1024] = {0, }; char slv[1024] = {0, }; @@ -1501,34 +2640,38 @@ glusterd_read_status_file (char *master, char *slave, int gsync_count = 0; int status = 0; char *dyn_node = NULL; + char *path_list = NULL; GF_ASSERT (THIS); GF_ASSERT (THIS->private); + GF_ASSERT (volinfo); + + master = volinfo->volname; confd = dict_new (); - if (!dict) + if (!dict) { + gf_log ("", GF_LOG_ERROR, "Not able to create dict."); return -1; + } priv = THIS->private; - ret = glusterd_gsync_get_config (master, slave, priv->workdir, + + ret = glusterd_gsync_get_config (master, slave, conf_path, confd); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to get configuration data" "for %s(master), %s(slave)", master, slave); - goto out; + goto done; } - ret = gsync_status (master, slave, &status); - if (ret == 0 && status == -1) { - strncpy (buf, "defunct", sizeof (buf)); - goto done; - } else if (ret == -1) - goto out; - ret = dict_get_param (confd, "state_file", &statefile); - if (ret) - goto out; + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get state_file's name " + "for %s(master), %s(slave). Please check gsync " + "config file.", master, slave); + goto done; + } ret = glusterd_gsync_read_frm_status (statefile, buf, sizeof (buf)); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to read the status" @@ -1536,12 +2679,27 @@ glusterd_read_status_file (char *master, char *slave, strncpy (buf, "defunct", sizeof (buf)); goto done; } - if (strcmp (buf, "OK") != 0) + + ret = gsync_status (master, slave, conf_path, &status); + if (ret == 0 && status == -1) { + if ((strcmp (buf, "Not Started")) && + (strcmp (buf, "Stopped"))) + strncpy (buf, "defunct", sizeof (buf)); + goto done; + } else if (ret == -1) { + gf_log ("", GF_LOG_ERROR, "Unable to get gsync status"); + goto done; + } + + if (strcmp (buf, "Stable") != 0) goto done; ret = dict_get_param (confd, "state_socket_unencoded", &statefile); - if (ret) - goto out; + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get state_socket_unencoded" + " filepath. 
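glusterd_read_status_file() now pre-loads its buffer with "defunct", so every early bail-out reports a sane status instead of leftover stack bytes. A minimal standalone reader with the same fallback behaviour:

#include <stdio.h>
#include <string.h>

static void
read_status (const char *statefile, char *buf, size_t len)
{
        FILE   *fp;
        size_t  n;

        snprintf (buf, len, "defunct");          /* pessimistic default */
        fp = fopen (statefile, "r");
        if (!fp)
                return;
        if (fgets (buf, (int) len, fp)) {
                n = strlen (buf);
                if (n && buf[n - 1] == '\n')
                        buf[n - 1] = '\0';       /* strip the newline */
        } else {
                snprintf (buf, len, "defunct");  /* empty file: defunct */
        }
        fclose (fp);
}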
Please check gsync config file."); + goto done; + } ret = glusterd_gsync_fetch_status_extra (statefile, buf, sizeof (buf)); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to fetch extra status" @@ -1562,6 +2720,19 @@ glusterd_read_status_file (char *master, char *slave, } done: + if ((!strcmp (buf, "defunct")) || + (!strcmp (buf, "Not Started")) || + (!strcmp (buf, "Stopped"))) { + ret = glusterd_get_local_brickpaths (volinfo, &path_list); + if (!path_list) { + gf_log ("", GF_LOG_DEBUG, "This node not being part of" + " volume should not be running gsyncd. Hence" + " shouldn't display status for this node."); + ret = 0; + goto out; + } + } + ret = dict_get_int32 (dict, "gsync-count", &gsync_count); if (ret) @@ -1612,23 +2783,22 @@ glusterd_read_status_file (char *master, char *slave, if (ret) goto out; - ret = 0; out: dict_destroy (confd); - gf_log ("", GF_LOG_DEBUG, "Returning %d ", ret); - return ret; + return 0; } -static int +int glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave, - dict_t *resp_dict) + dict_t *resp_dict, char *path_list, + char *conf_path, gf_boolean_t is_force) { int ret = 0; - uuid_t uuid = {0, }; glusterd_conf_t *priv = NULL; char *status_msg = NULL; + gf_boolean_t is_running = _gf_false; GF_ASSERT (volinfo); GF_ASSERT (slave); @@ -1637,18 +2807,22 @@ glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave, priv = THIS->private; - if (glusterd_gsync_get_uuid (slave, volinfo, uuid)) - /* session does not exist, nothing to do */ + ret = glusterd_check_gsync_running_local (volinfo->volname, + slave, conf_path, + &is_running); + if (!ret && (_gf_true != is_running)) + /* gsynd not running, nothing to do */ goto out; - if (uuid_compare (MY_UUID, uuid) == 0) { - ret = stop_gsync (volinfo->volname, slave, &status_msg); - if (ret == 0 && status_msg) - ret = dict_set_str (resp_dict, "gsync-status", - status_msg); - if (ret == 0) - ret = glusterd_start_gsync (volinfo, slave, - uuid_utoa(MY_UUID), NULL); - } + + ret = stop_gsync (volinfo->volname, slave, &status_msg, + conf_path, is_force); + if (ret == 0 && status_msg) + ret = dict_set_str (resp_dict, "gsync-status", + status_msg); + if (ret == 0) + ret = glusterd_start_gsync (volinfo, slave, path_list, + conf_path, uuid_utoa(MY_UUID), + NULL); out: gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); @@ -1656,7 +2830,7 @@ glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave, } static int32_t -glusterd_marker_create_volfile (glusterd_volinfo_t *volinfo) +glusterd_marker_changelog_create_volfile (glusterd_volinfo_t *volinfo) { int32_t ret = 0; @@ -1680,58 +2854,82 @@ out: } static int -glusterd_set_marker_gsync (glusterd_volinfo_t *volinfo) +glusterd_set_gsync_knob (glusterd_volinfo_t *volinfo, char *key, int *vc) { - int ret = -1; - int marker_set = _gf_false; - char *gsync_status = NULL; + int ret = -1; + int conf_enabled = _gf_false; + char *knob_on = NULL; GF_ASSERT (THIS); GF_ASSERT (THIS->private); - marker_set = glusterd_volinfo_get_boolean (volinfo, VKEY_MARKER_XTIME); - if (marker_set == -1) { - gf_log ("", GF_LOG_ERROR, "failed to get the marker status"); - ret = -1; + conf_enabled = glusterd_volinfo_get_boolean (volinfo, key); + if (conf_enabled == -1) { + gf_log ("", GF_LOG_ERROR, + "failed to get key %s from volinfo", key); goto out; } - if (marker_set == _gf_false) { - gsync_status = gf_strdup ("on"); - if (gsync_status == NULL) { + ret = 0; + if (conf_enabled == _gf_false) { + *vc = 1; + knob_on = gf_strdup ("on"); + if (knob_on == NULL) { 
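glusterd_check_restart_gsync_session() above now keys off an actual liveness probe instead of the stored uuid: if gsyncd is not running locally there is nothing to do; otherwise stop, then start, with force plumbed through. Its control flow in isolation, the real helpers replaced by callbacks:

typedef int (*sess_fn) (const char *master, const char *slave);

static int
restart_session (const char *master, const char *slave,
                 int running, sess_fn stop, sess_fn start)
{
        if (!running)
                return 0;                  /* nothing to restart */
        if (stop (master, slave) != 0)
                return -1;
        return start (master, slave);     /* fresh gsyncd instance */
}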
ret = -1; goto out; } ret = glusterd_gsync_volinfo_dict_set (volinfo, - VKEY_MARKER_XTIME, gsync_status); - if (ret < 0) - goto out; - - ret = glusterd_marker_create_volfile (volinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Setting dict failed"); - goto out; - } + key, knob_on); } - ret = 0; -out: + out: gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } +static int +glusterd_set_gsync_confs (glusterd_volinfo_t *volinfo) +{ + int ret = -1; + int volfile_changed = 0; + ret = glusterd_set_gsync_knob (volinfo, + VKEY_MARKER_XTIME, &volfile_changed); + if (ret) + goto out; + /** + * enable ignore-pid-check blindly as it could be needed for + * cascading setups. + */ + ret = glusterd_set_gsync_knob (volinfo, VKEY_MARKER_XTIME_FORCE, + &volfile_changed); + if (ret) + goto out; + + ret = glusterd_set_gsync_knob (volinfo, + VKEY_CHANGELOG, &volfile_changed); + if (ret) + goto out; + + if (volfile_changed) + ret = glusterd_marker_changelog_create_volfile (volinfo); + + out: + return ret; +} static int glusterd_get_gsync_status_mst_slv (glusterd_volinfo_t *volinfo, - char *slave, dict_t *rsp_dict, - char *node) + char *slave, char *conf_path, + dict_t *rsp_dict, char *node) { + char *statefile = NULL; uuid_t uuid = {0, }; glusterd_conf_t *priv = NULL; int ret = 0; + struct stat stbuf = {0, }; GF_ASSERT (volinfo); GF_ASSERT (slave); @@ -1741,19 +2939,38 @@ glusterd_get_gsync_status_mst_slv (glusterd_volinfo_t *volinfo, priv = THIS->private; ret = glusterd_gsync_get_uuid (slave, volinfo, uuid); - if ((ret == 0) && (uuid_compare (MY_UUID, uuid) != 0)) - goto out; - if (ret) { - ret = 0; gf_log ("", GF_LOG_INFO, "geo-replication status %s %s :" "session is not active", volinfo->volname, slave); - goto out; + + ret = glusterd_get_statefile_name (volinfo, slave, + conf_path, &statefile); + if (ret) { + if (!strstr(slave, "::")) + gf_log ("", GF_LOG_INFO, + "%s is not a valid slave url.", slave); + else + gf_log ("", GF_LOG_INFO, "Unable to get" + " statefile's name"); + ret = 0; + goto out; + } + + ret = lstat (statefile, &stbuf); + if (ret) { + gf_log ("", GF_LOG_INFO, "%s statefile not present.", + statefile); + ret = 0; + goto out; + } } - ret = glusterd_read_status_file (volinfo->volname, - slave, rsp_dict, node); - out: + ret = glusterd_read_status_file (volinfo, slave, conf_path, + rsp_dict, node); +out: + if (statefile) + GF_FREE (statefile); + gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); return ret; } @@ -1804,6 +3021,7 @@ glusterd_get_gsync_status (dict_t *dict, char **op_errstr, dict_t *rsp_dict) { char *slave = NULL; char *volname = NULL; + char *conf_path = NULL; char errmsg[PATH_MAX] = {0, }; gf_boolean_t exists = _gf_false; glusterd_volinfo_t *volinfo = NULL; @@ -1841,7 +3059,14 @@ glusterd_get_gsync_status (dict_t *dict, char **op_errstr, dict_t *rsp_dict) goto out; } - ret = glusterd_get_gsync_status_mst_slv (volinfo, slave, + ret = dict_get_str (dict, "conf_path", &conf_path); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to fetch conf file path."); + goto out; + } + + ret = glusterd_get_gsync_status_mst_slv (volinfo, slave, conf_path, rsp_dict, my_hostname); out: @@ -1850,300 +3075,406 @@ glusterd_get_gsync_status (dict_t *dict, char **op_errstr, dict_t *rsp_dict) } static int -glusterd_send_sigstop (pid_t pid) +glusterd_gsync_delete (glusterd_volinfo_t *volinfo, char *slave, char *slave_ip, + char *slave_vol, char *path_list, dict_t *dict, + dict_t *resp_dict, char **op_errstr) { - int ret = 0; - ret = kill (pid, SIGSTOP); - if (ret) - gf_log ("", 
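glusterd_set_gsync_confs() enables three knobs (the xtime marker, its force variant for cascading setups, and the changelog) but regenerates the volfile at most once, via the shared volfile_changed flag. The batching reduced to plain ints, with the volfile rebuild abstracted into a callback:

static void
enable_knob (int *knob, int *changed)
{
        if (!*knob) {
                *knob = 1;       /* turn on only if currently off */
                *changed = 1;
        }
}

static int
enable_geo_rep_knobs (int *xtime, int *xtime_force, int *changelog,
                      int (*rebuild_volfile) (void))
{
        int changed = 0;

        enable_knob (xtime, &changed);
        enable_knob (xtime_force, &changed);     /* cascading setups */
        enable_knob (changelog, &changed);

        return changed ? rebuild_volfile () : 0; /* rebuild at most once */
}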
GF_LOG_ERROR, GEOREP"failed to send SIGSTOP signal"); - return ret; -} - -static int -glusterd_send_sigcont (pid_t pid) -{ - int ret = 0; - ret = kill (pid, SIGCONT); - if (ret) - gf_log ("", GF_LOG_ERROR, GEOREP"failed to send SIGCONT signal"); - return ret; -} - -/* - * Log rotations flow is something like this: - * - Send SIGSTOP to process group (this will stop monitor/worker process - * and also the slave if it's local) - * - Rotate log file for monitor/worker - * - Rotate log file for slave if it's local - * - Send SIGCONT to the process group. Monitor wakes up, kills the worker - * (this is done in the SIGCONT handler), which results in the termination - * of the slave (local/remote). After returning from signal handler, - * monitor detects absence of worker and starts it again, which in-turn - * starts the slave. - */ -static int -glusterd_send_log_rotate_signal (pid_t pid, char *logfile1, char *logfile2) -{ - int ret = 0; - char rlogfile[PATH_MAX] = {0,}; - time_t rottime = 0; - - ret = glusterd_send_sigstop (-pid); - rottime = time (NULL); + int32_t ret = -1; + runner_t runner = {0,}; + glusterd_conf_t *priv = NULL; + char *master = NULL; + char *gl_workdir = NULL; + char geo_rep_dir[PATH_MAX] = ""; + char *conf_path = NULL; - snprintf (rlogfile, sizeof (rlogfile), "%s.%"PRIu64, logfile1, - (uint64_t) rottime); - ret = rename (logfile1, rlogfile); - if (ret) - gf_log ("", GF_LOG_ERROR, "rename failed for geo-rep log file"); + GF_ASSERT (slave); + GF_ASSERT (slave_ip); + GF_ASSERT (slave_vol); + GF_ASSERT (op_errstr); + GF_ASSERT (dict); + GF_ASSERT (resp_dict); - if (!*logfile2) { - gf_log ("", GF_LOG_DEBUG, "Slave is not local," - " skipping rotation"); - ret = 0; + if (THIS) + priv = THIS->private; + if (priv == NULL) { + gf_log ("", GF_LOG_ERROR, "priv of glusterd not present"); + *op_errstr = gf_strdup ("glusterd defunct"); goto out; } - (void) snprintf (rlogfile, sizeof (rlogfile), "%s.%"PRIu64, logfile2, - (uint64_t) rottime); - ret = rename (logfile2, rlogfile); - if (ret) - gf_log ("", GF_LOG_ERROR, "rename failed for geo-rep slave" - " log file"); + ret = dict_get_str (dict, "conf_path", &conf_path); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to fetch conf file path."); + goto out; + } - out: - ret = glusterd_send_sigcont (-pid); + gl_workdir = priv->workdir; + master = ""; + runinit (&runner); + runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", + "--delete", "-c", NULL); + runner_argprintf (&runner, "%s", conf_path); - return ret; -} + if (volinfo) { + master = volinfo->volname; + runner_argprintf (&runner, ":%s", master); + } + runner_add_arg (&runner, slave); + runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); + synclock_unlock (&priv->big_lock); + ret = runner_run (&runner); + synclock_lock (&priv->big_lock); + if (ret) { + gf_log ("", GF_LOG_ERROR, "gsyncd failed to " + "delete session info for %s and %s peers", + master, slave); -static int -glusterd_get_pid_from_file (char *master, char *slave, pid_t *pid) -{ - int ret = -1; - int pfd = 0; - char pidfile[PATH_MAX] = {0,}; - char buff[1024] = {0,}; + gf_asprintf (op_errstr, "gsyncd failed to " + "delete session info for %s and %s peers", + master, slave); - pfd = gsyncd_getpidfile (master, slave, pidfile); - if (pfd == -2) { - gf_log ("", GF_LOG_ERROR, GEOREP" log-rotate validation " - " failed for %s & %s", master, slave); - goto out; - } - if (gsync_status_byfd (pfd) == -1) { - gf_log ("", GF_LOG_ERROR, "gsyncd b/w %s & %s is not" - " running", master, slave); goto out; } - if (pfd < 0) - goto out; + ret = 
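Deleting a session above removes the per-session working directory but treats ENOENT as success, which keeps a repeated delete idempotent. Standalone equivalent:

#include <errno.h>
#include <stdio.h>
#include <unistd.h>

static int
remove_session_dir (const char *dir)
{
        if (rmdir (dir) == 0 || errno == ENOENT)
                return 0;        /* absent directory is not an error */
        perror (dir);            /* EACCES, ENOTEMPTY, ... are */
        return -1;
}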
snprintf (geo_rep_dir, sizeof(geo_rep_dir) - 1, + "%s/"GEOREP"/%s_%s_%s", gl_workdir, + volinfo->volname, slave_ip, slave_vol); + geo_rep_dir[ret] = '\0'; - ret = read (pfd, buff, 1024); - if (ret < 0) { - gf_log ("", GF_LOG_ERROR, GEOREP" cannot read pid from pid-file"); - goto out; + ret = rmdir (geo_rep_dir); + if (ret) { + if (errno == ENOENT) + gf_log ("", GF_LOG_DEBUG, "Geo Rep Dir(%s) Not Present.", + geo_rep_dir); + else { + gf_log ("", GF_LOG_ERROR, "Unable to delete " + "Geo Rep Dir(%s). Error: %s", geo_rep_dir, + strerror (errno)); + goto out; + } } - - *pid = strtol (buff, NULL, 10); ret = 0; + gf_asprintf (op_errstr, "delete successful"); + out: - sys_close(pfd); + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } -static int -glusterd_do_gsync_log_rotate (char *master, char *slave, uuid_t *uuid, char **op_errstr) +int +glusterd_op_sys_exec (dict_t *dict, char **op_errstr, dict_t *rsp_dict) { - int ret = 0; - glusterd_conf_t *priv = NULL; - pid_t pid = 0; - char log_file1[PATH_MAX] = {0,}; - char log_file2[PATH_MAX] = {0,}; - - GF_ASSERT (THIS); - GF_ASSERT (THIS->private); + char buf[PATH_MAX] = ""; + char cmd_arg_name[PATH_MAX] = ""; + char output_name[PATH_MAX] = ""; + char errmsg[PATH_MAX] = ""; + char *ptr = NULL; + char *bufp = NULL; + char *command = NULL; + char **cmd_args = NULL; + int ret = -1; + int i = -1; + int cmd_args_count = 0; + int output_count = 0; + glusterd_conf_t *priv = NULL; + runner_t runner = {0,}; - priv = THIS->private; + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); - ret = glusterd_get_pid_from_file (master, slave, &pid); - if (ret) + if (THIS) + priv = THIS->private; + if (priv == NULL) { + gf_log ("", GF_LOG_ERROR, "priv of glusterd not present"); + *op_errstr = gf_strdup ("glusterd defunct"); goto out; + } - /* log file */ - ret = glusterd_gsyncd_getlogfile (master, slave, log_file1); - if (ret) + ret = dict_get_str (dict, "command", &command); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get command from dict"); goto out; + } - /* check if slave is local or remote */ - ret = glusterd_gsync_slave_is_remote (slave); - if (ret) - goto do_rotate; - - /* slave log file - slave is local and it's log can be rotated */ - ret = glusterd_gsync_get_slave_log_file (master, slave, log_file2); + ret = dict_get_int32 (dict, "cmd_args_count", &cmd_args_count); if (ret) - goto out; - - do_rotate: - ret = glusterd_send_log_rotate_signal (pid, log_file1, log_file2); - - out: - if (ret && op_errstr) - *op_errstr = gf_strdup("Error rotating log file"); - return ret; -} - -static int -glusterd_do_gsync_log_rotation_mst_slv (glusterd_volinfo_t *volinfo, char *slave, - char **op_errstr) -{ - uuid_t uuid = {0, }; - glusterd_conf_t *priv = NULL; - int ret = 0; - char errmsg[1024] = {0,}; - xlator_t *this = NULL; + gf_log ("", GF_LOG_INFO, "No cmd_args_count"); + + if (cmd_args_count) { + cmd_args = GF_CALLOC (cmd_args_count, sizeof (char*), + gf_common_mt_char); + if (!cmd_args) { + gf_log ("", GF_LOG_ERROR, "Unable to calloc. 
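sys-exec receives its argument vector through the dict as numbered keys, cmd_arg_1 through cmd_arg_N (1-based, as in the loop above). The retrieval pattern with dict_get_str abstracted into a lookup callback:

#include <stdio.h>

static int
collect_args (int count, const char **out,
              const char *(*lookup) (const char *key))
{
        char key[64];
        int  i;

        for (i = 1; i <= count; i++) {
                snprintf (key, sizeof (key), "cmd_arg_%d", i);
                out[i - 1] = lookup (key);
                if (!out[i - 1])
                        return -1;       /* a missing argument is fatal */
        }
        return 0;
}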
" + "Errno = %s", strerror(errno)); + goto out; + } - GF_ASSERT (volinfo); - GF_ASSERT (slave); - GF_ASSERT (THIS); - this = THIS; - GF_ASSERT (this->private); - priv = this->private; + for (i=1; i <= cmd_args_count; i++) { + memset (cmd_arg_name, '\0', sizeof(cmd_arg_name)); + snprintf (cmd_arg_name, sizeof(cmd_arg_name), + "cmd_arg_%d", i); + ret = dict_get_str (dict, cmd_arg_name, &cmd_args[i-1]); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get %s in dict", + cmd_arg_name); + goto out; + } + } + } - ret = glusterd_gsync_get_uuid (slave, volinfo, uuid); - if ((ret == 0) && (uuid_compare (MY_UUID, uuid) != 0)) + runinit (&runner); + runner_argprintf (&runner, GSYNCD_PREFIX"/peer_%s", command); + for (i=0; i < cmd_args_count; i++) + runner_add_arg (&runner, cmd_args[i]); + runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); + synclock_unlock (&priv->big_lock); + ret = runner_start (&runner); + if (ret == -1) { + snprintf (errmsg, sizeof (errmsg), "Unable to " + "execute command. Error : %s", + strerror (errno)); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + synclock_lock (&priv->big_lock); goto out; + } + + ptr = fgets(buf, sizeof(buf), runner_chio (&runner, STDOUT_FILENO)); + if (ptr) { + ret = dict_get_int32 (rsp_dict, "output_count", &output_count); + if (ret) + output_count = 1; + else + output_count++; + memset (output_name, '\0', sizeof (output_name)); + snprintf (output_name, sizeof (output_name), + "output_%d", output_count); + if (buf[strlen(buf) - 1] == '\n') + buf[strlen(buf) - 1] = '\0'; + bufp = gf_strdup (buf); + if (!bufp) + gf_log ("", GF_LOG_ERROR, "gf_strdup failed."); + ret = dict_set_dynstr (rsp_dict, output_name, bufp); + if (ret) { + GF_FREE (bufp); + gf_log ("", GF_LOG_ERROR, "output set failed."); + } + ret = dict_set_int32 (rsp_dict, "output_count", output_count); + if (ret) + gf_log ("", GF_LOG_ERROR, "output_count set failed."); + } + ret = runner_end (&runner); if (ret) { - snprintf(errmsg, sizeof(errmsg), "geo-replication session b/w %s %s not active", - volinfo->volname, slave); - gf_log (this->name, GF_LOG_WARNING, "%s", errmsg); - if (op_errstr) - *op_errstr = gf_strdup(errmsg); + snprintf (errmsg, sizeof (errmsg), "Unable to " + "end. 
Error : %s", + strerror (errno)); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + synclock_lock (&priv->big_lock); goto out; } + synclock_lock (&priv->big_lock); - ret = glusterd_do_gsync_log_rotate (volinfo->volname, slave, &uuid, op_errstr); + ret = 0; +out: + if (cmd_args) { + GF_FREE (cmd_args); + cmd_args = NULL; + } - out: - gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } -static int -_iterate_log_rotate_mst_slv (dict_t *this, char *key, data_t *value, void *data) +int +glusterd_op_copy_file (dict_t *dict, char **op_errstr) { - glusterd_gsync_status_temp_t *param = NULL; - char *slave = NULL; + char abs_filename[PATH_MAX] = ""; + char errmsg[PATH_MAX] = ""; + char *filename = NULL; + char *host_uuid = NULL; + char uuid_str [64] = {0}; + char *contents = NULL; + char buf[1024] = ""; + int ret = -1; + int fd = -1; + int bytes_writen = 0; + int bytes_read = 0; + int contents_size = -1; + int file_mode = -1; + glusterd_conf_t *priv = NULL; + struct stat stbuf = {0,}; - param = (glusterd_gsync_status_temp_t *) data; - GF_ASSERT (param); - GF_ASSERT (param->volinfo); - - slave = strchr (value->data, ':'); - if (slave) - slave++; - else { - gf_log ("", GF_LOG_ERROR, "geo-replication log-rotate: slave (%s) " - "not conforming to format", slave); - return -1; + if (THIS) + priv = THIS->private; + if (priv == NULL) { + gf_log ("", GF_LOG_ERROR, "priv of glusterd not present"); + *op_errstr = gf_strdup ("glusterd defunct"); + goto out; } - (void) glusterd_do_gsync_log_rotation_mst_slv (param->volinfo, slave, NULL); - return 0; -} + ret = dict_get_str (dict, "host-uuid", &host_uuid); + if (ret < 0) + goto out; -static int -glusterd_do_gsync_log_rotation_mst (glusterd_volinfo_t *volinfo) -{ - glusterd_gsync_status_temp_t param = {0, }; + ret = dict_get_str (dict, "source", &filename); + if (ret < 0) { + gf_log ("", GF_LOG_ERROR, "Unable to fetch" + " filename from dict."); + *op_errstr = gf_strdup ("command unsuccessful"); + goto out; + } + snprintf (abs_filename, sizeof(abs_filename), + "%s/%s", priv->workdir, filename); - GF_ASSERT (volinfo); + uuid_utoa_r (MY_UUID, uuid_str); + if (!strcmp (uuid_str, host_uuid)) { + ret = lstat (abs_filename, &stbuf); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Source file" + " does not exist in %s", priv->workdir); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } - param.volinfo = volinfo; - dict_foreach (volinfo->gsync_slaves, _iterate_log_rotate_mst_slv, ¶m); - return 0; -} + contents = GF_CALLOC(1, stbuf.st_size+1, gf_common_mt_char); + if (!contents) { + snprintf (errmsg, sizeof (errmsg), + "Unable to allocate memory"); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } -static int -glusterd_rotate_gsync_all () -{ - int32_t ret = 0; - glusterd_conf_t *priv = NULL; - glusterd_volinfo_t *volinfo = NULL; + fd = open (abs_filename, O_RDONLY); + if (fd < 0) { + snprintf (errmsg, sizeof (errmsg), "Unable to open %s", + abs_filename); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } - GF_ASSERT (THIS); - priv = THIS->private; + do { + ret = read (fd, buf, sizeof(buf)); + if (ret > 0) { + memcpy (contents+bytes_read, buf, ret); + bytes_read += ret; + memset (buf, '\0', sizeof(buf)); + } + } while (ret > 0); - GF_ASSERT (priv); + if (bytes_read != stbuf.st_size) { + snprintf (errmsg, 
sizeof (errmsg), "Unable to read all " + "the data from %s", abs_filename); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } - list_for_each_entry (volinfo, &priv->volumes, vol_list) { - ret = glusterd_do_gsync_log_rotation_mst (volinfo); - if (ret) + ret = dict_set_int32 (dict, "contents_size", stbuf.st_size); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Unable to set" + " contents size in dict."); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); goto out; - } + } - out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - return ret; -} + ret = dict_set_int32 (dict, "file_mode", + (int32_t)stbuf.st_mode); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Unable to set" + " file mode in dict."); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } -static int -glusterd_rotate_gsync_logs (dict_t *dict, char **op_errstr, dict_t *rsp_dict) -{ - char *slave = NULL; - char *volname = NULL; - char errmsg[1024] = {0,}; - gf_boolean_t exists = _gf_false; - glusterd_volinfo_t *volinfo = NULL; - char **linearr = NULL; - int ret = 0; + ret = dict_set_bin (dict, "common_pem_contents", + contents, stbuf.st_size); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Unable to set" + " pem contents in dict."); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } + close (fd); + } else { + ret = dict_get_bin (dict, "common_pem_contents", + (void **) &contents); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Unable to get" + " pem contents in dict."); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } - ret = dict_get_str (dict, "master", &volname); - if (ret < 0) { - ret = glusterd_rotate_gsync_all (); - goto out; - } + ret = dict_get_int32 (dict, "contents_size", &contents_size); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Unable to set" + " contents size in dict."); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } - exists = glusterd_check_volume_exists (volname); - ret = glusterd_volinfo_find (volname, &volinfo); - if ((ret) || (!exists)) { - snprintf (errmsg, sizeof(errmsg), "Volume %s does not" - " exist", volname); - gf_log ("", GF_LOG_WARNING, "%s", errmsg); - *op_errstr = gf_strdup (errmsg); - ret = -1; - goto out; - } + ret = dict_get_int32 (dict, "file_mode", &file_mode); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Unable to get" + " file mode in dict."); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } - ret = dict_get_str (dict, "slave", &slave); - if (ret < 0) { - ret = glusterd_do_gsync_log_rotation_mst (volinfo); - goto out; - } + fd = open (abs_filename, O_WRONLY | O_TRUNC | O_CREAT, 0600); + if (fd < 0) { + snprintf (errmsg, sizeof (errmsg), "Unable to open %s", + abs_filename); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } - /* for the given slave use the normalized url */ - ret = glusterd_urltransform_single (slave, "normalize", &linearr); - if (ret == -1) - goto out; + bytes_writen = write (fd, contents, contents_size); - ret = glusterd_do_gsync_log_rotation_mst_slv (volinfo, linearr[0], - op_errstr); - if (ret) - gf_log ("gsyncd", GF_LOG_ERROR, "gsyncd log-rotate failed for" - " %s & %s", volname, slave); + if (bytes_writen != contents_size) { + snprintf (errmsg, sizeof (errmsg), "Failed to write" + " 
to %s", abs_filename); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } - glusterd_urltransform_free (linearr, 1); - out: + fchmod (fd, file_mode); + close (fd); + } + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } - int glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict) { @@ -2153,11 +3484,16 @@ glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict) dict_t *resp_dict = NULL; char *host_uuid = NULL; char *slave = NULL; + char *slave_ip = NULL; + char *slave_vol = NULL; char *volname = NULL; + char *path_list = NULL; glusterd_volinfo_t *volinfo = NULL; glusterd_conf_t *priv = NULL; + gf_boolean_t is_force = _gf_false; char *status_msg = NULL; - uuid_t uuid = {0, }; + gf_boolean_t is_running = _gf_false; + char *conf_path = NULL; GF_ASSERT (THIS); GF_ASSERT (THIS->private); @@ -2183,15 +3519,28 @@ glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict) goto out; } - if (type == GF_GSYNC_OPTION_TYPE_ROTATE) { - ret = glusterd_rotate_gsync_logs (dict, op_errstr, resp_dict); + ret = dict_get_str (dict, "slave", &slave); + if (ret < 0) goto out; + ret = dict_get_str (dict, "slave_ip", &slave_ip); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to fetch slave volume name."); + goto out; } - ret = dict_get_str (dict, "slave", &slave); - if (ret < 0) + ret = dict_get_str (dict, "slave_vol", &slave_vol); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to fetch slave volume name."); goto out; + } + + ret = dict_get_str (dict, "conf_path", &conf_path); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to fetch conf file path."); + goto out; + } if (dict_get_str (dict, "master", &volname) == 0) { ret = glusterd_volinfo_find (volname, &volinfo); @@ -2200,11 +3549,31 @@ glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict) volname); goto out; } + + ret = glusterd_get_local_brickpaths (volinfo, &path_list); } if (type == GF_GSYNC_OPTION_TYPE_CONFIG) { - ret = glusterd_gsync_configure (volinfo, slave, dict, resp_dict, - op_errstr); + ret = glusterd_gsync_configure (volinfo, slave, path_list, + dict, resp_dict, op_errstr); + + ret = dict_set_str (resp_dict, "conf_path", conf_path); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to store conf_file_path."); + goto out; + } + goto out; + } + + if (type == GF_GSYNC_OPTION_TYPE_DELETE) { + ret = glusterd_remove_slave_in_info(volinfo, slave, op_errstr); + if (ret && !is_force && path_list) + goto out; + + ret = glusterd_gsync_delete (volinfo, slave, slave_ip, + slave_vol, path_list, dict, + resp_dict, op_errstr); goto out; } @@ -2213,48 +3582,652 @@ glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict) goto out; } + is_force = dict_get_str_boolean (dict, "force", _gf_false); + if (type == GF_GSYNC_OPTION_TYPE_START) { - ret = glusterd_set_marker_gsync (volinfo); + ret = glusterd_set_gsync_confs (volinfo); if (ret != 0) { - gf_log ("", GF_LOG_WARNING, "marker start failed"); + gf_log ("", GF_LOG_WARNING, "marker/changelog start failed"); *op_errstr = gf_strdup ("failed to initialize indexing"); ret = -1; goto out; } - ret = glusterd_store_slave_in_info(volinfo, slave, - host_uuid, op_errstr); - if (ret) - goto out; - ret = glusterd_start_gsync (volinfo, slave, host_uuid, - op_errstr); + ret = glusterd_start_gsync (volinfo, slave, path_list, + conf_path, host_uuid, op_errstr); } if (type == GF_GSYNC_OPTION_TYPE_STOP) { - - ret = glusterd_gsync_get_uuid (slave, volinfo, uuid); 
- if (ret) { + ret = glusterd_check_gsync_running_local (volinfo->volname, + slave, conf_path, + &is_running); + if (!ret && !is_force && path_list && + (_gf_true != is_running)) { gf_log ("", GF_LOG_WARNING, GEOREP" is not set up for" "%s(master) and %s(slave)", volname, slave); *op_errstr = strdup (GEOREP" is not set up"); goto out; } - ret = glusterd_remove_slave_in_info(volinfo, slave, op_errstr); - if (ret) + ret = stop_gsync (volname, slave, &status_msg, conf_path, is_force); + if (ret == 0 && status_msg) + ret = dict_set_str (resp_dict, "gsync-status", + status_msg); + if (ret != 0 && !is_force && path_list) + *op_errstr = gf_strdup ("internal error"); + + if (!ret) { + ret = glusterd_create_status_file (volinfo->volname, + slave, slave_ip, + slave_vol, "Stopped"); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to update" + "state_file. Error : %s", + strerror (errno)); + } + } + } + +out: + if (path_list) { + GF_FREE (path_list); + path_list = NULL; + } + + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_get_slave_details_confpath (glusterd_volinfo_t *volinfo, dict_t *dict, + char **slave_ip, char **slave_vol, + char **conf_path, char **op_errstr) +{ + int ret = -1; + char confpath[PATH_MAX] = ""; + glusterd_conf_t *priv = NULL; + char *slave = NULL; + + GF_ASSERT (THIS); + priv = THIS->private; + GF_ASSERT (priv); + + ret = dict_get_str (dict, "slave", &slave); + if (ret || !slave) { + gf_log ("", GF_LOG_ERROR, "Unable to fetch slave from dict"); + ret = -1; + goto out; + } + + ret = glusterd_get_slave_info (slave, slave_ip, slave_vol, op_errstr); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to fetch slave details."); + ret = -1; + goto out; + } + + ret = dict_set_str (dict, "slave_ip", *slave_ip); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to store slave IP."); + goto out; + } + + ret = dict_set_str (dict, "slave_vol", *slave_vol); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to store slave volume name."); + goto out; + } + + ret = snprintf (confpath, sizeof(confpath) - 1, + "%s/"GEOREP"/%s_%s_%s/gsyncd.conf", + priv->workdir, volinfo->volname, + *slave_ip, *slave_vol); + confpath[ret] = '\0'; + *conf_path = gf_strdup (confpath); + if (!(*conf_path)) { + gf_log ("", GF_LOG_ERROR, + "Unable to gf_strdup. 
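glusterd_get_slave_details_confpath() assembles the per-session config path as <workdir>/geo-replication/<master>_<slave_ip>_<slave_vol>/gsyncd.conf (GEOREP expands to the geo-replication directory name). A truncation-checked version of that construction:

#include <stdio.h>

static int
session_conf_path (char *buf, size_t len, const char *workdir,
                   const char *master, const char *slave_ip,
                   const char *slave_vol)
{
        int n = snprintf (buf, len,
                          "%s/geo-replication/%s_%s_%s/gsyncd.conf",
                          workdir, master, slave_ip, slave_vol);

        return (n < 0 || (size_t) n >= len) ? -1 : 0;  /* reject truncation */
}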
Error: %s", strerror (errno)); + ret = -1; + goto out; + } + + ret = dict_set_str (dict, "conf_path", *conf_path); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to store conf_path"); + goto out; + } + +out: + gf_log ("", GF_LOG_DEBUG,"Returning %d", ret); + return ret; + +} + +static int +glusterd_get_slave_info (char *slave, char **slave_ip, + char **slave_vol, char **op_errstr) +{ + char *tmp = NULL; + char *save_ptr = NULL; + char **linearr = NULL; + int32_t ret = -1; + char errmsg[PATH_MAX] = ""; + + ret = glusterd_urltransform_single (slave, "normalize", + &linearr); + if (ret == -1) { + ret = snprintf (errmsg, sizeof(errmsg) - 1, + "Invalid Url: %s", slave); + errmsg[ret] = '\0'; + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "Failed to normalize url"); + goto out; + } + + tmp = strtok_r (linearr[0], "/", &save_ptr); + tmp = strtok_r (NULL, "/", &save_ptr); + slave = strtok_r (tmp, ":", &save_ptr); + if (slave) { + ret = glusterd_mountbroker_check (&slave, op_errstr); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Invalid slave url: %s", *op_errstr); goto out; + } - if (uuid_compare (MY_UUID, uuid) != 0) { + *slave_ip = gf_strdup (slave); + if (!*slave_ip) { + gf_log ("", GF_LOG_ERROR, + "Failed to gf_strdup"); + ret = -1; goto out; } + gf_log ("", GF_LOG_DEBUG, "Slave IP : %s", *slave_ip); + ret = 0; + } else { + gf_log ("", GF_LOG_ERROR, "Invalid slave name"); + goto out; + } - ret = stop_gsync (volname, slave, &status_msg); - if (ret == 0 && status_msg) - ret = dict_set_str (resp_dict, "gsync-status", - status_msg); - if (ret != 0) - *op_errstr = gf_strdup ("internal error"); + slave = strtok_r (NULL, ":", &save_ptr); + if (slave) { + *slave_vol = gf_strdup (slave); + if (!*slave_vol) { + gf_log ("", GF_LOG_ERROR, + "Failed to gf_strdup"); + ret = -1; + goto out; + } + gf_log ("", GF_LOG_DEBUG, "Slave Vol : %s", *slave_vol); + ret = 0; + } else { + gf_log ("", GF_LOG_ERROR, "Invalid slave name"); + goto out; + } + +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +static void +runinit_gsyncd_setrx (runner_t *runner, char *conf_path) +{ + runinit (runner); + runner_add_args (runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL); + runner_argprintf (runner, "%s", conf_path); + runner_add_arg (runner, "--config-set-rx"); +} + +static int +glusterd_check_gsync_present (int *valid_state) +{ + char buff[PATH_MAX] = {0, }; + runner_t runner = {0,}; + char *ptr = NULL; + int ret = 0; + + runinit (&runner); + runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "--version", NULL); + runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); + ret = runner_start (&runner); + if (ret == -1) { + if (errno == ENOENT) { + gf_log ("glusterd", GF_LOG_INFO, GEOREP + " module not installed in the system"); + *valid_state = 0; + } + else { + gf_log ("glusterd", GF_LOG_ERROR, GEOREP + " module not working as desired"); + *valid_state = -1; + } + goto out; + } + + ptr = fgets(buff, sizeof(buff), runner_chio (&runner, STDOUT_FILENO)); + if (ptr) { + if (!strstr (buff, "gsyncd")) { + ret = -1; + gf_log ("glusterd", GF_LOG_ERROR, GEOREP" module not " + "working as desired"); + *valid_state = -1; + goto out; + } + } else { + ret = -1; + gf_log ("glusterd", GF_LOG_ERROR, GEOREP" module not " + "working as desired"); + *valid_state = -1; + goto out; + } + + ret = 0; + out: + + runner_end (&runner); + + gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + return ret; + +} + +static int +create_conf_file (glusterd_conf_t *conf, char *conf_path) +#define RUN_GSYNCD_CMD do { \ + ret = 
runner_run_reuse (&runner); \ + if (ret == -1) { \ + runner_log (&runner, "glusterd", GF_LOG_ERROR, "command failed"); \ + runner_end (&runner); \ + goto out; \ + } \ + runner_end (&runner); \ +} while (0) +{ + int ret = 0; + runner_t runner = {0,}; + char georepdir[PATH_MAX] = {0,}; + int valid_state = 0; + + valid_state = -1; + ret = glusterd_check_gsync_present (&valid_state); + if (-1 == ret) { + ret = valid_state; + goto out; + } + + ret = snprintf (georepdir, sizeof(georepdir) - 1, "%s/"GEOREP, + conf->workdir); + georepdir[ret] = '\0'; + + /************ + * master pre-configuration + ************/ + + /* remote-gsyncd */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, "remote-gsyncd", GSYNCD_PREFIX"/gsyncd", ".", ".", NULL); + RUN_GSYNCD_CMD; + + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, "remote-gsyncd", "/nonexistent/gsyncd", + ".", "^ssh:", NULL); + RUN_GSYNCD_CMD; + + /* gluster-command-dir */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, "gluster-command-dir", SBIN_DIR"/", + ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* gluster-params */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, "gluster-params", + "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", + ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* ssh-command */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_arg (&runner, "ssh-command"); + runner_argprintf (&runner, + "ssh -oPasswordAuthentication=no " + "-oStrictHostKeyChecking=no " + "-i %s/secret.pem", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* pid-file */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_arg (&runner, "pid-file"); + runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}.pid", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* state-file */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_arg (&runner, "state-file"); + runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}.status", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* state-detail-file */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_arg (&runner, "state-detail-file"); + runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}-detail.status", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* state-socket */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_arg (&runner, "state-socket-unencoded"); + runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}.socket", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* socketdir */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, "socketdir", GLUSTERD_SOCK_DIR, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* log-file */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, + "log-file", + DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"/${mastervol}/${eSlave}.log", + ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* gluster-log-file */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, + "gluster-log-file", + DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"/${mastervol}/${eSlave}${local_id}.gluster.log", + ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* ignore-deletes */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, "ignore-deletes", "true", ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* 
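RUN_GSYNCD_CMD is the usual do { ... } while (0) statement macro, giving each templated --config-set-rx step identical error handling and a single exit path. A minimal standalone macro of the same shape:

#include <stdio.h>

#define RUN_STEP(call, label)  do {                          \
        if ((call) != 0) {                                   \
                fprintf (stderr, "%s failed\n", (label));    \
                goto out;                                    \
        }                                                    \
} while (0)

static int step_ok (void) { return 0; }

int
main (void)
{
        RUN_STEP (step_ok (), "remote-gsyncd");
        RUN_STEP (step_ok (), "gluster-params");
        puts ("all steps ran");
        return 0;
out:
        return 1;
}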
special-sync-mode */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, "special-sync-mode", "partial", ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* change-detector == changelog */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args(&runner, "change-detector", "changelog", ".", ".", NULL); + RUN_GSYNCD_CMD; + + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_arg(&runner, "working-dir"); + runner_argprintf(&runner, "%s/${mastervol}/${eSlave}", + DEFAULT_VAR_RUN_DIRECTORY); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /************ + * slave pre-configuration + ************/ + + /* gluster-command-dir */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, "gluster-command-dir", SBIN_DIR"/", + ".", NULL); + RUN_GSYNCD_CMD; + + /* gluster-params */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, "gluster-params", + "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", + ".", NULL); + RUN_GSYNCD_CMD; + + /* log-file */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, + "log-file", + DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/${session_owner}:${eSlave}.log", + ".", NULL); + RUN_GSYNCD_CMD; + + /* MountBroker log-file */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, + "log-file-mbr", + DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/mbr/${session_owner}:${eSlave}.log", + ".", NULL); + RUN_GSYNCD_CMD; + + /* gluster-log-file */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, + "gluster-log-file", + DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/${session_owner}:${eSlave}.gluster.log", + ".", NULL); + RUN_GSYNCD_CMD; + + out: + return ret ? -1 : 0; +} + +static int +glusterd_create_essential_dir_files (glusterd_volinfo_t *volinfo, dict_t *dict, + char *slave, char *slave_ip, + char *slave_vol, char **op_errstr) +{ + int ret = -1; + char *conf_path = NULL; + char *statefile = NULL; + char buf[PATH_MAX] = ""; + char errmsg[PATH_MAX] = ""; + glusterd_conf_t *conf = NULL; + struct stat stbuf = {0,}; + + GF_ASSERT (THIS); + conf = THIS->private; + + ret = dict_get_str (dict, "conf_path", &conf_path); + if (ret) { + snprintf (errmsg, sizeof (errmsg), + "Unable to fetch conf file path."); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } + + ret = dict_get_str (dict, "statefile", &statefile); + if (ret) { + snprintf (errmsg, sizeof (errmsg), + "Unable to fetch statefile path."); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } + + ret = snprintf (buf, sizeof(buf) - 1, "%s/"GEOREP"/%s_%s_%s", + conf->workdir, volinfo->volname, slave_ip, slave_vol); + buf[ret] = '\0'; + ret = mkdir_p (buf, 0777, _gf_true); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Unable to create %s" + ". Error : %s", buf, strerror (errno)); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } + + ret = snprintf (buf, PATH_MAX, DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"/%s", + volinfo->volname); + buf[ret] = '\0'; + ret = mkdir_p (buf, 0777, _gf_true); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Unable to create %s" + ". Error : %s", buf, strerror (errno)); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } + + ret = lstat (conf_path, &stbuf); + if (!ret) { + gf_log ("", GF_LOG_DEBUG, "Session already running." 
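glusterd_create_essential_dir_files() relies on mkdir_p() for the session and log directories; the property that matters is that an already-existing component is not an error. A compact standalone equivalent for absolute paths without a trailing slash (the path is modified in place):

#include <errno.h>
#include <sys/stat.h>

static int
mkdir_p_sketch (char *path, mode_t mode)
{
        char *p;

        for (p = path + 1; *p; p++) {
                if (*p != '/')
                        continue;
                *p = '\0';               /* cut at this component */
                if (mkdir (path, mode) != 0 && errno != EEXIST)
                        return -1;
                *p = '/';                /* restore and continue */
        }
        if (mkdir (path, mode) != 0 && errno != EEXIST)
                return -1;
        return 0;
}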
+ " Not creating config file again."); + } else { + ret = create_conf_file (conf, conf_path); + if (ret || lstat (conf_path, &stbuf)) { + snprintf (errmsg, sizeof (errmsg), "Failed to create" + " config file(%s).", conf_path); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } + } + + ret = lstat (statefile, &stbuf); + if (!ret) { + gf_log ("", GF_LOG_DEBUG, "Session already running." + " Not creating status file again."); + goto out; + } else { + ret = glusterd_create_status_file (volinfo->volname, slave, + slave_ip, slave_vol, + "Not Started"); + if (ret || lstat (statefile, &stbuf)) { + snprintf (errmsg, sizeof (errmsg), "Unable to create %s" + ". Error : %s", statefile, strerror (errno)); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } + } + +out: + gf_log ("", GF_LOG_DEBUG,"Returning %d", ret); + return ret; +} + +int +glusterd_op_gsync_create (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + char common_pem_file[PATH_MAX] = ""; + char errmsg[PATH_MAX] = ""; + char hooks_args[PATH_MAX] = ""; + char uuid_str [64] = ""; + char *host_uuid = NULL; + char *slave_ip = NULL; + char *slave_vol = NULL; + char *arg_buf = NULL; + char *volname = NULL; + char *slave = NULL; + int32_t ret = -1; + int32_t is_pem_push = -1; + gf_boolean_t is_force = -1; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + + GF_ASSERT (THIS); + conf = THIS->private; + GF_ASSERT (conf); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + ret = glusterd_op_gsync_args_get (dict, op_errstr, + &volname, &slave, &host_uuid); + if (ret) + goto out; + + snprintf (common_pem_file, sizeof(common_pem_file), + "%s"GLUSTERD_COMMON_PEM_PUB_FILE, conf->workdir); + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Volinfo for %s" + " (master) not found", volname); + goto out; + } + + ret = dict_get_str (dict, "slave_vol", &slave_vol); + if (ret) { + snprintf (errmsg, sizeof (errmsg), + "Unable to fetch slave volume name."); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } + + ret = dict_get_str (dict, "slave_ip", &slave_ip); + if (ret) { + snprintf (errmsg, sizeof (errmsg), + "Unable to fetch slave IP."); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } + + is_force = dict_get_str_boolean (dict, "force", _gf_false); + + uuid_utoa_r (MY_UUID, uuid_str); + if (!strcmp (uuid_str, host_uuid)) { + ret = dict_get_int32 (dict, "push_pem", &is_pem_push); + if (!ret && is_pem_push) { + gf_log ("", GF_LOG_DEBUG, "Trying to setup" + " pem files in slave"); + is_pem_push = 1; + } else + is_pem_push = 0; + + snprintf(hooks_args, sizeof(hooks_args), + "is_push_pem=%d pub_file=%s slave_ip=%s", + is_pem_push, common_pem_file, slave_ip); + + } else + snprintf(hooks_args, sizeof(hooks_args), + "This argument will stop the hooks script"); + + arg_buf = gf_strdup (hooks_args); + if (!arg_buf) { + gf_log ("", GF_LOG_ERROR, "Failed to" + " gf_strdup"); + if (is_force) { + ret = 0; + goto create_essentials; + } + ret = -1; + goto out; + } + + ret = dict_set_str (dict, "hooks_args", arg_buf); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Failed to set" + " hooks_args in dict."); + if (is_force) { + ret = 0; + goto create_essentials; + } + goto out; + } + +create_essentials: + + ret = glusterd_create_essential_dir_files (volinfo, dict, slave, + slave_ip, slave_vol, + op_errstr); + if (ret) + goto out; + + ret = glusterd_store_slave_in_info (volinfo, slave, + host_uuid, op_errstr, + is_force); + if 
(ret) { + snprintf (errmsg, sizeof (errmsg), "Unable to store" + " slave info."); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; } out: diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 328971c2d..71d076624 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -33,6 +33,7 @@ #include "glusterd-op-sm.h" #include "glusterd-utils.h" #include "glusterd-store.h" +#include "glusterd-locks.h" #include "glusterd1-xdr.h" #include "cli1-xdr.h" @@ -54,6 +55,32 @@ #include <lvm2app.h> #endif +extern uuid_t global_txn_id; + +int glusterd_big_locked_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, + void *data, rpc_clnt_notify_t notify_fn) +{ + glusterd_conf_t *priv = THIS->private; + int ret = -1; + synclock_lock (&priv->big_lock); + ret = notify_fn (rpc, mydata, event, data); + synclock_unlock (&priv->big_lock); + return ret; +} + +int glusterd_big_locked_handler (rpcsvc_request_t *req, rpcsvc_actor actor_fn) +{ + glusterd_conf_t *priv = THIS->private; + int ret = -1; + + synclock_lock (&priv->big_lock); + ret = actor_fn (req); + synclock_unlock (&priv->big_lock); + + return ret; +} + static int glusterd_handle_friend_req (rpcsvc_request_t *req, uuid_t uuid, char *hostname, int port, @@ -75,8 +102,8 @@ glusterd_handle_friend_req (rpcsvc_request_t *req, uuid_t uuid, ret = glusterd_friend_find (uuid, rhost, &peerinfo); if (ret) { - ret = glusterd_xfer_friend_add_resp (req, rhost, port, -1, - GF_PROBE_UNKNOWN_PEER); + ret = glusterd_xfer_friend_add_resp (req, hostname, rhost, port, + -1, GF_PROBE_UNKNOWN_PEER); if (friend_req->vols.vols_val) { free (friend_req->vols.vols_val); friend_req->vols.vols_val = NULL; @@ -230,13 +257,14 @@ glusterd_add_peer_detail_to_dict (glusterd_peerinfo_t *peerinfo, int ret = -1; char key[256] = {0, }; + char *peer_uuid_str = NULL; GF_ASSERT (peerinfo); GF_ASSERT (friends); snprintf (key, 256, "friend%d.uuid", count); - uuid_utoa_r (peerinfo->uuid, peerinfo->uuid_str); - ret = dict_set_str (friends, key, peerinfo->uuid_str); + peer_uuid_str = gd_peer_uuid_str (peerinfo); + ret = dict_set_str (friends, key, peer_uuid_str); if (ret) goto out; @@ -310,7 +338,9 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, char *volume_id_str = NULL; struct args_pack pack = {0,}; xlator_t *this = NULL; - +#ifdef HAVE_BD_XLATOR + int caps = 0; +#endif GF_ASSERT (volinfo); GF_ASSERT (volumes); @@ -335,6 +365,21 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + /* As of now, the snap volumes are also displayed as part of + volume info command. So this change is to display whether + the volume is original volume or the snap_volume. If + displaying of snap volumes in volume info o/p is not needed + this should be removed. 
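glusterd_big_locked_handler() turns each RPC actor into a thin shim that runs the real __handler under the big lock, as __glusterd_handle_add_brick() shows earlier in this patch; glusterd_big_locked_notify() does the same for rpc_clnt callbacks. The wrapper shape in isolation, with pthreads standing in for the synclock:

#include <pthread.h>

typedef int (*actor_t) (void *req);

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

static int
big_locked (void *req, actor_t actor)
{
        int ret;

        pthread_mutex_lock (&big_lock);    /* serialize all actors */
        ret = actor (req);
        pthread_mutex_unlock (&big_lock);
        return ret;
}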
+ */ + snprintf (key, 256, "volume%d.snap_volume", count); + ret = dict_set_int32 (volumes, key, volinfo->is_snap_volume); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "failed to set whether " + "the volume is a snap volume or actual volume (%s)", + volinfo->volname); + goto out; + } + snprintf (key, 256, "volume%d.brick_count", count); ret = dict_set_int32 (volumes, key, volinfo->brick_count); if (ret) @@ -375,14 +420,76 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, goto out; #ifdef HAVE_BD_XLATOR - snprintf (key, 256, "volume%d.backend", count); - ret = dict_set_int32 (volumes, key, volinfo->backend); - if (ret) - goto out; + if (volinfo->caps) { + caps = 0; + snprintf (key, 256, "volume%d.xlator0", count); + buf = GF_MALLOC (256, gf_common_mt_char); + if (!buf) { + ret = ENOMEM; + goto out; + } + if (volinfo->caps & CAPS_BD) + snprintf (buf, 256, "BD"); + ret = dict_set_dynstr (volumes, key, buf); + if (ret) { + GF_FREE (buf); + goto out; + } + + if (volinfo->caps & CAPS_THIN) { + snprintf (key, 256, "volume%d.xlator0.caps%d", count, + caps++); + buf = GF_MALLOC (256, gf_common_mt_char); + if (!buf) { + ret = ENOMEM; + goto out; + } + snprintf (buf, 256, "thin"); + ret = dict_set_dynstr (volumes, key, buf); + if (ret) { + GF_FREE (buf); + goto out; + } + } + + if (volinfo->caps & CAPS_OFFLOAD_COPY) { + snprintf (key, 256, "volume%d.xlator0.caps%d", count, + caps++); + buf = GF_MALLOC (256, gf_common_mt_char); + if (!buf) { + ret = ENOMEM; + goto out; + } + snprintf (buf, 256, "offload_copy"); + ret = dict_set_dynstr (volumes, key, buf); + if (ret) { + GF_FREE (buf); + goto out; + } + } + + if (volinfo->caps & CAPS_OFFLOAD_SNAPSHOT) { + snprintf (key, 256, "volume%d.xlator0.caps%d", count, + caps++); + buf = GF_MALLOC (256, gf_common_mt_char); + if (!buf) { + ret = ENOMEM; + goto out; + } + snprintf (buf, 256, "offload_snapshot"); + ret = dict_set_dynstr (volumes, key, buf); + if (ret) { + GF_FREE (buf); + goto out; + } + } + + } #endif list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { char brick[1024] = {0,}; + char brick_uuid[64] = {0,}; snprintf (key, 256, "volume%d.brick%d", count, i); snprintf (brick, 1024, "%s:%s", brickinfo->hostname, brickinfo->path); @@ -390,6 +497,25 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, ret = dict_set_dynstr (volumes, key, buf); if (ret) goto out; + snprintf (key, 256, "volume%d.brick%d.uuid", count, i); + snprintf (brick_uuid, 64, "%s", uuid_utoa (brickinfo->uuid)); + buf = gf_strdup (brick_uuid); + if (!buf) + goto out; + ret = dict_set_dynstr (volumes, key, buf); + if (ret) + goto out; + +#ifdef HAVE_BD_XLATOR + if (volinfo->caps & CAPS_BD) { + snprintf (key, 256, "volume%d.vg%d", count, i); + snprintf (brick, 1024, "%s", brickinfo->vg); + buf = gf_strdup (brick); + ret = dict_set_dynstr (volumes, key, buf); + if (ret) + goto out; + } +#endif i++; } @@ -453,10 +579,17 @@ int32_t glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx, char *err_str, size_t err_len) { - int32_t ret = -1; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - int32_t locked = 0; + int32_t ret = -1; + dict_t *dict = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int32_t locked = 0; + char *tmp = NULL; + char *volname = NULL; + uuid_t *txn_id = NULL; + uuid_t *originator_uuid = NULL; + glusterd_op_info_t txn_op_info = {{0},}; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; GF_ASSERT (req); GF_ASSERT ((op > GD_OP_NONE) && (op < GD_OP_MAX)); @@ -467,46 
+600,157 @@ glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx, priv = this->private; GF_ASSERT (priv); - ret = glusterd_lock (MY_UUID); + dict = ctx; + + /* Generate a transaction-id for this operation and + * save it in the dict. This transaction id distinguishes + * each transaction, and helps separate opinfos in the + * op state machine. */ + txn_id = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); + if (!txn_id) + goto out; + + uuid_generate (*txn_id); + + ret = dict_set_bin (dict, "transaction_id", + txn_id, sizeof(*txn_id)); if (ret) { gf_log (this->name, GF_LOG_ERROR, - "Unable to acquire lock on localhost, ret: %d", ret); - snprintf (err_str, err_len, "Another transaction is in progress. " - "Please try again after sometime."); + "Failed to set transaction id."); goto out; } + gf_log (this->name, GF_LOG_DEBUG, + "Transaction_id = %s", uuid_utoa (*txn_id)); + + /* Save the MY_UUID as the originator_uuid. This originator_uuid + * will be used by is_origin_glusterd() to determine if a node + * is the originator node for a command. */ + originator_uuid = GF_CALLOC (1, sizeof(uuid_t), + gf_common_mt_uuid_t); + if (!originator_uuid) { + ret = -1; + goto out; + } + + uuid_copy (*originator_uuid, MY_UUID); + ret = dict_set_bin (dict, "originator_uuid", + originator_uuid, sizeof (uuid_t)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set originator uuid."); + goto out; + } + + /* Based on the op_version, acquire a cluster or mgmt_v3 lock */ + if (priv->op_version < 3) { + ret = glusterd_lock (MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock on localhost, ret: %d", + ret); + snprintf (err_str, err_len, + "Another transaction is in progress. " + "Please try again after sometime."); + goto out; + } + } else { + /* If no volname is given as a part of the command, locks will + * not be held */ + ret = dict_get_str (dict, "volname", &tmp); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Failed to get volume " + "name"); + goto local_locking_done; + } else { + /* Use a copy of volname, as cli response will be + * sent before the unlock, and the volname in the + * dict, might be removed */ + volname = gf_strdup (tmp); + if (!volname) + goto out; + } + + ret = glusterd_mgmt_v3_lock (volname, MY_UUID, "vol"); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock for %s", volname); + snprintf (err_str, err_len, + "Another transaction is in progress for %s. " + "Please try again after sometime.", volname); + goto out; + } + } + locked = 1; gf_log (this->name, GF_LOG_DEBUG, "Acquired lock on localhost"); - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_START_LOCK, NULL); +local_locking_done: + + /* If no volname is given as a part of the command, locks will + * not be held, hence sending stage event. 
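[Reviewer note] The key change in glusterd_op_txn_begin: instead of one implicit global transaction, each operation now generates a UUID transaction id, stores it (plus the originator's uuid) in the command dict, and keys its opinfo by that id, while locking switches from the single cluster lock to per-volume mgmt_v3 locks once the cluster op-version allows. A rough sketch of an id-keyed opinfo registry, assuming libuuid (link with -luuid; the fixed-size table is illustrative, glusterd keeps these mappings in a dict):

#include <stdio.h>
#include <string.h>
#include <uuid/uuid.h>   /* link with -luuid */

#define MAX_TXNS 8

struct txn_opinfo {
        uuid_t txn_id;
        int    op;
        int    in_use;
};

static struct txn_opinfo txn_table[MAX_TXNS];

/* Register a new transaction: generate an id and stash the opinfo
 * against it, the way glusterd_set_txn_opinfo keys opinfos. */
static int
txn_begin (int op, uuid_t *txn_id_out)
{
        int i;

        for (i = 0; i < MAX_TXNS; i++) {
                if (txn_table[i].in_use)
                        continue;
                uuid_generate (txn_table[i].txn_id);
                txn_table[i].op = op;
                txn_table[i].in_use = 1;
                uuid_copy (*txn_id_out, txn_table[i].txn_id);
                return 0;
        }
        return -1;
}

/* Look an opinfo back up from the id carried in a later RPC. */
static struct txn_opinfo *
txn_lookup (uuid_t txn_id)
{
        int i;

        for (i = 0; i < MAX_TXNS; i++)
                if (txn_table[i].in_use &&
                    !uuid_compare (txn_table[i].txn_id, txn_id))
                        return &txn_table[i];
        return NULL;
}

int
main (void)
{
        uuid_t id;
        char   str[37];

        if (txn_begin (42, &id))
                return 1;
        uuid_unparse (id, str);
        printf ("txn %s -> op %d\n", str, txn_lookup (id)->op);
        return 0;
}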
*/ + if (volname) + event_type = GD_OP_EVENT_START_LOCK; + else { + txn_op_info.state.state = GD_OP_STATE_LOCK_SENT; + event_type = GD_OP_EVENT_ALL_ACC; + } + + /* Save opinfo for this transaction with the transaction id */ + glusterd_txn_opinfo_init (&txn_op_info, NULL, &op, ctx, req); + + ret = glusterd_set_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + if (ctx) + dict_unref (ctx); + goto out; + } + + ret = glusterd_op_sm_inject_event (event_type, txn_id, ctx); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to acquire cluster" " lock."); goto out; } - glusterd_op_set_op (op); - glusterd_op_set_ctx (ctx); - glusterd_op_set_req (req); - - out: - if (locked && ret) - glusterd_unlock (MY_UUID); + if (locked && ret) { + /* Based on the op-version, we release the + * cluster or mgmt_v3 lock */ + if (priv->op_version < 3) + glusterd_unlock (MY_UUID); + else { + ret = glusterd_mgmt_v3_unlock (volname, MY_UUID, + "vol"); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to release lock for %s", + volname); + ret = -1; + } + } + + if (volname) + GF_FREE (volname); gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } int -glusterd_handle_cluster_lock (rpcsvc_request_t *req) +__glusterd_handle_cluster_lock (rpcsvc_request_t *req) { - gd1_mgmt_cluster_lock_req lock_req = {{0},}; - int32_t ret = -1; - glusterd_op_lock_ctx_t *ctx = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - xlator_t *this = NULL; + dict_t *op_ctx = NULL; + int32_t ret = -1; + gd1_mgmt_cluster_lock_req lock_req = {{0},}; + glusterd_op_lock_ctx_t *ctx = NULL; + glusterd_op_t op = GD_OP_EVENT_LOCK; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_op_info_t txn_op_info = {{0},}; + uuid_t *txn_id = &global_txn_id; + xlator_t *this = NULL; this = THIS; GF_ASSERT (this); @@ -541,8 +785,29 @@ glusterd_handle_cluster_lock (rpcsvc_request_t *req) uuid_copy (ctx->uuid, lock_req.uuid); ctx->req = req; + ctx->dict = NULL; + + op_ctx = dict_new (); + if (!op_ctx) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set new dict"); + goto out; + } + + glusterd_txn_opinfo_init (&txn_op_info, NULL, &op, op_ctx, req); + + ret = glusterd_set_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + dict_unref (txn_op_info.op_ctx); + goto out; + } - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK, ctx); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK, txn_id, ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_LOCK"); out: gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -554,6 +819,13 @@ out: } int +glusterd_handle_cluster_lock (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cluster_lock); +} + +int glusterd_req_ctx_create (rpcsvc_request_t *rpc_req, glusterd_op_t op, uuid_t uuid, char *buf_val, size_t buf_len, @@ -604,13 +876,16 @@ out: } int -glusterd_handle_stage_op (rpcsvc_request_t *req) +__glusterd_handle_stage_op (rpcsvc_request_t *req) { int32_t ret = -1; glusterd_req_ctx_t *req_ctx = NULL; gd1_mgmt_stage_op_req op_req = {{0},}; glusterd_peerinfo_t *peerinfo = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; + glusterd_op_info_t txn_op_info = {{0},}; + glusterd_op_sm_state_info_t state; this = THIS; GF_ASSERT (this); @@ -639,7 +914,36 @@ glusterd_handle_stage_op (rpcsvc_request_t *req) if (ret) goto out; - ret = glusterd_op_sm_inject_event 
(GD_OP_EVENT_STAGE_OP, req_ctx); + ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + + /* In cases where there is no volname, the receivers won't have a + * transaction opinfo created, as for those operations, the locking + * phase where the transaction opinfos are created, won't be called. */ + ret = glusterd_get_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "No transaction's opinfo set"); + + state.state = GD_OP_STATE_LOCKED; + glusterd_txn_opinfo_init (&txn_op_info, &state, + &op_req.op, req_ctx->dict, req); + + ret = glusterd_set_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + dict_unref (req_ctx->dict); + goto out; + } + } + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_OP, + txn_id, req_ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_STAGE_OP"); out: free (op_req.buf.buf_val);//malloced by xdr @@ -649,13 +953,21 @@ glusterd_handle_stage_op (rpcsvc_request_t *req) } int -glusterd_handle_commit_op (rpcsvc_request_t *req) +glusterd_handle_stage_op (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_stage_op); +} + + +int +__glusterd_handle_commit_op (rpcsvc_request_t *req) { int32_t ret = -1; glusterd_req_ctx_t *req_ctx = NULL; gd1_mgmt_commit_op_req op_req = {{0},}; glusterd_peerinfo_t *peerinfo = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; this = THIS; GF_ASSERT (this); @@ -686,11 +998,12 @@ glusterd_handle_commit_op (rpcsvc_request_t *req) if (ret) goto out; - ret = glusterd_op_init_ctx (op_req.op); - if (ret) - goto out; + ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id); - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_OP, req_ctx); + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_OP, + txn_id, req_ctx); out: free (op_req.buf.buf_val);//malloced by xdr @@ -700,20 +1013,28 @@ out: } int -glusterd_handle_cli_probe (rpcsvc_request_t *req) +glusterd_handle_commit_op (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_commit_op); +} + +int +__glusterd_handle_cli_probe (rpcsvc_request_t *req) { int32_t ret = -1; - gf1_cli_probe_req cli_req = {0,}; - glusterd_peerinfo_t *peerinfo = NULL; - gf_boolean_t run_fsm = _gf_true; - xlator_t *this = NULL; - char *bind_name = NULL; + gf_cli_req cli_req = {{0,},}; + glusterd_peerinfo_t *peerinfo = NULL; + gf_boolean_t run_fsm = _gf_true; + xlator_t *this = NULL; + char *bind_name = NULL; + dict_t *dict = NULL; + char *hostname = NULL; + int port = 0; GF_ASSERT (req); this = THIS; - ret = xdr_to_generic (req->msg[0], &cli_req, - (xdrproc_t)xdr_gf1_cli_probe_req); + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); if (ret < 0) { //failed to decode msg; gf_log ("", GF_LOG_ERROR, "xdr decoding error"); @@ -721,63 +1042,80 @@ glusterd_handle_cli_probe (rpcsvc_request_t *req) goto out; } + if (cli_req.dict.dict_len) { + dict = dict_new (); + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "unserialize req-buffer to dictionary"); + goto out; + } + } + + ret = dict_get_str (dict, "hostname", &hostname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get 
hostname"); + goto out; + } + + ret = dict_get_int32 (dict, "port", &port); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get port"); + goto out; + } + if (glusterd_is_any_volume_in_server_quorum (this) && !does_gd_meet_server_quorum (this)) { glusterd_xfer_cli_probe_resp (req, -1, GF_PROBE_QUORUM_NOT_MET, - NULL, - cli_req.hostname, cli_req.port); + NULL, hostname, port, dict); gf_log (this->name, GF_LOG_ERROR, "Quorum does not meet, " "rejecting operation"); ret = 0; goto out; } - gf_cmd_log ("peer probe", " on host %s:%d", cli_req.hostname, - cli_req.port); gf_log ("glusterd", GF_LOG_INFO, "Received CLI probe req %s %d", - cli_req.hostname, cli_req.port); + hostname, port); if (dict_get_str(this->options,"transport.socket.bind-address", &bind_name) == 0) { gf_log ("glusterd", GF_LOG_DEBUG, "only checking probe address vs. bind address"); - ret = glusterd_is_same_address(bind_name,cli_req.hostname); + ret = gf_is_same_address (bind_name, hostname); } else { - ret = glusterd_is_local_addr(cli_req.hostname); + ret = gf_is_local_addr (hostname); } if (ret) { - glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_LOCALHOST, NULL, - cli_req.hostname, cli_req.port); + glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_LOCALHOST, + NULL, hostname, port, dict); ret = 0; goto out; } - if (!(ret = glusterd_friend_find_by_hostname(cli_req.hostname, - &peerinfo))) { - if (strcmp (peerinfo->hostname, cli_req.hostname) == 0) { + if (!(ret = glusterd_friend_find_by_hostname (hostname, &peerinfo))) { + if (strcmp (peerinfo->hostname, hostname) == 0) { gf_log ("glusterd", GF_LOG_DEBUG, "Probe host %s port " - "%d already a peer", cli_req.hostname, - cli_req.port); + "%d already a peer", hostname, port); glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_FRIEND, - NULL, cli_req.hostname, - cli_req.port); + NULL, hostname, port, + dict); goto out; } } - ret = glusterd_probe_begin (req, cli_req.hostname, cli_req.port); - - gf_cmd_log ("peer probe","on host %s:%d %s",cli_req.hostname, - cli_req.port, (ret) ? 
"FAILED" : "SUCCESS"); + ret = glusterd_probe_begin (req, hostname, port, dict); if (ret == GLUSTERD_CONNECTION_AWAITED) { //fsm should be run after connection establishes run_fsm = _gf_false; ret = 0; } + out: - free (cli_req.hostname);//its malloced by xdr + free (cli_req.dict.dict_val); if (run_fsm) { glusterd_friend_sm (); @@ -788,14 +1126,24 @@ out: } int -glusterd_handle_cli_deprobe (rpcsvc_request_t *req) +glusterd_handle_cli_probe (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_cli_probe); +} + +int +__glusterd_handle_cli_deprobe (rpcsvc_request_t *req) { int32_t ret = -1; - gf1_cli_deprobe_req cli_req = {0,}; - uuid_t uuid = {0}; - int op_errno = 0; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; + gf_cli_req cli_req = {{0,},}; + uuid_t uuid = {0}; + int op_errno = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + dict_t *dict = NULL; + char *hostname = NULL; + int port = 0; + int flags = 0; this = THIS; GF_ASSERT (this); @@ -804,16 +1152,46 @@ glusterd_handle_cli_deprobe (rpcsvc_request_t *req) GF_ASSERT (req); ret = xdr_to_generic (req->msg[0], &cli_req, - (xdrproc_t)xdr_gf1_cli_deprobe_req); + (xdrproc_t)xdr_gf_cli_req); if (ret < 0) { //failed to decode msg; req->rpc_err = GARBAGE_ARGS; goto out; } + if (cli_req.dict.dict_len) { + dict = dict_new (); + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "unserialize req-buffer to dictionary"); + goto out; + } + } + gf_log ("glusterd", GF_LOG_INFO, "Received CLI deprobe req"); - ret = glusterd_hostname_to_uuid (cli_req.hostname, uuid); + ret = dict_get_str (dict, "hostname", &hostname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get hostname"); + goto out; + } + + ret = dict_get_int32 (dict, "port", &port); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get port"); + goto out; + } + + ret = dict_get_int32 (dict, "flags", &flags); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get flags"); + goto out; + } + + ret = glusterd_hostname_to_uuid (hostname, uuid); if (ret) { op_errno = GF_DEPROBE_NOT_FRIEND; goto out; @@ -825,7 +1203,7 @@ glusterd_handle_cli_deprobe (rpcsvc_request_t *req) goto out; } - if (!(cli_req.flags & GF_CLI_FLAG_OP_FORCE)) { + if (!(flags & GF_CLI_FLAG_OP_FORCE)) { if (!uuid_is_null (uuid)) { /* Check if peers are connected, except peer being detached*/ if (!glusterd_chk_peers_connected_befriended (uuid)) { @@ -853,23 +1231,19 @@ glusterd_handle_cli_deprobe (rpcsvc_request_t *req) } if (!uuid_is_null (uuid)) { - ret = glusterd_deprobe_begin (req, cli_req.hostname, - cli_req.port, uuid); + ret = glusterd_deprobe_begin (req, hostname, port, uuid, dict); } else { - ret = glusterd_deprobe_begin (req, cli_req.hostname, - cli_req.port, NULL); + ret = glusterd_deprobe_begin (req, hostname, port, NULL, dict); } - gf_cmd_log ("peer deprobe", "on host %s:%d %s", cli_req.hostname, - cli_req.port, (ret) ? 
"FAILED" : "SUCCESS"); out: + free (cli_req.dict.dict_val); + if (ret) { ret = glusterd_xfer_cli_deprobe_resp (req, ret, op_errno, NULL, - cli_req.hostname); + hostname, dict); } - free (cli_req.hostname);//malloced by xdr - glusterd_friend_sm (); glusterd_op_sm (); @@ -877,7 +1251,13 @@ out: } int -glusterd_handle_cli_list_friends (rpcsvc_request_t *req) +glusterd_handle_cli_deprobe (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_cli_deprobe); +} + +int +__glusterd_handle_cli_list_friends (rpcsvc_request_t *req) { int32_t ret = -1; gf1_cli_peer_list_req cli_req = {0,}; @@ -925,7 +1305,14 @@ out: } int -glusterd_handle_cli_get_volume (rpcsvc_request_t *req) +glusterd_handle_cli_list_friends (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_list_friends); +} + +int +__glusterd_handle_cli_get_volume (rpcsvc_request_t *req) { int32_t ret = -1; gf_cli_req cli_req = {{0,}}; @@ -978,76 +1365,15 @@ out: return ret; } -#ifdef HAVE_BD_XLATOR int -glusterd_handle_cli_bd_op (rpcsvc_request_t *req) +glusterd_handle_cli_get_volume (rpcsvc_request_t *req) { - int32_t ret = -1; - gf_cli_req cli_req = { {0,} }; - dict_t *dict = NULL; - char *volname = NULL; - char op_errstr[2048] = {0,}; - glusterd_op_t cli_op = GD_OP_BD_OP; - - GF_ASSERT (req); - - ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); - if (ret < 0) { - /* failed to decode msg */ - req->rpc_err = GARBAGE_ARGS; - goto out; - } - - gf_log ("glusterd", GF_LOG_DEBUG, "Received bd op req"); - - if (cli_req.dict.dict_len) { - /* Unserialize the dictionary */ - dict = dict_new (); - - ret = dict_unserialize (cli_req.dict.dict_val, - cli_req.dict.dict_len, - &dict); - if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, - "failed to " - "unserialize req-buffer to dictionary"); - goto out; - } else { - dict->extra_stdfree = cli_req.dict.dict_val; - } - } - - ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, - "failed to get volname"); - goto out; - } - - ret = glusterd_op_begin (req, GD_OP_BD_OP, dict, op_errstr, - sizeof (op_errstr)); - gf_cmd_log ("bd op: %s", ((ret == 0) ? 
"SUCCESS": "FAILED")); -out: - if (ret && dict) - dict_unref (dict); - - glusterd_friend_sm (); - glusterd_op_sm (); - - if (ret) { - if (op_errstr[0] == '\0') - snprintf (op_errstr, sizeof (op_errstr), - "Operation failed"); - ret = glusterd_op_send_cli_response (cli_op, ret, 0, - req, NULL, op_errstr); - } - - return ret; + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_get_volume); } -#endif int -glusterd_handle_cli_uuid_reset (rpcsvc_request_t *req) +__glusterd_handle_cli_uuid_reset (rpcsvc_request_t *req) { int ret = -1; dict_t *dict = NULL; @@ -1144,7 +1470,112 @@ out: } int -glusterd_handle_cli_list_volume (rpcsvc_request_t *req) +glusterd_handle_cli_uuid_reset (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_uuid_reset); +} + +int +__glusterd_handle_cli_uuid_get (rpcsvc_request_t *req) +{ + int ret = -1; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + gf_cli_rsp rsp = {0,}; + gf_cli_req cli_req = {{0,}}; + char msg_str[2048] = {0,}; + char uuid_str[64] = {0,}; + + GF_ASSERT (req); + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_log ("glusterd", GF_LOG_DEBUG, "Received uuid get req"); + + if (cli_req.dict.dict_len) { + dict = dict_new (); + if (!dict) { + ret = -1; + goto out; + } + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log ("glusterd", GF_LOG_ERROR, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf (msg_str, sizeof (msg_str), "Unable to decode " + "the buffer"); + goto out; + + } else { + dict->extra_stdfree = cli_req.dict.dict_val; + + } + } + + rsp_dict = dict_new (); + if (!rsp_dict) { + ret = -1; + goto out; + } + + uuid_utoa_r (MY_UUID, uuid_str); + ret = dict_set_str (rsp_dict, "uuid", uuid_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set uuid in " + "dictionary."); + goto out; + } + + ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to serialize " + "dictionary."); + goto out; + } + ret = 0; +out: + if (ret) { + rsp.op_ret = -1; + if (msg_str[0] == '\0') + snprintf (msg_str, sizeof (msg_str), "Operation " + "failed"); + rsp.op_errstr = msg_str; + + } else { + rsp.op_errstr = ""; + + } + + glusterd_to_cli (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_cli_rsp, dict); + + return 0; +} +int +glusterd_handle_cli_uuid_get (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_uuid_get); +} + +int +__glusterd_handle_cli_list_volume (rpcsvc_request_t *req) { int ret = -1; dict_t *dict = NULL; @@ -1203,6 +1634,13 @@ out: return ret; } +int +glusterd_handle_cli_list_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_list_volume); +} + int32_t glusterd_op_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx, char *err_str, size_t err_len) @@ -1215,7 +1653,7 @@ glusterd_op_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx, } int -glusterd_handle_reset_volume (rpcsvc_request_t *req) +__glusterd_handle_reset_volume (rpcsvc_request_t *req) { int32_t ret = -1; gf_cli_req cli_req = {{0,}}; @@ -1280,9 +1718,15 @@ out: return ret; } +int +glusterd_handle_reset_volume (rpcsvc_request_t *req) +{ + return 
glusterd_big_locked_handler (req, + __glusterd_handle_reset_volume); +} int -glusterd_handle_set_volume (rpcsvc_request_t *req) +__glusterd_handle_set_volume (rpcsvc_request_t *req) { int32_t ret = -1; gf_cli_req cli_req = {{0,}}; @@ -1382,7 +1826,13 @@ out: } int -glusterd_handle_sync_volume (rpcsvc_request_t *req) +glusterd_handle_set_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_set_volume); +} + +int +__glusterd_handle_sync_volume (rpcsvc_request_t *req) { int32_t ret = -1; gf_cli_req cli_req = {{0,}}; @@ -1445,7 +1895,7 @@ glusterd_handle_sync_volume (rpcsvc_request_t *req) gf_log (this->name, GF_LOG_INFO, "Received volume sync req " "for volume %s", (flags & GF_CLI_SYNC_ALL) ? "all" : volname); - if (glusterd_is_local_addr (hostname)) { + if (gf_is_local_addr (hostname)) { ret = -1; snprintf (msg, sizeof (msg), "sync from localhost" " not allowed"); @@ -1471,6 +1921,12 @@ out: } int +glusterd_handle_sync_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_sync_volume); +} + +int glusterd_fsm_log_send_resp (rpcsvc_request_t *req, int op_ret, char *op_errstr, dict_t *dict) { @@ -1497,7 +1953,7 @@ glusterd_fsm_log_send_resp (rpcsvc_request_t *req, int op_ret, } int -glusterd_handle_fsm_log (rpcsvc_request_t *req) +__glusterd_handle_fsm_log (rpcsvc_request_t *req) { int32_t ret = -1; gf1_cli_fsm_log_req cli_req = {0,}; @@ -1554,6 +2010,12 @@ out: } int +glusterd_handle_fsm_log (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_fsm_log); +} + +int glusterd_op_lock_send_resp (rpcsvc_request_t *req, int32_t status) { @@ -1592,13 +2054,65 @@ glusterd_op_unlock_send_resp (rpcsvc_request_t *req, int32_t status) } int -glusterd_handle_cluster_unlock (rpcsvc_request_t *req) +glusterd_op_mgmt_v3_lock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id, + int32_t status) +{ + + gd1_mgmt_v3_lock_rsp rsp = {{0},}; + int ret = -1; + + GF_ASSERT (req); + GF_ASSERT (txn_id); + glusterd_get_uuid (&rsp.uuid); + rsp.op_ret = status; + if (rsp.op_ret) + rsp.op_errno = errno; + uuid_copy (rsp.txn_id, *txn_id); + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); + + gf_log (THIS->name, GF_LOG_DEBUG, "Responded to mgmt_v3 lock, ret: %d", + ret); + + return ret; +} + +int +glusterd_op_mgmt_v3_unlock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id, + int32_t status) +{ + + gd1_mgmt_v3_unlock_rsp rsp = {{0},}; + int ret = -1; + + GF_ASSERT (req); + GF_ASSERT (txn_id); + rsp.op_ret = status; + if (rsp.op_ret) + rsp.op_errno = errno; + glusterd_get_uuid (&rsp.uuid); + uuid_copy (rsp.txn_id, *txn_id); + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); + + gf_log (THIS->name, GF_LOG_DEBUG, + "Responded to mgmt_v3 unlock, ret: %d", + ret); + + return ret; +} + +int +__glusterd_handle_cluster_unlock (rpcsvc_request_t *req) { gd1_mgmt_cluster_unlock_req unlock_req = {{0}, }; int32_t ret = -1; glusterd_op_lock_ctx_t *ctx = NULL; glusterd_peerinfo_t *peerinfo = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; this = THIS; GF_ASSERT (this); @@ -1633,8 +2147,9 @@ glusterd_handle_cluster_unlock (rpcsvc_request_t *req) } uuid_copy (ctx->uuid, unlock_req.uuid); ctx->req = req; + ctx->dict = NULL; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK, ctx); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK, txn_id, ctx); out: glusterd_friend_sm (); @@ -1644,6 +2159,13 @@ out: } int 
+glusterd_handle_cluster_unlock (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cluster_unlock); +} + +int glusterd_op_stage_send_resp (rpcsvc_request_t *req, int32_t op, int32_t status, char *op_errstr, dict_t *rsp_dict) @@ -1724,7 +2246,7 @@ out: } int -glusterd_handle_incoming_friend_req (rpcsvc_request_t *req) +__glusterd_handle_incoming_friend_req (rpcsvc_request_t *req) { int32_t ret = -1; gd1_mgmt_friend_req friend_req = {{0},}; @@ -1763,7 +2285,14 @@ out: } int -glusterd_handle_incoming_unfriend_req (rpcsvc_request_t *req) +glusterd_handle_incoming_friend_req (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_incoming_friend_req); +} + +int +__glusterd_handle_incoming_unfriend_req (rpcsvc_request_t *req) { int32_t ret = -1; gd1_mgmt_friend_req friend_req = {{0},}; @@ -1801,6 +2330,14 @@ out: } int +glusterd_handle_incoming_unfriend_req (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_incoming_unfriend_req); + +} + +int glusterd_handle_friend_update_delete (dict_t *dict) { char *hostname = NULL; @@ -1846,7 +2383,7 @@ out: } int -glusterd_handle_friend_update (rpcsvc_request_t *req) +__glusterd_handle_friend_update (rpcsvc_request_t *req) { int32_t ret = -1; gd1_mgmt_friend_update friend_req = {{0},}; @@ -1983,7 +2520,14 @@ out: } int -glusterd_handle_probe_query (rpcsvc_request_t *req) +glusterd_handle_friend_update (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_friend_update); +} + +int +__glusterd_handle_probe_query (rpcsvc_request_t *req) { int32_t ret = -1; xlator_t *this = NULL; @@ -2077,8 +2621,13 @@ out: return ret; } +int glusterd_handle_probe_query (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_probe_query); +} + int -glusterd_handle_cli_profile_volume (rpcsvc_request_t *req) +__glusterd_handle_cli_profile_volume (rpcsvc_request_t *req) { int32_t ret = -1; gf_cli_req cli_req = {{0,}}; @@ -2146,7 +2695,14 @@ out: } int -glusterd_handle_getwd (rpcsvc_request_t *req) +glusterd_handle_cli_profile_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_profile_volume); +} + +int +__glusterd_handle_getwd (rpcsvc_request_t *req) { int32_t ret = -1; gf1_cli_getwd_rsp rsp = {0,}; @@ -2171,16 +2727,23 @@ glusterd_handle_getwd (rpcsvc_request_t *req) return ret; } +int +glusterd_handle_getwd (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_getwd); +} int -glusterd_handle_mount (rpcsvc_request_t *req) +__glusterd_handle_mount (rpcsvc_request_t *req) { gf1_cli_mount_req mnt_req = {0,}; gf1_cli_mount_rsp rsp = {0,}; dict_t *dict = NULL; int ret = 0; + glusterd_conf_t *priv = NULL; GF_ASSERT (req); + priv = THIS->private; ret = xdr_to_generic (req->msg[0], &mnt_req, (xdrproc_t)xdr_gf1_cli_mount_req); @@ -2213,8 +2776,10 @@ glusterd_handle_mount (rpcsvc_request_t *req) } } + synclock_unlock (&priv->big_lock); rsp.op_ret = glusterd_do_mount (mnt_req.label, dict, &rsp.path, &rsp.op_errno); + synclock_lock (&priv->big_lock); out: if (!rsp.path) @@ -2236,7 +2801,13 @@ glusterd_handle_mount (rpcsvc_request_t *req) } int -glusterd_handle_umount (rpcsvc_request_t *req) +glusterd_handle_mount (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_mount); +} + +int +__glusterd_handle_umount (rpcsvc_request_t *req) { gf1_cli_umount_req umnt_req = {0,}; gf1_cli_umount_rsp rsp = {0,}; 
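[Reviewer note] Note the synclock_unlock()/synclock_lock() bracket around glusterd_do_mount() above, repeated below for runner_run() in umount and for the RPC connect in glusterd_friend_add: with one big lock serializing every handler, any call that can block for long must drop the lock for its duration or it stalls the whole daemon. Sketch of the bracket, pthread mutex standing in for the synclock (compile with -pthread):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for a long-running external action (mount, umount via
 * runner_run, RPC connect). */
static int
blocking_work (void)
{
        sleep (1);
        return 0;
}

static int
handler (void)
{
        int ret = -1;

        pthread_mutex_lock (&big_lock);
        /* ...work that needs the lock... */

        pthread_mutex_unlock (&big_lock);
        ret = blocking_work ();
        pthread_mutex_lock (&big_lock);

        /* In real code, state read before the unlock must be
         * revalidated here; the world may have changed meanwhile. */
        pthread_mutex_unlock (&big_lock);
        printf ("blocking work done, ret=%d\n", ret);
        return ret;
}

int
main (void)
{
        return handler ();
}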
@@ -2249,9 +2820,11 @@ glusterd_handle_umount (rpcsvc_request_t *req) gf_boolean_t dir_ok = _gf_false; char *pdir = NULL; char *t = NULL; + glusterd_conf_t *priv = NULL; GF_ASSERT (req); GF_ASSERT (this); + priv = this->private; ret = xdr_to_generic (req->msg[0], &umnt_req, (xdrproc_t)xdr_gf1_cli_umount_req); @@ -2294,7 +2867,9 @@ glusterd_handle_umount (rpcsvc_request_t *req) runner_add_args (&runner, "umount", umnt_req.path, NULL); if (umnt_req.lazy) runner_add_arg (&runner, "-l"); + synclock_unlock (&priv->big_lock); rsp.op_ret = runner_run (&runner); + synclock_lock (&priv->big_lock); if (rsp.op_ret == 0) { if (realpath (umnt_req.path, mntp)) rmdir (mntp); @@ -2323,6 +2898,12 @@ glusterd_handle_umount (rpcsvc_request_t *req) } int +glusterd_handle_umount (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_umount); +} + +int glusterd_friend_remove (uuid_t uuid, char *hostname) { int ret = 0; @@ -2527,7 +3108,9 @@ glusterd_friend_add (const char *hoststr, int port, if (!restore) { ret = glusterd_store_peerinfo (*friend); if (ret == 0) { + synclock_unlock (&conf->big_lock); ret = glusterd_friend_rpc_create (this, *friend, args); + synclock_lock (&conf->big_lock); } else { gf_log (this->name, GF_LOG_ERROR, @@ -2546,7 +3129,8 @@ out: } int -glusterd_probe_begin (rpcsvc_request_t *req, const char *hoststr, int port) +glusterd_probe_begin (rpcsvc_request_t *req, const char *hoststr, int port, + dict_t *dict) { int ret = -1; glusterd_peerinfo_t *peerinfo = NULL; @@ -2562,6 +3146,7 @@ glusterd_probe_begin (rpcsvc_request_t *req, const char *hoststr, int port) " for host: %s (%d)", hoststr, port); args.mode = GD_MODE_ON; args.req = req; + args.dict = dict; ret = glusterd_friend_add ((char *)hoststr, port, GD_FRIEND_STATE_DEFAULT, NULL, &peerinfo, 0, &args); @@ -2583,11 +3168,11 @@ glusterd_probe_begin (rpcsvc_request_t *req, const char *hoststr, int port) ret = glusterd_friend_sm_inject_event (event); glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_SUCCESS, NULL, (char*)hoststr, - port); + port, dict); } } else { glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_FRIEND, NULL, - (char*)hoststr, port); + (char*)hoststr, port, dict); } out: @@ -2597,7 +3182,7 @@ out: int glusterd_deprobe_begin (rpcsvc_request_t *req, const char *hoststr, int port, - uuid_t uuid) + uuid_t uuid, dict_t *dict) { int ret = -1; glusterd_peerinfo_t *peerinfo = NULL; @@ -2638,6 +3223,7 @@ glusterd_deprobe_begin (rpcsvc_request_t *req, const char *hoststr, int port, ctx->hostname = gf_strdup (hoststr); ctx->port = port; ctx->req = req; + ctx->dict = dict; event->ctx = ctx; @@ -2685,15 +3271,16 @@ glusterd_xfer_friend_remove_resp (rpcsvc_request_t *req, char *hostname, int por int -glusterd_xfer_friend_add_resp (rpcsvc_request_t *req, char *hostname, int port, - int32_t op_ret, int32_t op_errno) +glusterd_xfer_friend_add_resp (rpcsvc_request_t *req, char *myhostname, + char *remote_hostname, int port, int32_t op_ret, + int32_t op_errno) { gd1_mgmt_friend_rsp rsp = {{0}, }; int32_t ret = -1; xlator_t *this = NULL; glusterd_conf_t *conf = NULL; - GF_ASSERT (hostname); + GF_ASSERT (myhostname); this = THIS; GF_ASSERT (this); @@ -2703,61 +3290,217 @@ glusterd_xfer_friend_add_resp (rpcsvc_request_t *req, char *hostname, int port, uuid_copy (rsp.uuid, MY_UUID); rsp.op_ret = op_ret; rsp.op_errno = op_errno; - rsp.hostname = gf_strdup (hostname); + rsp.hostname = gf_strdup (myhostname); rsp.port = port; ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_gd1_mgmt_friend_rsp); gf_log 
("glusterd", GF_LOG_INFO, - "Responded to %s (%d), ret: %d", hostname, port, ret); + "Responded to %s (%d), ret: %d", remote_hostname, port, ret); GF_FREE (rsp.hostname); return ret; } +static void +set_probe_error_str (int op_ret, int op_errno, char *op_errstr, char *errstr, + size_t len, char *hostname, int port) +{ + if ((op_errstr) && (strcmp (op_errstr, ""))) { + snprintf (errstr, len, "%s", op_errstr); + return; + } + + if (!op_ret) { + switch (op_errno) { + case GF_PROBE_LOCALHOST: + snprintf (errstr, len, "Probe on localhost not " + "needed"); + break; + + case GF_PROBE_FRIEND: + snprintf (errstr, len, "Host %s port %d already" + " in peer list", hostname, port); + break; + + default: + if (op_errno != 0) + snprintf (errstr, len, "Probe returned " + "with unknown errno %d", + op_errno); + break; + } + } else { + switch (op_errno) { + case GF_PROBE_ANOTHER_CLUSTER: + snprintf (errstr, len, "%s is already part of " + "another cluster", hostname); + break; + + case GF_PROBE_VOLUME_CONFLICT: + snprintf (errstr, len, "Atleast one volume on " + "%s conflicts with existing volumes " + "in the cluster", hostname); + break; + + case GF_PROBE_UNKNOWN_PEER: + snprintf (errstr, len, "%s responded with " + "'unknown peer' error, this could " + "happen if %s doesn't have localhost " + "in its peer database", hostname, + hostname); + break; + + case GF_PROBE_ADD_FAILED: + snprintf (errstr, len, "Failed to add peer " + "information on %s", hostname); + break; + + case GF_PROBE_SAME_UUID: + snprintf (errstr, len, "Peer uuid (host %s) is " + "same as local uuid", hostname); + break; + + case GF_PROBE_QUORUM_NOT_MET: + snprintf (errstr, len, "Cluster quorum is not " + "met. Changing peers is not allowed " + "in this state"); + break; + + default: + snprintf (errstr, len, "Probe returned with " + "unknown errno %d", op_errno); + break; + } + } +} + int glusterd_xfer_cli_probe_resp (rpcsvc_request_t *req, int32_t op_ret, int32_t op_errno, char *op_errstr, char *hostname, - int port) + int port, dict_t *dict) { - gf1_cli_probe_rsp rsp = {0, }; + gf_cli_rsp rsp = {0,}; int32_t ret = -1; + char errstr[2048] = {0,}; + char *cmd_str = NULL; + xlator_t *this = THIS; GF_ASSERT (req); + GF_ASSERT (this); + + (void) set_probe_error_str (op_ret, op_errno, op_errstr, errstr, + sizeof (errstr), hostname, port); + + if (dict) { + ret = dict_get_str (dict, "cmd-str", &cmd_str); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "command string"); + } rsp.op_ret = op_ret; rsp.op_errno = op_errno; - rsp.op_errstr = op_errstr ? op_errstr : ""; - rsp.hostname = hostname; - rsp.port = port; + rsp.op_errstr = (errstr[0] != '\0') ? errstr : ""; + + gf_cmd_log ("", "%s : %s %s %s", cmd_str, + (op_ret) ? "FAILED" : "SUCCESS", + (errstr[0] != '\0') ? ":" : " ", + (errstr[0] != '\0') ? 
errstr : " "); ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - (xdrproc_t)xdr_gf1_cli_probe_rsp); + (xdrproc_t)xdr_gf_cli_rsp); - gf_log ("glusterd", GF_LOG_INFO, "Responded to CLI, ret: %d",ret); + if (dict) + dict_unref (dict); + gf_log (this->name, GF_LOG_DEBUG, "Responded to CLI, ret: %d",ret); return ret; } +static void +set_deprobe_error_str (int op_ret, int op_errno, char *op_errstr, char *errstr, + size_t len, char *hostname) +{ + if ((op_errstr) && (strcmp (op_errstr, ""))) { + snprintf (errstr, len, "%s", op_errstr); + return; + } + + if (op_ret) { + switch (op_errno) { + case GF_DEPROBE_LOCALHOST: + snprintf (errstr, len, "%s is localhost", + hostname); + break; + + case GF_DEPROBE_NOT_FRIEND: + snprintf (errstr, len, "%s is not part of " + "cluster", hostname); + break; + + case GF_DEPROBE_BRICK_EXIST: + snprintf (errstr, len, "Brick(s) with the peer " + "%s exist in cluster", hostname); + break; + + case GF_DEPROBE_FRIEND_DOWN: + snprintf (errstr, len, "One of the peers is " + "probably down. Check with " + "'peer status'"); + break; + + case GF_DEPROBE_QUORUM_NOT_MET: + snprintf (errstr, len, "Cluster quorum is not " + "met. Changing peers is not allowed " + "in this state"); + break; + + default: + snprintf (errstr, len, "Detach returned with " + "unknown errno %d", op_errno); + break; + + } + } +} + + int glusterd_xfer_cli_deprobe_resp (rpcsvc_request_t *req, int32_t op_ret, int32_t op_errno, char *op_errstr, - char *hostname) + char *hostname, dict_t *dict) { - gf1_cli_deprobe_rsp rsp = {0, }; + gf_cli_rsp rsp = {0,}; int32_t ret = -1; + char *cmd_str = NULL; + char errstr[2048] = {0,}; GF_ASSERT (req); + (void) set_deprobe_error_str (op_ret, op_errno, op_errstr, errstr, + sizeof (errstr), hostname); + + if (dict) { + ret = dict_get_str (dict, "cmd-str", &cmd_str); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, "Failed to get " + "command string"); + } + rsp.op_ret = op_ret; rsp.op_errno = op_errno; - rsp.op_errstr = op_errstr ? op_errstr : ""; - rsp.hostname = hostname; + rsp.op_errstr = (errstr[0] != '\0') ? errstr : ""; + + gf_cmd_log ("", "%s : %s %s %s", cmd_str, + (op_ret) ? "FAILED" : "SUCCESS", + (errstr[0] != '\0') ? ":" : " ", + (errstr[0] != '\0') ? 
errstr : " "); ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - (xdrproc_t)xdr_gf1_cli_deprobe_rsp); + (xdrproc_t)xdr_gf_cli_rsp); - gf_log ("glusterd", GF_LOG_INFO, "Responded to CLI, ret: %d",ret); + gf_log (THIS->name, GF_LOG_DEBUG, "Responded to CLI, ret: %d",ret); return ret; } @@ -2771,37 +3514,50 @@ glusterd_list_friends (rpcsvc_request_t *req, dict_t *dict, int32_t flags) int32_t count = 0; dict_t *friends = NULL; gf1_cli_peer_list_rsp rsp = {0,}; + char my_uuid_str[64] = {0,}; + char key[256] = {0,}; priv = THIS->private; GF_ASSERT (priv); - if (!list_empty (&priv->peers)) { - friends = dict_new (); - if (!friends) { - gf_log ("", GF_LOG_WARNING, "Out of Memory"); - goto out; - } - } else { - ret = 0; + friends = dict_new (); + if (!friends) { + gf_log ("", GF_LOG_WARNING, "Out of Memory"); goto out; } - - if (flags == GF_CLI_LIST_ALL) { - list_for_each_entry (entry, &priv->peers, uuid_list) { - count++; - ret = glusterd_add_peer_detail_to_dict (entry, + if (!list_empty (&priv->peers)) { + list_for_each_entry (entry, &priv->peers, uuid_list) { + count++; + ret = glusterd_add_peer_detail_to_dict (entry, friends, count); - if (ret) - goto out; + if (ret) + goto out; + } + } - } + if (flags == GF_CLI_LIST_POOL_NODES) { + count++; + snprintf (key, 256, "friend%d.uuid", count); + uuid_utoa_r (MY_UUID, my_uuid_str); + ret = dict_set_str (friends, key, my_uuid_str); + if (ret) + goto out; - ret = dict_set_int32 (friends, "count", count); + snprintf (key, 256, "friend%d.hostname", count); + ret = dict_set_str (friends, key, "localhost"); + if (ret) + goto out; - if (ret) - goto out; + snprintf (key, 256, "friend%d.connected", count); + ret = dict_set_int32 (friends, key, 1); + if (ret) + goto out; } + ret = dict_set_int32 (friends, "count", count); + if (ret) + goto out; + ret = dict_allocate_and_serialize (friends, &rsp.friends.friends_val, &rsp.friends.friends_len); @@ -2932,7 +3688,7 @@ out: } int -glusterd_handle_status_volume (rpcsvc_request_t *req) +__glusterd_handle_status_volume (rpcsvc_request_t *req) { int32_t ret = -1; uint32_t cmd = 0; @@ -3004,7 +3760,14 @@ out: } int -glusterd_handle_cli_clearlocks_volume (rpcsvc_request_t *req) +glusterd_handle_status_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_status_volume); +} + +int +__glusterd_handle_cli_clearlocks_volume (rpcsvc_request_t *req) { int32_t ret = -1; gf_cli_req cli_req = {{0,}}; @@ -3073,17 +3836,69 @@ out: } int -glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, - rpc_clnt_event_t event, - void *data) +glusterd_handle_cli_clearlocks_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_clearlocks_volume); +} + +static int +get_brickinfo_from_brickid (char *brickid, glusterd_brickinfo_t **brickinfo) +{ + glusterd_volinfo_t *volinfo = NULL; + char *volid_str = NULL; + char *brick = NULL; + char *brickid_dup = NULL; + uuid_t volid = {0}; + int ret = -1; + + brickid_dup = gf_strdup (brickid); + if (!brickid_dup) + goto out; + + volid_str = brickid_dup; + brick = strchr (brickid_dup, ':'); + *brick = '\0'; + brick++; + if (!volid_str || !brick) + goto out; + + uuid_parse (volid_str, volid); + ret = glusterd_volinfo_find_by_volume_id (volid, &volinfo); + if (ret) { + /* Check if it a snapshot volume */ + ret = glusterd_snap_volinfo_find_by_volume_id (volid, &volinfo); + if (ret) + goto out; + } + + ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, + brickinfo); + if (ret) + goto out; + + ret = 
0; +out: + GF_FREE (brickid_dup); + return ret; +} + +int +__glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) { xlator_t *this = NULL; glusterd_conf_t *conf = NULL; int ret = 0; + char *brickid = NULL; glusterd_brickinfo_t *brickinfo = NULL; - brickinfo = mydata; - if (!brickinfo) + brickid = mydata; + if (!brickid) + return 0; + + ret = get_brickinfo_from_brickid (brickid, &brickinfo); + if (ret) return 0; this = THIS; @@ -3093,15 +3908,21 @@ glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, switch (event) { case RPC_CLNT_CONNECT: - gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_CONNECT"); + gf_log (this->name, GF_LOG_DEBUG, "Connected to %s:%s", + brickinfo->hostname, brickinfo->path); glusterd_set_brick_status (brickinfo, GF_BRICK_STARTED); ret = default_notify (this, GF_EVENT_CHILD_UP, NULL); break; case RPC_CLNT_DISCONNECT: - gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT"); + if (GF_BRICK_STARTED == brickinfo->status) + gf_log (this->name, GF_LOG_INFO, "Disconnected from " + "%s:%s", brickinfo->hostname, brickinfo->path); + glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED); + if (rpc_clnt_is_disabled (rpc)) + GF_FREE (brickid); break; default: @@ -3114,9 +3935,16 @@ glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, } int -glusterd_nodesvc_rpc_notify (struct rpc_clnt *rpc, void *mydata, - rpc_clnt_event_t event, - void *data) +glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) +{ + return glusterd_big_locked_notify (rpc, mydata, event, data, + __glusterd_brick_rpc_notify); +} + +int +__glusterd_nodesvc_rpc_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) { xlator_t *this = NULL; glusterd_conf_t *conf = NULL; @@ -3155,6 +3983,14 @@ glusterd_nodesvc_rpc_notify (struct rpc_clnt *rpc, void *mydata, } int +glusterd_nodesvc_rpc_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) +{ + return glusterd_big_locked_notify (rpc, mydata, event, data, + __glusterd_nodesvc_rpc_notify); +} + +int glusterd_friend_remove_notify (glusterd_peerctx_t *peerctx) { int ret = -1; @@ -3162,11 +3998,13 @@ glusterd_friend_remove_notify (glusterd_peerctx_t *peerctx) glusterd_peerinfo_t *peerinfo = peerctx->peerinfo; rpcsvc_request_t *req = peerctx->args.req; char *errstr = peerctx->errstr; + dict_t *dict = NULL; GF_ASSERT (peerctx); peerinfo = peerctx->peerinfo; req = peerctx->args.req; + dict = peerctx->args.dict; errstr = peerctx->errstr; ret = glusterd_friend_sm_new_event (GD_FRIEND_EVENT_REMOVE_FRIEND, @@ -3180,7 +4018,8 @@ glusterd_friend_remove_notify (glusterd_peerctx_t *peerctx) } glusterd_xfer_cli_probe_resp (req, -1, ENOTCONN, errstr, - peerinfo->hostname, peerinfo->port); + peerinfo->hostname, + peerinfo->port, dict); new_event->peerinfo = peerinfo; ret = glusterd_friend_sm_inject_event (new_event); @@ -3196,17 +4035,16 @@ out: } int -glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, - rpc_clnt_event_t event, - void *data) +__glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) { xlator_t *this = NULL; glusterd_conf_t *conf = NULL; int ret = 0; glusterd_peerinfo_t *peerinfo = NULL; glusterd_peerctx_t *peerctx = NULL; - uuid_t *peer_uuid = NULL; gf_boolean_t quorum_action = _gf_false; + glusterd_volinfo_t *volinfo = NULL; peerctx = mydata; if (!peerctx) @@ -3234,6 +4072,20 @@ glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, 
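[Reviewer note] Brick RPC callbacks now receive a string brickid rather than a brickinfo pointer, and get_brickinfo_from_brickid() resolves it on every event by splitting the id at the first colon into a volume uuid and the brick spec. A standalone sketch of that split, assuming libuuid (link with -luuid); unlike the code above, it checks strchr()'s result before writing through it:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <uuid/uuid.h>   /* link with -luuid */

/* Split a "volume-uuid:host:/brick/path" identifier into the volume
 * id and the brick part. */
static int
parse_brickid (const char *brickid, uuid_t volid, char **brick_out)
{
        char *dup   = strdup (brickid);
        char *colon = NULL;
        int   ret   = -1;

        if (!dup)
                return -1;

        colon = strchr (dup, ':');
        if (!colon)
                goto out;
        *colon = '\0';

        if (uuid_parse (dup, volid))
                goto out;

        *brick_out = strdup (colon + 1);
        ret = *brick_out ? 0 : -1;
out:
        free (dup);
        return ret;
}

int
main (void)
{
        uuid_t volid;
        char  *brick = NULL;

        if (parse_brickid ("0c8e4f2a-2b1d-4f6e-9f3a-1234567890ab:"
                           "server1:/bricks/b1", volid, &brick))
                return 1;
        printf ("brick = %s\n", brick);
        free (brick);
        return 0;
}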
gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT %d", peerinfo->state.state); + if (peerinfo->connected) { + list_for_each_entry (volinfo, &conf->volumes, vol_list) { + ret = glusterd_mgmt_v3_unlock (volinfo->volname, + peerinfo->uuid, + "vol"); + if (ret) + gf_log (this->name, GF_LOG_TRACE, + "Lock not released for %s", + volinfo->volname); + } + + ret = 0; + } + if ((peerinfo->quorum_contrib != QUORUM_DOWN) && (peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED)) { peerinfo->quorum_contrib = QUORUM_DOWN; @@ -3241,64 +4093,15 @@ glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, peerinfo->quorum_action = _gf_false; } - // Remove peer if it is not a friend and connection/handshake - // fails, and notify cli. Happens only during probe. + /* Remove peer if it is not a friend and connection/handshake + * fails, and notify cli. Happens only during probe. + */ if (peerinfo->state.state == GD_FRIEND_STATE_DEFAULT) { glusterd_friend_remove_notify (peerctx); goto out; } - /* - local glusterd (thinks that it) is the owner of the cluster - lock and 'fails' the operation on the first disconnect from - a peer. - */ - if (peerinfo->connected) { - /*TODO: The following is needed till all volume - * operations are synctaskized. - * */ - if (is_origin_glusterd ()) { - switch (glusterd_op_get_op ()) { - case GD_OP_START_VOLUME: - case GD_OP_ADD_BRICK: - case GD_OP_REMOVE_BRICK: - case GD_OP_STATUS_VOLUME: - break; - - default: - ret = glusterd_op_sm_inject_event - (GD_OP_EVENT_START_UNLOCK, NULL); - if (ret) - gf_log (this->name, - GF_LOG_ERROR, - "Unable to enqueue " - "cluster unlock event"); - - break; - } - - } else { - peer_uuid = GF_CALLOC (1, sizeof (*peer_uuid), - gf_common_mt_char); - - if (peer_uuid) { - uuid_copy (*peer_uuid, peerinfo->uuid); - ret = glusterd_op_sm_inject_event - (GD_OP_EVENT_LOCAL_UNLOCK_NO_RESP, - peer_uuid); - if (ret) - gf_log (this->name, - GF_LOG_ERROR, - "Unable to enqueue " - "local lock flush " - "event."); - } - } - - } - peerinfo->connected = 0; - //default_notify (this, GF_EVENT_CHILD_DOWN, NULL); break; } default: @@ -3317,6 +4120,14 @@ out: } int +glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) +{ + return glusterd_big_locked_notify (rpc, mydata, event, data, + __glusterd_peer_rpc_notify); +} + +int glusterd_null (rpcsvc_request_t *req) { @@ -3324,11 +4135,11 @@ glusterd_null (rpcsvc_request_t *req) } rpcsvc_actor_t gd_svc_mgmt_actors[] = { - [GLUSTERD_MGMT_NULL] = { "NULL", GLUSTERD_MGMT_NULL, glusterd_null, NULL, 0}, - [GLUSTERD_MGMT_CLUSTER_LOCK] = { "CLUSTER_LOCK", GLUSTERD_MGMT_CLUSTER_LOCK, glusterd_handle_cluster_lock, NULL, 0}, - [GLUSTERD_MGMT_CLUSTER_UNLOCK] = { "CLUSTER_UNLOCK", GLUSTERD_MGMT_CLUSTER_UNLOCK, glusterd_handle_cluster_unlock, NULL, 0}, - [GLUSTERD_MGMT_STAGE_OP] = { "STAGE_OP", GLUSTERD_MGMT_STAGE_OP, glusterd_handle_stage_op, NULL, 0}, - [GLUSTERD_MGMT_COMMIT_OP] = { "COMMIT_OP", GLUSTERD_MGMT_COMMIT_OP, glusterd_handle_commit_op, NULL, 0}, + [GLUSTERD_MGMT_NULL] = { "NULL", GLUSTERD_MGMT_NULL, glusterd_null, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_CLUSTER_LOCK] = { "CLUSTER_LOCK", GLUSTERD_MGMT_CLUSTER_LOCK, glusterd_handle_cluster_lock, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_CLUSTER_UNLOCK] = { "CLUSTER_UNLOCK", GLUSTERD_MGMT_CLUSTER_UNLOCK, glusterd_handle_cluster_unlock, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_STAGE_OP] = { "STAGE_OP", GLUSTERD_MGMT_STAGE_OP, glusterd_handle_stage_op, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_COMMIT_OP] = { "COMMIT_OP", GLUSTERD_MGMT_COMMIT_OP, 
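[Reviewer note] On RPC_CLNT_DISCONNECT the patch walks every volume and releases any mgmt_v3 lock the vanished peer still holds, replacing the removed single-cluster-lock recovery logic above. A toy release-by-owner sketch over a fixed lock table (illustrative only; glusterd tracks these locks in its own structures; link with -luuid):

#include <stdio.h>
#include <string.h>
#include <uuid/uuid.h>   /* link with -luuid */

#define MAX_VOLS 4

/* Hypothetical per-volume lock record: volume name plus owner uuid. */
struct vol_lock {
        char   volname[64];
        uuid_t owner;
        int    held;
};

static struct vol_lock locks[MAX_VOLS];

/* On peer disconnect, drop every volume lock the peer still owns,
 * mirroring the unlock loop in __glusterd_peer_rpc_notify. */
static void
release_peer_locks (uuid_t peer)
{
        int i;

        for (i = 0; i < MAX_VOLS; i++) {
                if (locks[i].held &&
                    !uuid_compare (locks[i].owner, peer)) {
                        locks[i].held = 0;
                        printf ("released lock on %s\n",
                                locks[i].volname);
                }
        }
}

int
main (void)
{
        uuid_t peer;

        uuid_generate (peer);
        snprintf (locks[0].volname, sizeof (locks[0].volname), "vol0");
        uuid_copy (locks[0].owner, peer);
        locks[0].held = 1;

        release_peer_locks (peer);
        return 0;
}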
glusterd_handle_commit_op, NULL, 0, DRC_NA}, }; struct rpcsvc_program gd_svc_mgmt_prog = { @@ -3341,11 +4152,11 @@ struct rpcsvc_program gd_svc_mgmt_prog = { }; rpcsvc_actor_t gd_svc_peer_actors[] = { - [GLUSTERD_FRIEND_NULL] = { "NULL", GLUSTERD_MGMT_NULL, glusterd_null, NULL, 0}, - [GLUSTERD_PROBE_QUERY] = { "PROBE_QUERY", GLUSTERD_PROBE_QUERY, glusterd_handle_probe_query, NULL, 0}, - [GLUSTERD_FRIEND_ADD] = { "FRIEND_ADD", GLUSTERD_FRIEND_ADD, glusterd_handle_incoming_friend_req, NULL, 0}, - [GLUSTERD_FRIEND_REMOVE] = { "FRIEND_REMOVE", GLUSTERD_FRIEND_REMOVE, glusterd_handle_incoming_unfriend_req, NULL, 0}, - [GLUSTERD_FRIEND_UPDATE] = { "FRIEND_UPDATE", GLUSTERD_FRIEND_UPDATE, glusterd_handle_friend_update, NULL, 0}, + [GLUSTERD_FRIEND_NULL] = { "NULL", GLUSTERD_MGMT_NULL, glusterd_null, NULL, 0, DRC_NA}, + [GLUSTERD_PROBE_QUERY] = { "PROBE_QUERY", GLUSTERD_PROBE_QUERY, glusterd_handle_probe_query, NULL, 0, DRC_NA}, + [GLUSTERD_FRIEND_ADD] = { "FRIEND_ADD", GLUSTERD_FRIEND_ADD, glusterd_handle_incoming_friend_req, NULL, 0, DRC_NA}, + [GLUSTERD_FRIEND_REMOVE] = { "FRIEND_REMOVE", GLUSTERD_FRIEND_REMOVE, glusterd_handle_incoming_unfriend_req, NULL, 0, DRC_NA}, + [GLUSTERD_FRIEND_UPDATE] = { "FRIEND_UPDATE", GLUSTERD_FRIEND_UPDATE, glusterd_handle_friend_update, NULL, 0, DRC_NA}, }; struct rpcsvc_program gd_svc_peer_prog = { @@ -3360,38 +4171,39 @@ struct rpcsvc_program gd_svc_peer_prog = { rpcsvc_actor_t gd_svc_cli_actors[] = { - [GLUSTER_CLI_PROBE] = { "CLI_PROBE", GLUSTER_CLI_PROBE, glusterd_handle_cli_probe, NULL, 0}, - [GLUSTER_CLI_CREATE_VOLUME] = { "CLI_CREATE_VOLUME", GLUSTER_CLI_CREATE_VOLUME, glusterd_handle_create_volume, NULL, 0}, - [GLUSTER_CLI_DEFRAG_VOLUME] = { "CLI_DEFRAG_VOLUME", GLUSTER_CLI_DEFRAG_VOLUME, glusterd_handle_defrag_volume, NULL, 0}, - [GLUSTER_CLI_DEPROBE] = { "FRIEND_REMOVE", GLUSTER_CLI_DEPROBE, glusterd_handle_cli_deprobe, NULL, 0}, - [GLUSTER_CLI_LIST_FRIENDS] = { "LIST_FRIENDS", GLUSTER_CLI_LIST_FRIENDS, glusterd_handle_cli_list_friends, NULL, 0}, - [GLUSTER_CLI_UUID_RESET] = { "UUID_RESET", GLUSTER_CLI_UUID_RESET, glusterd_handle_cli_uuid_reset, NULL, 0}, - [GLUSTER_CLI_START_VOLUME] = { "START_VOLUME", GLUSTER_CLI_START_VOLUME, glusterd_handle_cli_start_volume, NULL, 0}, - [GLUSTER_CLI_STOP_VOLUME] = { "STOP_VOLUME", GLUSTER_CLI_STOP_VOLUME, glusterd_handle_cli_stop_volume, NULL, 0}, - [GLUSTER_CLI_DELETE_VOLUME] = { "DELETE_VOLUME", GLUSTER_CLI_DELETE_VOLUME, glusterd_handle_cli_delete_volume, NULL, 0}, - [GLUSTER_CLI_GET_VOLUME] = { "GET_VOLUME", GLUSTER_CLI_GET_VOLUME, glusterd_handle_cli_get_volume, NULL, 0}, - [GLUSTER_CLI_ADD_BRICK] = { "ADD_BRICK", GLUSTER_CLI_ADD_BRICK, glusterd_handle_add_brick, NULL, 0}, - [GLUSTER_CLI_REPLACE_BRICK] = { "REPLACE_BRICK", GLUSTER_CLI_REPLACE_BRICK, glusterd_handle_replace_brick, NULL, 0}, - [GLUSTER_CLI_REMOVE_BRICK] = { "REMOVE_BRICK", GLUSTER_CLI_REMOVE_BRICK, glusterd_handle_remove_brick, NULL, 0}, - [GLUSTER_CLI_LOG_ROTATE] = { "LOG FILENAME", GLUSTER_CLI_LOG_ROTATE, glusterd_handle_log_rotate, NULL, 0}, - [GLUSTER_CLI_SET_VOLUME] = { "SET_VOLUME", GLUSTER_CLI_SET_VOLUME, glusterd_handle_set_volume, NULL, 0}, - [GLUSTER_CLI_SYNC_VOLUME] = { "SYNC_VOLUME", GLUSTER_CLI_SYNC_VOLUME, glusterd_handle_sync_volume, NULL, 0}, - [GLUSTER_CLI_RESET_VOLUME] = { "RESET_VOLUME", GLUSTER_CLI_RESET_VOLUME, glusterd_handle_reset_volume, NULL, 0}, - [GLUSTER_CLI_FSM_LOG] = { "FSM_LOG", GLUSTER_CLI_FSM_LOG, glusterd_handle_fsm_log, NULL, 0}, - [GLUSTER_CLI_GSYNC_SET] = { "GSYNC_SET", GLUSTER_CLI_GSYNC_SET, 
glusterd_handle_gsync_set, NULL, 0}, - [GLUSTER_CLI_PROFILE_VOLUME] = { "STATS_VOLUME", GLUSTER_CLI_PROFILE_VOLUME, glusterd_handle_cli_profile_volume, NULL, 0}, - [GLUSTER_CLI_QUOTA] = { "QUOTA", GLUSTER_CLI_QUOTA, glusterd_handle_quota, NULL, 0}, - [GLUSTER_CLI_GETWD] = { "GETWD", GLUSTER_CLI_GETWD, glusterd_handle_getwd, NULL, 1}, - [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", GLUSTER_CLI_STATUS_VOLUME, glusterd_handle_status_volume, NULL, 0}, - [GLUSTER_CLI_MOUNT] = { "MOUNT", GLUSTER_CLI_MOUNT, glusterd_handle_mount, NULL, 1}, - [GLUSTER_CLI_UMOUNT] = { "UMOUNT", GLUSTER_CLI_UMOUNT, glusterd_handle_umount, NULL, 1}, - [GLUSTER_CLI_HEAL_VOLUME] = { "HEAL_VOLUME", GLUSTER_CLI_HEAL_VOLUME, glusterd_handle_cli_heal_volume, NULL, 0}, - [GLUSTER_CLI_STATEDUMP_VOLUME] = {"STATEDUMP_VOLUME", GLUSTER_CLI_STATEDUMP_VOLUME, glusterd_handle_cli_statedump_volume, NULL, 0}, - [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", GLUSTER_CLI_LIST_VOLUME, glusterd_handle_cli_list_volume, NULL, 0}, - [GLUSTER_CLI_CLRLOCKS_VOLUME] = {"CLEARLOCKS_VOLUME", GLUSTER_CLI_CLRLOCKS_VOLUME, glusterd_handle_cli_clearlocks_volume, NULL, 0}, -#ifdef HAVE_BD_XLATOR - [GLUSTER_CLI_BD_OP] = {"BD_OP", GLUSTER_CLI_BD_OP, glusterd_handle_cli_bd_op, NULL, 0}, -#endif + [GLUSTER_CLI_PROBE] = { "CLI_PROBE", GLUSTER_CLI_PROBE, glusterd_handle_cli_probe, NULL, 0, DRC_NA}, + [GLUSTER_CLI_CREATE_VOLUME] = { "CLI_CREATE_VOLUME", GLUSTER_CLI_CREATE_VOLUME, glusterd_handle_create_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_DEFRAG_VOLUME] = { "CLI_DEFRAG_VOLUME", GLUSTER_CLI_DEFRAG_VOLUME, glusterd_handle_defrag_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_DEPROBE] = { "FRIEND_REMOVE", GLUSTER_CLI_DEPROBE, glusterd_handle_cli_deprobe, NULL, 0, DRC_NA}, + [GLUSTER_CLI_LIST_FRIENDS] = { "LIST_FRIENDS", GLUSTER_CLI_LIST_FRIENDS, glusterd_handle_cli_list_friends, NULL, 0, DRC_NA}, + [GLUSTER_CLI_UUID_RESET] = { "UUID_RESET", GLUSTER_CLI_UUID_RESET, glusterd_handle_cli_uuid_reset, NULL, 0, DRC_NA}, + [GLUSTER_CLI_UUID_GET] = { "UUID_GET", GLUSTER_CLI_UUID_GET, glusterd_handle_cli_uuid_get, NULL, 0, DRC_NA}, + [GLUSTER_CLI_START_VOLUME] = { "START_VOLUME", GLUSTER_CLI_START_VOLUME, glusterd_handle_cli_start_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_STOP_VOLUME] = { "STOP_VOLUME", GLUSTER_CLI_STOP_VOLUME, glusterd_handle_cli_stop_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_DELETE_VOLUME] = { "DELETE_VOLUME", GLUSTER_CLI_DELETE_VOLUME, glusterd_handle_cli_delete_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_GET_VOLUME] = { "GET_VOLUME", GLUSTER_CLI_GET_VOLUME, glusterd_handle_cli_get_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_ADD_BRICK] = { "ADD_BRICK", GLUSTER_CLI_ADD_BRICK, glusterd_handle_add_brick, NULL, 0, DRC_NA}, + [GLUSTER_CLI_REPLACE_BRICK] = { "REPLACE_BRICK", GLUSTER_CLI_REPLACE_BRICK, glusterd_handle_replace_brick, NULL, 0, DRC_NA}, + [GLUSTER_CLI_REMOVE_BRICK] = { "REMOVE_BRICK", GLUSTER_CLI_REMOVE_BRICK, glusterd_handle_remove_brick, NULL, 0, DRC_NA}, + [GLUSTER_CLI_LOG_ROTATE] = { "LOG FILENAME", GLUSTER_CLI_LOG_ROTATE, glusterd_handle_log_rotate, NULL, 0, DRC_NA}, + [GLUSTER_CLI_SET_VOLUME] = { "SET_VOLUME", GLUSTER_CLI_SET_VOLUME, glusterd_handle_set_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_SYNC_VOLUME] = { "SYNC_VOLUME", GLUSTER_CLI_SYNC_VOLUME, glusterd_handle_sync_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_RESET_VOLUME] = { "RESET_VOLUME", GLUSTER_CLI_RESET_VOLUME, glusterd_handle_reset_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_FSM_LOG] = { "FSM_LOG", GLUSTER_CLI_FSM_LOG, glusterd_handle_fsm_log, NULL, 0, DRC_NA}, + [GLUSTER_CLI_GSYNC_SET] = { 
"GSYNC_SET", GLUSTER_CLI_GSYNC_SET, glusterd_handle_gsync_set, NULL, 0, DRC_NA}, + [GLUSTER_CLI_PROFILE_VOLUME] = { "STATS_VOLUME", GLUSTER_CLI_PROFILE_VOLUME, glusterd_handle_cli_profile_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_QUOTA] = { "QUOTA", GLUSTER_CLI_QUOTA, glusterd_handle_quota, NULL, 0, DRC_NA}, + [GLUSTER_CLI_GETWD] = { "GETWD", GLUSTER_CLI_GETWD, glusterd_handle_getwd, NULL, 1, DRC_NA}, + [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", GLUSTER_CLI_STATUS_VOLUME, glusterd_handle_status_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_MOUNT] = { "MOUNT", GLUSTER_CLI_MOUNT, glusterd_handle_mount, NULL, 1, DRC_NA}, + [GLUSTER_CLI_UMOUNT] = { "UMOUNT", GLUSTER_CLI_UMOUNT, glusterd_handle_umount, NULL, 1, DRC_NA}, + [GLUSTER_CLI_HEAL_VOLUME] = { "HEAL_VOLUME", GLUSTER_CLI_HEAL_VOLUME, glusterd_handle_cli_heal_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_STATEDUMP_VOLUME] = {"STATEDUMP_VOLUME", GLUSTER_CLI_STATEDUMP_VOLUME, glusterd_handle_cli_statedump_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", GLUSTER_CLI_LIST_VOLUME, glusterd_handle_cli_list_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_CLRLOCKS_VOLUME] = {"CLEARLOCKS_VOLUME", GLUSTER_CLI_CLRLOCKS_VOLUME, glusterd_handle_cli_clearlocks_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_COPY_FILE] = {"COPY_FILE", GLUSTER_CLI_COPY_FILE, glusterd_handle_copy_file, NULL, 0, DRC_NA}, + [GLUSTER_CLI_SYS_EXEC] = {"SYS_EXEC", GLUSTER_CLI_SYS_EXEC, glusterd_handle_sys_exec, NULL, 0, DRC_NA}, + [GLUSTER_CLI_SNAP] = {"SNAP", GLUSTER_CLI_SNAP, glusterd_handle_snapshot, NULL, 0, DRC_NA}, }; struct rpcsvc_program gd_svc_cli_prog = { @@ -3402,3 +4214,24 @@ struct rpcsvc_program gd_svc_cli_prog = { .actors = gd_svc_cli_actors, .synctask = _gf_true, }; + +/* This is a minimal RPC prog, which contains only the readonly RPC procs from + * the cli rpcsvc + */ +rpcsvc_actor_t gd_svc_cli_actors_ro[] = { + [GLUSTER_CLI_LIST_FRIENDS] = { "LIST_FRIENDS", GLUSTER_CLI_LIST_FRIENDS, glusterd_handle_cli_list_friends, NULL, 0, DRC_NA}, + [GLUSTER_CLI_UUID_GET] = { "UUID_GET", GLUSTER_CLI_UUID_GET, glusterd_handle_cli_uuid_get, NULL, 0, DRC_NA}, + [GLUSTER_CLI_GET_VOLUME] = { "GET_VOLUME", GLUSTER_CLI_GET_VOLUME, glusterd_handle_cli_get_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_GETWD] = { "GETWD", GLUSTER_CLI_GETWD, glusterd_handle_getwd, NULL, 1, DRC_NA}, + [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", GLUSTER_CLI_STATUS_VOLUME, glusterd_handle_status_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", GLUSTER_CLI_LIST_VOLUME, glusterd_handle_cli_list_volume, NULL, 0, DRC_NA}, +}; + +struct rpcsvc_program gd_svc_cli_prog_ro = { + .progname = "GlusterD svc cli read-only", + .prognum = GLUSTER_CLI_PROGRAM, + .progver = GLUSTER_CLI_VERSION, + .numactors = GLUSTER_CLI_MAXVALUE, + .actors = gd_svc_cli_actors_ro, + .synctask = _gf_true, +}; diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c index 8b9647026..0f0357c4c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handshake.c +++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c @@ -30,26 +30,147 @@ extern struct rpc_clnt_program gd_peer_prog; extern struct rpc_clnt_program gd_mgmt_prog; +extern struct rpc_clnt_program gd_mgmt_v3_prog; +extern struct rpc_clnt_program gd_mgmt_v3_prog; + #define TRUSTED_PREFIX "trusted-" typedef ssize_t (*gfs_serialize_t) (struct iovec outmsg, void *data); +static int +get_snap_volname_and_volinfo (const char *volpath, char **volname, + glusterd_volinfo_t **volinfo) +{ + int ret = -1; + char *save_ptr = 
NULL; + char *str_token = NULL; + char *snapname = NULL; + char *volname_token = NULL; + char *vol = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (volpath); + GF_ASSERT (volinfo); + + str_token = gf_strdup (volpath); + if (NULL == str_token) { + goto out; + } + + /* Input volname will have below formats: + * /snaps/<snapname>/<volname>.<hostname> + * or + * /snaps/<snapname>/<parent-volname> + * We need to extract snapname and parent_volname */ + + /*split string by "/" */ + strtok_r (str_token, "/", &save_ptr); + snapname = strtok_r(NULL, "/", &save_ptr); + if (!snapname) { + gf_log(this->name, GF_LOG_ERROR, "Invalid path: %s", volpath); + goto out; + } + + volname_token = strtok_r(NULL, "/", &save_ptr); + if (!volname_token) { + gf_log(this->name, GF_LOG_ERROR, "Invalid path: %s", volpath); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + gf_log(this->name, GF_LOG_ERROR, "Failed to " + "fetch snap %s", snapname); + goto out; + } + + /* Find if its a parent volume name or snap volume + * name. This function will succeed if volname_token + * is a parent volname + */ + ret = glusterd_volinfo_find (volname_token, volinfo); + if (ret) { + *volname = gf_strdup (volname_token); + if (NULL == *volname) { + ret = -1; + goto out; + } + + ret = glusterd_snap_volinfo_find (volname_token, snap, + volinfo); + if (ret) { + /* Split the volume name */ + vol = strtok_r (volname_token, ".", &save_ptr); + if (!vol) { + gf_log(this->name, GF_LOG_ERROR, "Invalid " + "volname (%s)", volname_token); + goto out; + } + + ret = glusterd_snap_volinfo_find (vol, snap, volinfo); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Failed to " + "fetch snap volume from volname (%s)", + vol); + goto out; + } + } + } else { + /*volname_token is parent volname*/ + ret = glusterd_snap_volinfo_find_from_parent_volname ( + volname_token, snap, volinfo); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Failed to " + "fetch snap volume from parent " + "volname (%s)", volname_token); + goto out; + } + + /* Since volname_token is a parent volname we should + * get the snap volname here*/ + *volname = gf_strdup ((*volinfo)->volname); + if (NULL == *volname) { + ret = -1; + goto out; + } + } + +out: + if (ret && NULL != *volname) { + GF_FREE (*volname); + *volname = NULL; + } + return ret; +} + static size_t build_volfile_path (const char *volname, char *path, size_t path_len, char *trusted_str) { - struct stat stbuf = {0,}; - int32_t ret = -1; - glusterd_conf_t *priv = NULL; - char *vol = NULL; - char *dup_volname = NULL; - char *free_ptr = NULL; - char *tmp = NULL; - glusterd_volinfo_t *volinfo = NULL; - char *server = NULL; - - priv = THIS->private; + struct stat stbuf = {0,}; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + char *vol = NULL; + char *dup_volname = NULL; + char *free_ptr = NULL; + char *save_ptr = NULL; + char *str_token = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *server = NULL; + const char *volname_ptr = NULL; + char path_prefix [PATH_MAX] = {0,}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (volname); + GF_ASSERT (path); if (strstr (volname, "gluster/")) { server = strchr (volname, '/') + 1; @@ -57,6 +178,22 @@ build_volfile_path (const char *volname, char *path, path, path_len); ret = 1; goto out; + } else if ((str_token = strstr (volname, "/snaps/"))) { + ret = get_snap_volname_and_volinfo (str_token, &dup_volname, + 
&volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get snap" + " volinfo from path (%s)", volname); + ret = -1; + goto out; + } + + snprintf (path_prefix, sizeof (path_prefix), "%s/snaps/%s", + priv->workdir, volinfo->snapshot->snapname); + + free_ptr = dup_volname; + volname_ptr = dup_volname; + goto gotvolinfo; } else if (volname[0] != '/') { /* Normal behavior */ dup_volname = gf_strdup (volname); @@ -67,39 +204,53 @@ build_volfile_path (const char *volname, char *path, dup_volname = gf_strdup (&volname[1]); } + if (!dup_volname) { + gf_log(THIS->name, GF_LOG_ERROR, "strdup failed"); + ret = -1; + goto out; + } free_ptr = dup_volname; + volname_ptr = volname; + + snprintf (path_prefix, sizeof (path_prefix), "%s/vols", + priv->workdir); ret = glusterd_volinfo_find (dup_volname, &volinfo); + if (ret) { /* Split the volume name */ - vol = strtok_r (dup_volname, ".", &tmp); + vol = strtok_r (dup_volname, ".", &save_ptr); if (!vol) goto out; + ret = glusterd_volinfo_find (vol, &volinfo); if (ret) goto out; } +gotvolinfo: if (!glusterd_auth_get_username (volinfo)) trusted_str = NULL; - ret = snprintf (path, path_len, "%s/vols/%s/%s.vol", - priv->workdir, volinfo->volname, volname); + ret = snprintf (path, path_len, "%s/%s/%s.vol", path_prefix, + volinfo->volname, volname_ptr); if (ret == -1) goto out; ret = stat (path, &stbuf); if ((ret == -1) && (errno == ENOENT)) { - snprintf (path, path_len, "%s/vols/%s/%s%s-fuse.vol", - priv->workdir, volinfo->volname, - (trusted_str ? trusted_str : ""), dup_volname); + snprintf (path, path_len, "%s/%s/%s%s-fuse.vol", + path_prefix, volinfo->volname, + (trusted_str ? trusted_str : ""), + dup_volname); + ret = stat (path, &stbuf); } if ((ret == -1) && (errno == ENOENT)) { - snprintf (path, path_len, "%s/vols/%s/%s-tcp.vol", - priv->workdir, volinfo->volname, volname); + snprintf (path, path_len, "%s/%s/%s-tcp.vol", + path_prefix, volinfo->volname, volname_ptr); } ret = 1; @@ -108,124 +259,158 @@ out: return ret; } -int -server_getspec (rpcsvc_request_t *req) +/* Get and store op-versions of the clients sending the getspec request + * Clients of versions <= 3.3, don't send op-versions, their op-versions are + * defaulted to 1 + */ +static int +_get_client_op_versions (gf_getspec_req *args, peer_info_t *peerinfo) { - int32_t ret = -1; - int32_t op_errno = 0; - int32_t spec_fd = -1; - size_t file_len = 0; - char filename[PATH_MAX] = {0,}; - struct stat stbuf = {0,}; - char *volume = NULL; - char *tmp = NULL; - int cookie = 0; - rpc_transport_t *trans = NULL; - gf_getspec_req args = {0,}; - gf_getspec_rsp rsp = {0,}; - char addrstr[RPCSVC_PEER_STRLEN] = {0}; - dict_t *dict = NULL; - xlator_t *this = NULL; - glusterd_conf_t *conf = NULL; - int client_min_op_version = 1; // OP-VERSIONs start at 1 - int client_max_op_version = 1; + int ret = 0; + int client_max_op_version = 1; + int client_min_op_version = 1; + dict_t *dict = NULL; - this = THIS; - GF_ASSERT (this); - - conf = this->private; - GF_ASSERT (conf); + GF_ASSERT (args); + GF_ASSERT (peerinfo); - ret = xdr_to_generic (req->msg[0], &args, - (xdrproc_t)xdr_gf_getspec_req); - if (ret < 0) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto fail; - } - - if (!args.xdata.xdata_len) { - // For clients <= 3.3.0, only allow if op_version = 1 - if (1 != conf->op_version) { - ret = -1; - op_errno = ENOTSUP; - gf_log (this->name, GF_LOG_INFO, - "Client %s doesn't support required op-version. 
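The snapshot branch above first splits /snaps/<snapname>/<volname> with strtok_r, resolves the snap by name, and then finds the volinfo either directly or via the parent volume name; build_volfile_path then swaps the usual vols prefix for snaps/<snapname> before the common gotvolinfo logic runs. A minimal sketch of the resulting search paths; the workdir value is an assumption (glusterd's default is typically /var/lib/glusterd):

#include <stdio.h>
#include <limits.h>

/* Sketch: where build_volfile_path now looks for a client volfile.
 * Regular volume:   <workdir>/vols/<vol>/<vol>.vol
 * Snapshot volume:  <workdir>/snaps/<snap>/<vol>/<vol>.vol
 * The workdir "/var/lib/glusterd" is an assumed default. */
static void
sketch_volfile_path (const char *snapname, const char *volname)
{
        char path[PATH_MAX] = {0,};

        if (snapname)
                snprintf (path, sizeof (path),
                          "/var/lib/glusterd/snaps/%s/%s/%s.vol",
                          snapname, volname, volname);
        else
                snprintf (path, sizeof (path),
                          "/var/lib/glusterd/vols/%s/%s.vol",
                          volname, volname);

        printf ("%s\n", path);
}
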
" - "Rejecting getspec request.", - req->trans->peerinfo.identifier); - goto fail; - } - } else { - // For clients > 3.3, only allow if they can support - // clusters' op_version + if (args->xdata.xdata_len) { dict = dict_new (); if (!dict) { ret = -1; - goto fail; + goto out; } - ret = dict_unserialize (args.xdata.xdata_val, - args.xdata.xdata_len, &dict); + ret = dict_unserialize (args->xdata.xdata_val, + args->xdata.xdata_len, &dict); if (ret) { - gf_log (this->name, GF_LOG_ERROR, + gf_log ("glusterd", GF_LOG_ERROR, "Failed to unserialize request dictionary"); - goto fail; + goto out; } ret = dict_get_int32 (dict, "min-op-version", &client_min_op_version); if (ret) { - gf_log (this->name, GF_LOG_ERROR, + gf_log ("glusterd", GF_LOG_ERROR, "Failed to get client-min-op-version"); - goto fail; + goto out; } ret = dict_get_int32 (dict, "max-op-version", &client_max_op_version); if (ret) { - gf_log (this->name, GF_LOG_ERROR, + gf_log ("glusterd", GF_LOG_ERROR, "Failed to get client-max-op-version"); - goto fail; + goto out; } + } + + peerinfo->max_op_version = client_max_op_version; + peerinfo->min_op_version = client_min_op_version; + +out: + return ret; +} + +/* Checks if the client supports the volume, ie. client can understand all the + * options in the volfile + */ +static gf_boolean_t +_client_supports_volume (peer_info_t *peerinfo, int32_t *op_errno) +{ + gf_boolean_t ret = _gf_true; + glusterd_volinfo_t *volinfo = NULL; + + GF_ASSERT (peerinfo); + GF_ASSERT (op_errno); - if ((client_min_op_version > conf->op_version) || - (client_max_op_version < conf->op_version)) { - ret = -1; - op_errno = ENOTSUP; - //TODO: Add client identifier - gf_log (this->name, GF_LOG_INFO, - "Client %s doesn't support required op-version. " - "Rejecting getspec request.", - req->trans->peerinfo.identifier); - goto fail; - } + /* Only check when the volfile being requested is a volume. Not finding + * a volinfo implies that the volfile requested for is not of a gluster + * volume. A non volume volfile is requested by the local gluster + * services like shd and nfs-server. These need not be checked as they + * will be running at the same op-version as glusterd and will be able + * to support all the features + */ + if ((glusterd_volinfo_find (peerinfo->volname, &volinfo) == 0) && + ((peerinfo->min_op_version > volinfo->client_op_version) || + (peerinfo->max_op_version < volinfo->client_op_version))) { + ret = _gf_false; + *op_errno = ENOTSUP; + gf_log ("glusterd", GF_LOG_INFO, + "Client %s (%d -> %d) doesn't support required " + "op-version (%d). 
Rejecting volfile request.", + peerinfo->identifier, peerinfo->min_op_version, + peerinfo->max_op_version, volinfo->client_op_version); } - // Store the op-versions supported by the client - req->trans->peerinfo.max_op_version = client_max_op_version; - req->trans->peerinfo.min_op_version = client_min_op_version; + return ret; +} - volume = args.key; +int +__server_getspec (rpcsvc_request_t *req) +{ + int32_t ret = -1; + int32_t op_errno = 0; + int32_t spec_fd = -1; + size_t file_len = 0; + char filename[PATH_MAX] = {0,}; + struct stat stbuf = {0,}; + char *volume = NULL; + char *tmp = NULL; + int cookie = 0; + rpc_transport_t *trans = NULL; + gf_getspec_req args = {0,}; + gf_getspec_rsp rsp = {0,}; + char addrstr[RPCSVC_PEER_STRLEN] = {0}; + peer_info_t *peerinfo = NULL; + + ret = xdr_to_generic (req->msg[0], &args, + (xdrproc_t)xdr_gf_getspec_req); + if (ret < 0) { + //failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + goto fail; + } - // Store the name of volume being mounted + peerinfo = &req->trans->peerinfo; + + volume = args.key; + /* Need to strip leading '/' from volnames. This was introduced to + * support nfs style mount parameters for native gluster mount + */ if (volume[0] == '/') - strncpy (req->trans->peerinfo.volname, &volume[1], - strlen(&volume[1])); + strncpy (peerinfo->volname, &volume[1], strlen(&volume[1])); else - strncpy (req->trans->peerinfo.volname, volume, strlen(volume)); + strncpy (peerinfo->volname, volume, strlen(volume)); + + ret = _get_client_op_versions (&args, peerinfo); + if (ret) + goto fail; + + if (!_client_supports_volume (peerinfo, &op_errno)) { + ret = -1; + goto fail; + } trans = req->trans; + /* addrstr will be empty for cli socket connections */ ret = rpcsvc_transport_peername (trans, (char *)&addrstr, sizeof (addrstr)); if (ret) goto fail; - tmp = strrchr (addrstr, ':'); - *tmp = '\0'; + tmp = strrchr (addrstr, ':'); + if (tmp) + *tmp = '\0'; - /* we trust the local admin */ - if (glusterd_is_local_addr (addrstr)) { + /* The trusted volfiles are given to the glusterd owned process like NFS + * server, self-heal daemon etc., so that they are not inadvertently + * blocked by a auth.{allow,reject} setting. The trusted volfile is not + * meant for external users. + */ + if (strlen (addrstr) && gf_is_local_addr (addrstr)) { ret = build_volfile_path (volume, filename, sizeof (filename), @@ -289,8 +474,14 @@ fail: return 0; } +int +server_getspec (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __server_getspec); +} + int32_t -server_event_notify (rpcsvc_request_t *req) +__server_event_notify (rpcsvc_request_t *req) { int32_t ret = -1; int32_t op_errno = 0; @@ -350,6 +541,12 @@ fail: return 0; } +int32_t +server_event_notify (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __server_event_notify); +} + int gd_validate_cluster_op_version (xlator_t *this, int cluster_op_version, char *peerid) @@ -368,12 +565,16 @@ gd_validate_cluster_op_version (xlator_t *this, int cluster_op_version, goto out; } - if (cluster_op_version < conf->op_version) { + /* The peer can only reduce its op-version when it doesn't have any + * volumes. 
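server_getspec and server_event_notify now follow the same convention as the rest of this patch: the real work moves into a double-underscore worker and the registered handler goes through glusterd_big_locked_handler, which serializes every handler behind one coarse lock. The wrapper's body is not part of these hunks; conceptually it behaves like the sketch below, where a plain mutex stands in for glusterd's actual synctask-aware big lock:

#include <pthread.h>

typedef int (*rpc_actor_fn) (void *req);

/* Stand-in for the global big lock held across every handler. */
static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

/* Sketch: run the double-underscore worker with the big lock
 * held, so no two glusterd handlers ever run concurrently. */
static int
big_locked_handler_sketch (void *req, rpc_actor_fn actor)
{
        int ret = -1;

        pthread_mutex_lock (&big_lock);
        ret = actor (req);
        pthread_mutex_unlock (&big_lock);

        return ret;
}
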
Reducing op-version when it already contains volumes can + * lead to inconsistencies in the cluster + */ + if ((cluster_op_version < conf->op_version) && + !list_empty (&conf->volumes)) { gf_log (this->name, GF_LOG_ERROR, - "operating version %d is less than the currently " - "running version (%d) on the machine (as per peer " - "request from %s)", cluster_op_version, - conf->op_version, peerid); + "cannot reduce operating version to %d from current " + "version %d as volumes exist (as per peer request from " + "%s)", cluster_op_version, conf->op_version, peerid); goto out; } @@ -383,7 +584,7 @@ out: } int -glusterd_mgmt_hndsk_versions (rpcsvc_request_t *req) +__glusterd_mgmt_hndsk_versions (rpcsvc_request_t *req) { dict_t *dict = NULL; xlator_t *this = NULL; @@ -459,7 +660,14 @@ out: } int -glusterd_mgmt_hndsk_versions_ack (rpcsvc_request_t *req) +glusterd_mgmt_hndsk_versions (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_mgmt_hndsk_versions); +} + +int +__glusterd_mgmt_hndsk_versions_ack (rpcsvc_request_t *req) { dict_t *clnt_dict = NULL; xlator_t *this = NULL; @@ -527,13 +735,17 @@ out: return ret; } +int +glusterd_mgmt_hndsk_versions_ack (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_mgmt_hndsk_versions_ack); +} rpcsvc_actor_t gluster_handshake_actors[] = { - [GF_HNDSK_NULL] = {"NULL", GF_HNDSK_NULL, NULL, NULL, 0}, - [GF_HNDSK_GETSPEC] = {"GETSPEC", GF_HNDSK_GETSPEC, - server_getspec, NULL, 0}, - [GF_HNDSK_EVENT_NOTIFY] = {"EVENTNOTIFY", GF_HNDSK_EVENT_NOTIFY, - server_event_notify, NULL, 0}, + [GF_HNDSK_NULL] = {"NULL", GF_HNDSK_NULL, NULL, NULL, 0, DRC_NA}, + [GF_HNDSK_GETSPEC] = {"GETSPEC", GF_HNDSK_GETSPEC, server_getspec, NULL, 0, DRC_NA}, + [GF_HNDSK_EVENT_NOTIFY] = {"EVENTNOTIFY", GF_HNDSK_EVENT_NOTIFY, server_event_notify, NULL, 0, DRC_NA}, }; @@ -545,6 +757,19 @@ struct rpcsvc_program gluster_handshake_prog = { .numactors = GF_HNDSK_MAXVALUE, }; +/* A minimal RPC program just for the cli getspec command */ +rpcsvc_actor_t gluster_cli_getspec_actors[] = { + [GF_HNDSK_GETSPEC] = {"GETSPEC", GF_HNDSK_GETSPEC, server_getspec, NULL, 0, DRC_NA}, +}; + +struct rpcsvc_program gluster_cli_getspec_prog = { + .progname = "Gluster Handshake (CLI Getspec)", + .prognum = GLUSTER_HNDSK_PROGRAM, + .progver = GLUSTER_HNDSK_VERSION, + .actors = gluster_cli_getspec_actors, + .numactors = GF_HNDSK_MAXVALUE, +}; + char *glusterd_dump_proc[GF_DUMP_MAXVALUE] = { [GF_DUMP_NULL] = "NULL", @@ -624,6 +849,7 @@ glusterd_event_connected_inject (glusterd_peerctx_t *peerctx) ctx->hostname = gf_strdup (peerinfo->hostname); ctx->port = peerinfo->port; ctx->req = peerctx->args.req; + ctx->dict = peerctx->args.dict; event->peerinfo = peerinfo; event->ctx = ctx; @@ -681,16 +907,6 @@ gd_validate_peer_op_version (xlator_t *this, glusterd_peerinfo_t *peerinfo, goto out; } - /* If peer is already operating at a higher op_version reject it. - * Cluster cannot be moved to higher op_version to accomodate a peer. 
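Op-version policy moves in two directions here: the hunk above now permits lowering the cluster op-version, but only while no volumes exist, and the removal in progress below drops the old rule that rejected any peer already running a higher op-version. As far as these hunks show it, the downgrade decision reduces to:

#include <stdbool.h>

/* Sketch of the relaxed downgrade rule: a lower op-version is
 * refused only when volumes exist, since their volfiles may
 * already rely on features of the higher version. */
static bool
may_lower_op_version (int proposed, int current, bool volumes_exist)
{
        if (proposed >= current)
                return true;           /* not a downgrade */
        return !volumes_exist;         /* downgrade: empty cluster only */
}
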
- */ - if (peer_op_version > conf->op_version) { - ret = gf_asprintf (errstr, "Peer %s is already at a higher " - "op-version", peerinfo->hostname); - ret = -1; - goto out; - } - ret = 0; out: gf_log (this->name , GF_LOG_DEBUG, "Peer %s %s", peerinfo->hostname, @@ -699,7 +915,7 @@ out: } int -glusterd_mgmt_hndsk_version_ack_cbk (struct rpc_req *req, struct iovec *iov, +__glusterd_mgmt_hndsk_version_ack_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { int ret = -1; @@ -747,6 +963,7 @@ glusterd_mgmt_hndsk_version_ack_cbk (struct rpc_req *req, struct iovec *iov, */ peerinfo->mgmt = &gd_mgmt_prog; peerinfo->peer = &gd_peer_prog; + peerinfo->mgmt_v3 = &gd_mgmt_v3_prog; ret = default_notify (this, GF_EVENT_CHILD_UP, NULL); @@ -778,7 +995,15 @@ out: } int -glusterd_mgmt_hndsk_version_cbk (struct rpc_req *req, struct iovec *iov, +glusterd_mgmt_hndsk_version_ack_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_mgmt_hndsk_version_ack_cbk); +} + +int +__glusterd_mgmt_hndsk_version_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { int ret = -1; @@ -883,6 +1108,14 @@ out: } int +glusterd_mgmt_hndsk_version_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_mgmt_hndsk_version_cbk); +} + +int glusterd_mgmt_handshake (xlator_t *this, glusterd_peerctx_t *peerctx) { call_frame_t *frame = NULL; @@ -957,6 +1190,14 @@ glusterd_set_clnt_mgmt_program (glusterd_peerinfo_t *peerinfo, peerinfo->peer->progname, peerinfo->peer->prognum, peerinfo->peer->progver); } + + if (peerinfo->mgmt_v3) { + gf_log ("", GF_LOG_INFO, + "Using Program %s, Num (%d), Version (%d)", + peerinfo->mgmt_v3->progname, peerinfo->mgmt_v3->prognum, + peerinfo->mgmt_v3->progver); + } + ret = 0; out: return ret; @@ -985,7 +1226,7 @@ out: } int -glusterd_peer_dump_version_cbk (struct rpc_req *req, struct iovec *iov, +__glusterd_peer_dump_version_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { int ret = -1; @@ -1092,6 +1333,14 @@ out: int +glusterd_peer_dump_version_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_peer_dump_version_cbk); +} + +int glusterd_peer_dump_version (xlator_t *this, struct rpc_clnt *rpc, glusterd_peerctx_t *peerctx) { diff --git a/xlators/mgmt/glusterd/src/glusterd-hooks.c b/xlators/mgmt/glusterd/src/glusterd-hooks.c index a61e1e85f..2b43a452e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-hooks.c +++ b/xlators/mgmt/glusterd/src/glusterd-hooks.c @@ -49,6 +49,7 @@ char glusterd_hook_dirnames[GD_OP_MAX][256] = [GD_OP_RESET_VOLUME] = EMPTY, [GD_OP_SYNC_VOLUME] = EMPTY, [GD_OP_LOG_ROTATE] = EMPTY, + [GD_OP_GSYNC_CREATE] = "gsync-create", [GD_OP_GSYNC_SET] = EMPTY, [GD_OP_PROFILE_VOLUME] = EMPTY, [GD_OP_QUOTA] = EMPTY, @@ -185,6 +186,7 @@ static int glusterd_hooks_add_op_args (runner_t *runner, glusterd_op_t op, dict_t *op_ctx, glusterd_commit_hook_type_t type) { + char *hooks_args = NULL; int vol_count = 0; gf_boolean_t truth = _gf_false; glusterd_volinfo_t *voliter = NULL; @@ -236,6 +238,18 @@ glusterd_hooks_add_op_args (runner_t *runner, glusterd_op_t op, ret = glusterd_hooks_set_volume_args (op_ctx, runner); break; + case GD_OP_GSYNC_CREATE: + ret = dict_get_str (op_ctx, "hooks_args", &hooks_args); + if (ret) + gf_log ("", GF_LOG_DEBUG, + "No Hooks 
Arguments."); + else + gf_log ("", GF_LOG_DEBUG, + "Hooks Args = %s", hooks_args); + if (hooks_args) + runner_argprintf (runner, "%s", hooks_args); + break; + default: break; diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.c b/xlators/mgmt/glusterd/src/glusterd-locks.c new file mode 100644 index 000000000..0af2a186f --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-locks.c @@ -0,0 +1,637 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "common-utils.h" +#include "cli1-xdr.h" +#include "xdr-generic.h" +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-store.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "glusterd-locks.h" +#include "run.h" +#include "syscall.h" + +#include <signal.h> + +#define GF_MAX_LOCKING_ENTITIES 2 + +/* Valid entities that the mgmt_v3 lock can hold locks upon * + * To add newer entities to be locked, we can just add more * + * entries to this table along with the type and default value */ +valid_entities valid_types[] = { + { "vol", _gf_true }, + { "snap", _gf_false }, + { NULL }, +}; + +static dict_t *mgmt_v3_lock; + +/* Checks if the lock request is for a valid entity */ +gf_boolean_t +glusterd_mgmt_v3_is_type_valid (char *type) +{ + int32_t i = 0; + gf_boolean_t ret = _gf_false; + + GF_ASSERT (type); + + for (i = 0; valid_types[i].type; i++) { + if (!strcmp (type, valid_types[i].type)) { + ret = _gf_true; + break; + } + } + + return ret; +} + +/* Initialize the global mgmt_v3 lock list(dict) when + * glusterd is spawned */ +int32_t +glusterd_mgmt_v3_lock_init () +{ + int32_t ret = -1; + + mgmt_v3_lock = dict_new (); + if (!mgmt_v3_lock) { + ret = -1; + goto out; + } + + ret = 0; +out: + return ret; +} + +/* Destroy the global mgmt_v3 lock list(dict) when + * glusterd cleanup is performed */ +void +glusterd_mgmt_v3_lock_fini () +{ + if (mgmt_v3_lock) + dict_destroy (mgmt_v3_lock); +} + +int32_t +glusterd_get_mgmt_v3_lock_owner (char *key, uuid_t *uuid) +{ + int32_t ret = -1; + mgmt_v3_lock_obj *lock_obj = NULL; + uuid_t no_owner = {"\0"}; + xlator_t *this = NULL; + + GF_ASSERT(THIS); + this = THIS; + + if (!key || !uuid) { + gf_log (this->name, GF_LOG_ERROR, "key or uuid is null."); + ret = -1; + goto out; + } + + ret = dict_get_bin(mgmt_v3_lock, key, (void **) &lock_obj); + if (!ret) + uuid_copy (*uuid, lock_obj->lock_owner); + else + uuid_copy (*uuid, no_owner); + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* This function is called with the locked_count and type, to * + * release all the acquired locks. 
*/ +static int32_t +glusterd_release_multiple_locks_per_entity (dict_t *dict, uuid_t uuid, + int32_t locked_count, + char *type) +{ + char name_buf[PATH_MAX] = ""; + char *name = NULL; + int32_t i = -1; + int32_t op_ret = 0; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT (dict); + GF_ASSERT (type); + + if (locked_count == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "No %s locked as part of this transaction", + type); + goto out; + } + + /* Release all the locks held */ + for (i = 0; i < locked_count; i++) { + snprintf (name_buf, sizeof(name_buf), + "%sname%d", type, i+1); + + /* Looking for volname1, volname2 or snapname1, * + * snapname2 as key in the dict */ + ret = dict_get_str (dict, name_buf, &name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get %s locked_count = %d", + name_buf, locked_count); + op_ret = ret; + continue; + } + + ret = glusterd_mgmt_v3_unlock (name, uuid, type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release lock for %s.", + name); + op_ret = ret; + } + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", op_ret); + return op_ret; +} + +/* Given the count and type of the entity this function acquires * + * locks on multiple elements of the same entity. For example: * + * If type is "vol" this function tries to acquire locks on multiple * + * volumes */ +static int32_t +glusterd_acquire_multiple_locks_per_entity (dict_t *dict, uuid_t uuid, + int32_t count, char *type) +{ + char name_buf[PATH_MAX] = ""; + char *name = NULL; + int32_t i = -1; + int32_t ret = -1; + int32_t locked_count = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT (dict); + GF_ASSERT (type); + + /* Locking one element after the other */ + for (i = 0; i < count; i++) { + snprintf (name_buf, sizeof(name_buf), + "%sname%d", type, i+1); + + /* Looking for volname1, volname2 or snapname1, * + * snapname2 as key in the dict */ + ret = dict_get_str (dict, name_buf, &name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get %s count = %d", + name_buf, count); + break; + } + + ret = glusterd_mgmt_v3_lock (name, uuid, type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire lock for %s %s " + "on behalf of %s. Reversing " + "this transaction", type, name, + uuid_utoa(uuid)); + break; + } + locked_count++; + } + + if (count == locked_count) { + /* If all locking ops went successfully, return as success */ + ret = 0; + goto out; + } + + /* If we failed to lock one element, unlock others and return failure */ + ret = glusterd_release_multiple_locks_per_entity (dict, uuid, + locked_count, + type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release multiple %s locks", + type); + } + ret = -1; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Given the type of entity, this function figures out if it should unlock a * + * single element or multiple elements of the said entity. 
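The acquire/release pair above is a classic all-or-nothing pattern: lock elements in order, count successes, and on the first failure release exactly the locked_count already held, so no partial state leaks out. Stripped of the dict plumbing, the shape is:

/* Sketch: all-or-nothing acquisition over count elements.
 * lock_one/unlock_one stand in for glusterd_mgmt_v3_lock and
 * glusterd_mgmt_v3_unlock applied to the i-th named element. */
static int
lock_all_or_nothing (int count,
                     int (*lock_one) (int idx),
                     int (*unlock_one) (int idx))
{
        int i = 0;
        int locked_count = 0;

        for (i = 0; i < count; i++) {
                if (lock_one (i))
                        break;
                locked_count++;
        }

        if (locked_count == count)
                return 0;               /* everything locked */

        for (i = 0; i < locked_count; i++)
                unlock_one (i);         /* roll back partial state */

        return -1;
}
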
For example: * + * if the type is "vol", this function will accordingly unlock a single volume * + * or multiple volumes */ +static int32_t +glusterd_mgmt_v3_unlock_entity (dict_t *dict, uuid_t uuid, char *type, + gf_boolean_t default_value) +{ + char name_buf[PATH_MAX] = ""; + char *name = NULL; + int32_t count = -1; + int32_t ret = -1; + gf_boolean_t hold_locks = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT (dict); + GF_ASSERT (type); + + snprintf (name_buf, sizeof(name_buf), "hold_%s_locks", type); + hold_locks = dict_get_str_boolean (dict, name_buf, default_value); + + if (hold_locks == _gf_false) { + /* Locks were not held for this particular entity * + * Hence nothing to release */ + ret = 0; + goto out; + } + + /* Looking for volcount or snapcount in the dict */ + snprintf (name_buf, sizeof(name_buf), "%scount", type); + ret = dict_get_int32 (dict, name_buf, &count); + if (ret) { + /* count is not present. Only one * + * element name needs to be unlocked */ + snprintf (name_buf, sizeof(name_buf), "%sname", + type); + ret = dict_get_str (dict, name_buf, &name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch %sname", type); + goto out; + } + + ret = glusterd_mgmt_v3_unlock (name, uuid, type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release lock for %s %s " + "on behalf of %s.", type, name, + uuid_utoa(uuid)); + goto out; + } + } else { + /* Unlocking one element name after another */ + ret = glusterd_release_multiple_locks_per_entity (dict, + uuid, + count, + type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release all %s locks", type); + goto out; + } + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Given the type of entity, this function figures out if it should lock a * + * single element or multiple elements of the said entity. For example: * + * if the type is "vol", this function will accordingly lock a single volume * + * or multiple volumes */ +static int32_t +glusterd_mgmt_v3_lock_entity (dict_t *dict, uuid_t uuid, char *type, + gf_boolean_t default_value) +{ + char name_buf[PATH_MAX] = ""; + char *name = NULL; + int32_t count = -1; + int32_t ret = -1; + gf_boolean_t hold_locks = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT (dict); + GF_ASSERT (type); + + snprintf (name_buf, sizeof(name_buf), "hold_%s_locks", type); + hold_locks = dict_get_str_boolean (dict, name_buf, default_value); + + if (hold_locks == _gf_false) { + /* Not holding locks for this particular entity */ + ret = 0; + goto out; + } + + /* Looking for volcount or snapcount in the dict */ + snprintf (name_buf, sizeof(name_buf), "%scount", type); + ret = dict_get_int32 (dict, name_buf, &count); + if (ret) { + /* count is not present. 
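Both entity routines probe one dict layout: an optional hold_<type>_locks boolean, then either a bare <type>name key for the single-element case or <type>count plus <type>name1..<type>nameN for the multi-element case. A sketch of how an originator might populate such a dict for two volumes follows; the volume names are examples and error handling is shortened:

/* Sketch: a request dict asking for locks on two volumes, using
 * the "<type>count" / "<type>nameN" convention probed above. */
static int
fill_two_volume_lock_dict (dict_t *dict)
{
        int ret = -1;

        ret = dict_set_int32 (dict, "volcount", 2);
        if (ret)
                goto out;

        ret = dict_set_str (dict, "volname1", "patchy1");
        if (ret)
                goto out;

        ret = dict_set_str (dict, "volname2", "patchy2");
out:
        return ret;
}
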
Only one * + * element name needs to be locked */ + snprintf (name_buf, sizeof(name_buf), "%sname", + type); + ret = dict_get_str (dict, name_buf, &name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch %sname", type); + goto out; + } + + ret = glusterd_mgmt_v3_lock (name, uuid, type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire lock for %s %s " + "on behalf of %s.", type, name, + uuid_utoa(uuid)); + goto out; + } + } else { + /* Locking one element name after another */ + ret = glusterd_acquire_multiple_locks_per_entity (dict, + uuid, + count, + type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire all %s locks", type); + goto out; + } + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Try to release locks of multiple entities like * + * volume, snaps etc. */ +int32_t +glusterd_multiple_mgmt_v3_unlock (dict_t *dict, uuid_t uuid) +{ + int32_t i = -1; + int32_t ret = -1; + int32_t op_ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, "dict is null."); + ret = -1; + goto out; + } + + for (i = 0; valid_types[i].type; i++) { + ret = glusterd_mgmt_v3_unlock_entity + (dict, uuid, + valid_types[i].type, + valid_types[i].default_value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to unlock all %s", + valid_types[i].type); + op_ret = ret; + } + } + + ret = op_ret; +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +/* Try to acquire locks on multiple entities like * + * volume, snaps etc. */ +int32_t +glusterd_multiple_mgmt_v3_lock (dict_t *dict, uuid_t uuid) +{ + int32_t i = -1; + int32_t ret = -1; + int32_t locked_count = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, "dict is null."); + ret = -1; + goto out; + } + + /* Locking one entity after the other */ + for (i = 0; valid_types[i].type; i++) { + ret = glusterd_mgmt_v3_lock_entity + (dict, uuid, + valid_types[i].type, + valid_types[i].default_value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to lock all %s", + valid_types[i].type); + break; + } + locked_count++; + } + + if (locked_count == GF_MAX_LOCKING_ENTITIES) { + /* If all locking ops went successfully, return as success */ + ret = 0; + goto out; + } + + /* If we failed to lock one entity, unlock others and return failure */ + for (i = 0; i < locked_count; i++) { + ret = glusterd_mgmt_v3_unlock_entity + (dict, uuid, + valid_types[i].type, + valid_types[i].default_value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to unlock all %s", + valid_types[i].type); + } + } + ret = -1; +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_mgmt_v3_lock (const char *name, uuid_t uuid, char *type) +{ + char key[PATH_MAX] = ""; + int32_t ret = -1; + mgmt_v3_lock_obj *lock_obj = NULL; + gf_boolean_t is_valid = _gf_true; + uuid_t owner = {0}; + xlator_t *this = NULL; + + GF_ASSERT(THIS); + this = THIS; + + if (!name || !type) { + gf_log (this->name, GF_LOG_ERROR, "name or type is null."); + ret = -1; + goto out; + } + + is_valid = glusterd_mgmt_v3_is_type_valid (type); + if (is_valid != _gf_true) { + gf_log (this->name, GF_LOG_ERROR, + "Invalid entity. 
Cannot perform locking " + "operation on %s types", type); + ret = -1; + goto out; + } + + ret = snprintf (key, sizeof(key), "%s_%s", name, type); + if (ret != strlen(name) + 1 + strlen(type)) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Unable to create key"); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "Trying to acquire lock of %s %s for %s as %s", + type, name, uuid_utoa (uuid), key); + + ret = glusterd_get_mgmt_v3_lock_owner (key, &owner); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Unable to get mgmt_v3 lock owner"); + goto out; + } + + /* If the lock has already been held for the given volume + * we fail */ + if (!uuid_is_null (owner)) { + gf_log (this->name, GF_LOG_ERROR, "Lock for %s held by %s", + name, uuid_utoa (owner)); + ret = -1; + goto out; + } + + lock_obj = GF_CALLOC (1, sizeof(mgmt_v3_lock_obj), + gf_common_mt_mgmt_v3_lock_obj_t); + if (!lock_obj) { + ret = -1; + goto out; + } + + uuid_copy (lock_obj->lock_owner, uuid); + + ret = dict_set_bin (mgmt_v3_lock, key, lock_obj, + sizeof(*lock_obj)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set lock owner in mgmt_v3 lock"); + if (lock_obj) + GF_FREE (lock_obj); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "Lock for %s %s successfully held by %s", + type, name, uuid_utoa (uuid)); + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_mgmt_v3_unlock (const char *name, uuid_t uuid, char *type) +{ + char key[PATH_MAX] = ""; + int32_t ret = -1; + gf_boolean_t is_valid = _gf_true; + uuid_t owner = {0}; + xlator_t *this = NULL; + + GF_ASSERT(THIS); + this = THIS; + + if (!name || !type) { + gf_log (this->name, GF_LOG_ERROR, "name or type is null."); + ret = -1; + goto out; + } + + is_valid = glusterd_mgmt_v3_is_type_valid (type); + if (is_valid != _gf_true) { + gf_log (this->name, GF_LOG_ERROR, + "Invalid entity. Cannot perform unlocking " + "operation on %s types", type); + ret = -1; + goto out; + } + + ret = snprintf (key, sizeof(key), "%s_%s", + name, type); + if (ret != strlen(name) + 1 + strlen(type)) { + gf_log (this->name, GF_LOG_ERROR, "Unable to create key"); + ret = -1; + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "Trying to release lock of %s %s for %s as %s", + type, name, uuid_utoa (uuid), key); + + ret = glusterd_get_mgmt_v3_lock_owner (key, &owner); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Unable to get mgmt_v3 lock owner"); + goto out; + } + + if (uuid_is_null (owner)) { + gf_log (this->name, GF_LOG_ERROR, + "Lock for %s %s not held", type, name); + ret = -1; + goto out; + } + + ret = uuid_compare (uuid, owner); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Lock owner mismatch. " + "Lock for %s %s held by %s", + type, name, uuid_utoa (owner)); + goto out; + } + + /* Removing the mgmt_v3 lock from the global list */ + dict_del (mgmt_v3_lock, key); + + gf_log (this->name, GF_LOG_DEBUG, + "Lock for %s %s successfully released", + type, name); + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.h b/xlators/mgmt/glusterd/src/glusterd-locks.h new file mode 100644 index 000000000..83eb8c997 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-locks.h @@ -0,0 +1,51 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
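glusterd_mgmt_v3_lock and glusterd_mgmt_v3_unlock above form the core of the registry: the dict key is <name>_<type> and the value a heap-allocated owner uuid, so a second lock attempt fails on the non-null owner and an unlock by anyone else fails on the uuid comparison. Typical in-process usage, sketched; my_uuid stands for the daemon's own uuid, which glusterd keeps in its private structure:

/* Sketch: take and release a volume lock for this node. */
static int
lock_unlock_example (uuid_t my_uuid)
{
        int ret = -1;

        ret = glusterd_mgmt_v3_lock ("patchy", my_uuid, "vol");
        if (ret)
                return ret;     /* held by someone, or invalid type */

        /* ... perform the volume operation under the lock ... */

        return glusterd_mgmt_v3_unlock ("patchy", my_uuid, "vol");
}
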
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _GLUSTERD_LOCKS_H_ +#define _GLUSTERD_LOCKS_H_ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +typedef struct mgmt_v3_lock_object_ { + uuid_t lock_owner; +} mgmt_v3_lock_obj; + +typedef struct mgmt_v3_lock_valid_entities { + char *type; /* Entity type like vol, snap */ + gf_boolean_t default_value; /* The default value that * + * determines if the locks * + * should be held for that * + * entity */ +} valid_entities; + +int32_t +glusterd_mgmt_v3_lock_init (); + +void +glusterd_mgmt_v3_lock_fini (); + +int32_t +glusterd_get_mgmt_v3_lock_owner (char *volname, uuid_t *uuid); + +int32_t +glusterd_mgmt_v3_lock (const char *key, uuid_t uuid, char *type); + +int32_t +glusterd_mgmt_v3_unlock (const char *key, uuid_t uuid, char *type); + +int32_t +glusterd_multiple_mgmt_v3_lock (dict_t *dict, uuid_t uuid); + +int32_t +glusterd_multiple_mgmt_v3_unlock (dict_t *dict, uuid_t uuid); + +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-log-ops.c b/xlators/mgmt/glusterd/src/glusterd-log-ops.c index bd07af5b0..33bd95c03 100644 --- a/xlators/mgmt/glusterd/src/glusterd-log-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-log-ops.c @@ -24,7 +24,7 @@ #include <signal.h> int -glusterd_handle_log_rotate (rpcsvc_request_t *req) +__glusterd_handle_log_rotate (rpcsvc_request_t *req) { int32_t ret = -1; gf_cli_req cli_req = {{0,}}; @@ -90,6 +90,13 @@ out: return ret; } +int +glusterd_handle_log_rotate (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_log_rotate); +} + /* op-sm */ int glusterd_op_stage_log_rotate (dict_t *dict, char **op_errstr) @@ -160,7 +167,6 @@ glusterd_op_log_rotate (dict_t *dict) xlator_t *this = NULL; char *volname = NULL; char *brick = NULL; - char path[PATH_MAX] = {0,}; char logfile[PATH_MAX] = {0,}; char pidfile[PATH_MAX] = {0,}; FILE *file = NULL; @@ -216,10 +222,7 @@ cont: valid_brick = 1; - GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); - GLUSTERD_GET_BRICK_PIDFILE (pidfile, path, brickinfo->hostname, - brickinfo->path); - + GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv); file = fopen (pidfile, "r+"); if (!file) { gf_log ("", GF_LOG_ERROR, "Unable to open pidfile: %s", diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h index 98216e28a..e6f6a0333 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h +++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h @@ -66,7 +66,10 @@ typedef enum gf_gld_mem_types_ { gf_gld_mt_hooks_stub_t = gf_common_mt_end + 50, gf_gld_mt_hooks_priv_t = gf_common_mt_end + 51, gf_gld_mt_mop_commit_req_t = gf_common_mt_end + 52, - gf_gld_mt_end = gf_common_mt_end + 53, + gf_gld_mt_int = gf_common_mt_end + 53, + gf_gld_mt_snap_t = gf_common_mt_end + 54, + gf_gld_mt_missed_snapinfo_t = gf_common_mt_end + 55, + gf_gld_mt_end = gf_common_mt_end + 56, } gf_gld_mem_types_t; #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c new file mode 100644 index 000000000..a5f38ce9c --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c @@ -0,0 +1,924 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +/* rpc related syncops */ +#include "rpc-clnt.h" +#include "protocol-common.h" +#include "xdr-generic.h" +#include "glusterd1-xdr.h" +#include "glusterd-syncop.h" + +#include "glusterd.h" +#include "glusterd-utils.h" +#include "glusterd-locks.h" +#include "glusterd-mgmt.h" +#include "glusterd-op-sm.h" + +static int +glusterd_mgmt_v3_null (rpcsvc_request_t *req) +{ + return 0; +} + +static int +glusterd_mgmt_v3_lock_send_resp (rpcsvc_request_t *req, int32_t status) +{ + + gd1_mgmt_v3_lock_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + if (rsp.op_ret) + rsp.op_errno = errno; + + glusterd_get_uuid (&rsp.uuid); + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); + + gf_log (this->name, GF_LOG_DEBUG, + "Responded to mgmt_v3 lock, ret: %d", ret); + + return ret; +} + +static int +glusterd_synctasked_mgmt_v3_lock (rpcsvc_request_t *req, + gd1_mgmt_v3_lock_req *lock_req, + glusterd_op_lock_ctx_t *ctx) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (ctx); + GF_ASSERT (ctx->dict); + + /* Trying to acquire multiple mgmt_v3 locks */ + ret = glusterd_multiple_mgmt_v3_lock (ctx->dict, ctx->uuid); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire mgmt_v3 locks for %s", + uuid_utoa (ctx->uuid)); + + ret = glusterd_mgmt_v3_lock_send_resp (req, ret); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_op_state_machine_mgmt_v3_lock (rpcsvc_request_t *req, + gd1_mgmt_v3_lock_req *lock_req, + glusterd_op_lock_ctx_t *ctx) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_op_info_t txn_op_info = {{0},}; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + glusterd_txn_opinfo_init (&txn_op_info, NULL, &lock_req->op, + ctx->dict, req); + + ret = glusterd_set_txn_opinfo (&lock_req->txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + goto out; + } + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK, + &lock_req->txn_id, ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_LOCK"); + +out: + glusterd_friend_sm (); + glusterd_op_sm (); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_handle_mgmt_v3_lock_fn (rpcsvc_request_t *req) +{ + gd1_mgmt_v3_lock_req lock_req = {{0},}; + int32_t ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_op_lock_ctx_t *ctx = NULL; + xlator_t *this = NULL; + gf_boolean_t is_synctasked = _gf_false; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &lock_req, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode lock " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "Received mgmt_v3 lock req " + "from uuid: %s", uuid_utoa (lock_req.uuid)); + + if (glusterd_friend_find_by_uuid (lock_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. 
Ignoring request.", + uuid_utoa (lock_req.uuid)); + ret = -1; + goto out; + } + + ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t); + if (!ctx) { + ret = -1; + goto out; + } + + uuid_copy (ctx->uuid, lock_req.uuid); + ctx->req = req; + + ctx->dict = dict_new (); + if (!ctx->dict) { + ret = -1; + goto out; + } + + ret = dict_unserialize (lock_req.dict.dict_val, + lock_req.dict.dict_len, &ctx->dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + is_synctasked = dict_get_str_boolean (ctx->dict, + "is_synctasked", _gf_false); + if (is_synctasked) + ret = glusterd_synctasked_mgmt_v3_lock (req, &lock_req, ctx); + else + ret = glusterd_op_state_machine_mgmt_v3_lock (req, &lock_req, + ctx); + +out: + + if (ret) { + if (ctx->dict) + dict_unref (ctx->dict); + if (ctx) + GF_FREE (ctx); + } + + free (lock_req.dict.dict_val); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_mgmt_v3_pre_validate_send_resp (rpcsvc_request_t *req, + int32_t op, int32_t status, + char *op_errstr, dict_t *rsp_dict) +{ + gd1_mgmt_v3_pre_val_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + glusterd_get_uuid (&rsp.uuid); + rsp.op = op; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get serialized length of dict"); + goto out; + } + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_rsp); + + GF_FREE (rsp.dict.dict_val); +out: + gf_log (this->name, GF_LOG_DEBUG, + "Responded to pre validation, ret: %d", ret); + return ret; +} + +static int +glusterd_handle_pre_validate_fn (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_v3_pre_val_req op_req = {{0},}; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to decode pre validation " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. 
Ignoring request.", + uuid_utoa (op_req.uuid)); + ret = -1; + goto out; + } + + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_unserialize (op_req.dict.dict_val, + op_req.dict.dict_len, &dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get new dictionary"); + return -1; + } + + ret = gd_mgmt_v3_pre_validate_fn (op_req.op, dict, &op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Pre Validation failed on operation %s", + gd_op_list[op_req.op]); + } + + ret = glusterd_mgmt_v3_pre_validate_send_resp (req, op_req.op, + ret, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to send Pre Validation " + "response for operation %s", + gd_op_list[op_req.op]); + goto out; + } + +out: + if (op_errstr && (strcmp (op_errstr, ""))) + GF_FREE (op_errstr); + + free (op_req.dict.dict_val); + + if (dict) + dict_unref (dict); + + if (rsp_dict) + dict_unref (rsp_dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_mgmt_v3_brick_op_send_resp (rpcsvc_request_t *req, + int32_t op, int32_t status, + char *op_errstr, dict_t *rsp_dict) +{ + gd1_mgmt_v3_brick_op_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + glusterd_get_uuid (&rsp.uuid); + rsp.op = op; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get serialized length of dict"); + goto out; + } + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_rsp); + + GF_FREE (rsp.dict.dict_val); +out: + gf_log (this->name, GF_LOG_DEBUG, + "Responded to brick op, ret: %d", ret); + return ret; +} + +static int +glusterd_handle_brick_op_fn (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_v3_brick_op_req op_req = {{0},}; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode brick op " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. 
Ignoring request.", + uuid_utoa (op_req.uuid)); + ret = -1; + goto out; + } + + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_unserialize (op_req.dict.dict_val, + op_req.dict.dict_len, &dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get new dictionary"); + return -1; + } + + ret = gd_mgmt_v3_brick_op_fn (op_req.op, dict, &op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Brick Op failed on operation %s", + gd_op_list[op_req.op]); + } + + ret = glusterd_mgmt_v3_brick_op_send_resp (req, op_req.op, + ret, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to send brick op " + "response for operation %s", + gd_op_list[op_req.op]); + goto out; + } + +out: + if (op_errstr && (strcmp (op_errstr, ""))) + GF_FREE (op_errstr); + + free (op_req.dict.dict_val); + + if (dict) + dict_unref (dict); + + if (rsp_dict) + dict_unref (rsp_dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_mgmt_v3_commit_send_resp (rpcsvc_request_t *req, + int32_t op, int32_t status, + char *op_errstr, dict_t *rsp_dict) +{ + gd1_mgmt_v3_commit_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + glusterd_get_uuid (&rsp.uuid); + rsp.op = op; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get serialized length of dict"); + goto out; + } + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_commit_rsp); + + GF_FREE (rsp.dict.dict_val); +out: + gf_log (this->name, GF_LOG_DEBUG, "Responded to commit, ret: %d", ret); + return ret; +} + +static int +glusterd_handle_commit_fn (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_v3_commit_req op_req = {{0},}; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_v3_commit_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode commit " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. 
Ignoring request.", + uuid_utoa (op_req.uuid)); + ret = -1; + goto out; + } + + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_unserialize (op_req.dict.dict_val, + op_req.dict.dict_len, &dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get new dictionary"); + return -1; + } + + ret = gd_mgmt_v3_commit_fn (op_req.op, dict, &op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "commit failed on operation %s", + gd_op_list[op_req.op]); + } + + ret = glusterd_mgmt_v3_commit_send_resp (req, op_req.op, + ret, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to send commit " + "response for operation %s", + gd_op_list[op_req.op]); + goto out; + } + +out: + if (op_errstr && (strcmp (op_errstr, ""))) + GF_FREE (op_errstr); + + free (op_req.dict.dict_val); + + if (dict) + dict_unref (dict); + + if (rsp_dict) + dict_unref (rsp_dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_mgmt_v3_post_validate_send_resp (rpcsvc_request_t *req, + int32_t op, int32_t status, + char *op_errstr, dict_t *rsp_dict) +{ + gd1_mgmt_v3_post_val_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + glusterd_get_uuid (&rsp.uuid); + rsp.op = op; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get serialized length of dict"); + goto out; + } + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_post_val_rsp); + + GF_FREE (rsp.dict.dict_val); +out: + gf_log (this->name, GF_LOG_DEBUG, + "Responded to post validation, ret: %d", ret); + return ret; +} + +static int +glusterd_handle_post_validate_fn (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_v3_post_val_req op_req = {{0},}; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_v3_post_val_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to decode post validation " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. 
Ignoring request.", + uuid_utoa (op_req.uuid)); + ret = -1; + goto out; + } + + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_unserialize (op_req.dict.dict_val, + op_req.dict.dict_len, &dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get new dictionary"); + return -1; + } + + ret = gd_mgmt_v3_post_validate_fn (op_req.op, op_req.op_ret, dict, + &op_errstr, rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Post Validation failed on operation %s", + gd_op_list[op_req.op]); + } + + ret = glusterd_mgmt_v3_post_validate_send_resp (req, op_req.op, + ret, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to send Post Validation " + "response for operation %s", + gd_op_list[op_req.op]); + goto out; + } + +out: + if (op_errstr && (strcmp (op_errstr, ""))) + GF_FREE (op_errstr); + + free (op_req.dict.dict_val); + + if (dict) + dict_unref (dict); + + if (rsp_dict) + dict_unref (rsp_dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_mgmt_v3_unlock_send_resp (rpcsvc_request_t *req, int32_t status) +{ + + gd1_mgmt_v3_unlock_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + if (rsp.op_ret) + rsp.op_errno = errno; + + glusterd_get_uuid (&rsp.uuid); + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); + + gf_log (this->name, GF_LOG_DEBUG, + "Responded to mgmt_v3 unlock, ret: %d", ret); + + return ret; +} + +static int +glusterd_synctasked_mgmt_v3_unlock (rpcsvc_request_t *req, + gd1_mgmt_v3_unlock_req *unlock_req, + glusterd_op_lock_ctx_t *ctx) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (ctx); + + /* Trying to release multiple mgmt_v3 locks */ + ret = glusterd_multiple_mgmt_v3_unlock (ctx->dict, ctx->uuid); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release mgmt_v3 locks for %s", + uuid_utoa(ctx->uuid)); + } + + ret = glusterd_mgmt_v3_unlock_send_resp (req, ret); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + + +static int +glusterd_op_state_machine_mgmt_v3_unlock (rpcsvc_request_t *req, + gd1_mgmt_v3_unlock_req *lock_req, + glusterd_op_lock_ctx_t *ctx) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK, + &lock_req->txn_id, ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_UNLOCK"); + + glusterd_friend_sm (); + glusterd_op_sm (); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_handle_mgmt_v3_unlock_fn (rpcsvc_request_t *req) +{ + gd1_mgmt_v3_unlock_req lock_req = {{0},}; + int32_t ret = -1; + glusterd_op_lock_ctx_t *ctx = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + gf_boolean_t is_synctasked = _gf_false; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &lock_req, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode unlock " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_log (this->name, 
GF_LOG_DEBUG, "Received mgmt_v3 unlock req " + "from uuid: %s", uuid_utoa (lock_req.uuid)); + + if (glusterd_friend_find_by_uuid (lock_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa (lock_req.uuid)); + ret = -1; + goto out; + } + + ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t); + if (!ctx) { + ret = -1; + goto out; + } + + uuid_copy (ctx->uuid, lock_req.uuid); + ctx->req = req; + + ctx->dict = dict_new (); + if (!ctx->dict) { + ret = -1; + goto out; + } + + ret = dict_unserialize (lock_req.dict.dict_val, + lock_req.dict.dict_len, &ctx->dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + is_synctasked = dict_get_str_boolean (ctx->dict, + "is_synctasked", _gf_false); + if (is_synctasked) + ret = glusterd_synctasked_mgmt_v3_unlock (req, &lock_req, ctx); + else + ret = glusterd_op_state_machine_mgmt_v3_unlock (req, &lock_req, + ctx); + +out: + + if (ret) { + if (ctx->dict) + dict_unref (ctx->dict); + if (ctx) + GF_FREE (ctx); + } + + free (lock_req.dict.dict_val); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_handle_mgmt_v3_lock (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_mgmt_v3_lock_fn); +} + +static int +glusterd_handle_pre_validate (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_pre_validate_fn); +} + +static int +glusterd_handle_brick_op (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_brick_op_fn); +} + +static int +glusterd_handle_commit (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_commit_fn); +} + +static int +glusterd_handle_post_validate (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_post_validate_fn); +} + +int +glusterd_handle_mgmt_v3_unlock (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_mgmt_v3_unlock_fn); +} + +rpcsvc_actor_t gd_svc_mgmt_v3_actors[] = { + [GLUSTERD_MGMT_V3_NULL] = { "NULL", GLUSTERD_MGMT_V3_NULL, glusterd_mgmt_v3_null, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_LOCK] = { "MGMT_V3_LOCK", GLUSTERD_MGMT_V3_LOCK, glusterd_handle_mgmt_v3_lock, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_PRE_VALIDATE] = { "PRE_VAL", GLUSTERD_MGMT_V3_PRE_VALIDATE, glusterd_handle_pre_validate, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_BRICK_OP] = { "BRCK_OP", GLUSTERD_MGMT_V3_BRICK_OP, glusterd_handle_brick_op, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_COMMIT] = { "COMMIT", GLUSTERD_MGMT_V3_COMMIT, glusterd_handle_commit, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_POST_VALIDATE] = { "POST_VAL", GLUSTERD_MGMT_V3_POST_VALIDATE, glusterd_handle_post_validate, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_UNLOCK] = { "MGMT_V3_UNLOCK", GLUSTERD_MGMT_V3_UNLOCK, glusterd_handle_mgmt_v3_unlock, NULL, 0, DRC_NA}, +}; + +struct rpcsvc_program gd_svc_mgmt_v3_prog = { + .progname = "GlusterD svc mgmt v3", + .prognum = GD_MGMT_V3_PROGRAM, + .progver = GD_MGMT_V3_VERSION, + .numactors = GLUSTERD_MGMT_V3_MAXVALUE, + .actors = gd_svc_mgmt_v3_actors, + .synctask = _gf_true, +}; diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c new file mode 100644 index 000000000..d52532e54 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c @@ -0,0 +1,1893 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. 
<http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +/* rpc related syncops */ +#include "rpc-clnt.h" +#include "protocol-common.h" +#include "xdr-generic.h" +#include "glusterd1-xdr.h" +#include "glusterd-syncop.h" + +#include "glusterd.h" +#include "glusterd-utils.h" +#include "glusterd-locks.h" +#include "glusterd-mgmt.h" +#include "glusterd-op-sm.h" + +extern struct rpc_clnt_program gd_mgmt_v3_prog; + + +static void +gd_mgmt_v3_collate_errors (struct syncargs *args, int op_ret, int op_errno, + char *op_errstr, int op_code, + glusterd_peerinfo_t *peerinfo, u_char *uuid) +{ + char *peer_str = NULL; + char err_str[PATH_MAX] = "Please check log file for details."; + char op_err[PATH_MAX] = ""; + int32_t len = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (args); + GF_ASSERT (uuid); + + if (op_ret) { + args->op_ret = op_ret; + args->op_errno = op_errno; + + if (peerinfo) + peer_str = peerinfo->hostname; + else + peer_str = uuid_utoa (uuid); + + if (op_errstr && strcmp (op_errstr, "")) { + len = snprintf (err_str, sizeof(err_str) - 1, + "Error: %s", op_errstr); + err_str[len] = '\0'; + } + + switch (op_code){ + case GLUSTERD_MGMT_V3_LOCK: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Locking failed " + "on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_PRE_VALIDATE: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Pre Validation failed " + "on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_BRICK_OP: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Brick ops failed " + "on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_COMMIT: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Commit failed on %s. %s", + peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_POST_VALIDATE: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Post Validation failed " + "on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_UNLOCK: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Unlocking failed " + "on %s. %s", peer_str, err_str); + break; + } + } + op_err[len] = '\0'; + + if (args->errstr) { + len = snprintf (err_str, sizeof(err_str) - 1, + "%s\n%s", args->errstr, + op_err); + GF_FREE (args->errstr); + args->errstr = NULL; + } else + len = snprintf (err_str, sizeof(err_str) - 1, + "%s", op_err); + err_str[len] = '\0'; + + gf_log (this->name, GF_LOG_ERROR, "%s", op_err); + args->errstr = gf_strdup (err_str); + } + + return; +} + +int32_t +gd_mgmt_v3_pre_validate_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + switch (op) { + case GD_OP_SNAP: + { + ret = glusterd_snapshot_prevalidate (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Snapshot Prevalidate Failed"); + goto out; + } + + break; + } + default: + break; + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_DEBUG, "OP = %d. 
Returning %d", op, ret); + return ret; +} + +int32_t +gd_mgmt_v3_brick_op_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + switch (op) { + case GD_OP_SNAP: + { + ret = glusterd_snapshot_brickop (dict, op_errstr, rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "snapshot brickop " + "failed"); + goto out; + } + break; + } + default: + break; + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "OP = %d. Returning %d", op, ret); + return ret; +} + +int32_t +gd_mgmt_v3_commit_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + switch (op) { + case GD_OP_SNAP: + { + ret = glusterd_snapshot (dict, op_errstr, rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Snapshot Commit Failed"); + goto out; + } + break; + } + default: + break; + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_DEBUG, "OP = %d. Returning %d", op, ret); + return ret; +} + +int32_t +gd_mgmt_v3_post_validate_fn (glusterd_op_t op, int32_t op_ret, dict_t *dict, + char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + switch (op) { + case GD_OP_SNAP: + { + ret = glusterd_snapshot_postvalidate (dict, op_ret, + op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "postvalidate operation failed"); + goto out; + } + break; + } + default: + break; + } + + ret = 0; + +out: + gf_log (this->name, GF_LOG_TRACE, "OP = %d. Returning %d", op, ret); + return ret; +} + +int32_t +gd_mgmt_v3_lock_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_lock_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + /* Even though the lock command has failed, while collating the errors + (gd_mgmt_v3_collate_errors), args->op_ret and args->op_errno will be + used. @args is obtained from frame->local. So before checking the + status of the request and going out if its a failure, args should be + set to frame->local. Otherwise, while collating args will be NULL. + This applies to other phases such as prevalidate, brickop, commit and + postvalidate also. 
+ */ + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_LOCK, + peerinfo, rsp.uuid); + free (rsp.dict.dict_val); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_lock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_lock_cbk_fn); +} + +int +gd_mgmt_v3_lock (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + gd1_mgmt_v3_lock_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_LOCK, + gd_mgmt_v3_lock_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_lock_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_initiate_lockdown (glusterd_conf_t *conf, glusterd_op_t op, + dict_t *dict, char **op_errstr, int npeers, + gf_boolean_t *is_acquired) +{ + char *volname = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + int32_t ret = -1; + int32_t peer_cnt = 0; + struct syncargs args = {0}; + struct list_head *peers = NULL; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (is_acquired); + + peers = &conf->xaction_peers; + + /* Trying to acquire multiple mgmt_v3 locks on local node */ + ret = glusterd_multiple_mgmt_v3_lock (dict, MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire mgmt_v3 locks on localhost"); + goto out; + } + + *is_acquired = _gf_true; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending mgmt_v3 lock req to other nodes in the cluster */ + gd_syncargs_init (&args, NULL); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_lock (op, dict, peerinfo, &args, + MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent lock op req for %s " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); +out: + if (ret) { + if (*op_errstr) + gf_log (this->name, GF_LOG_ERROR, "%s", + *op_errstr); + + if (volname) + ret = gf_asprintf (op_errstr, + "Another transaction is in progress " + "for %s. 
Please try again after " + "sometime.", volname); + else + ret = gf_asprintf (op_errstr, + "Another transaction is in progress " + "Please try again after sometime."); + + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + + return ret; +} + +int +glusterd_pre_validate_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp) +{ + int32_t ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (aggr); + GF_ASSERT (rsp); + + switch (op) { + case GD_OP_SNAP: + ret = glusterd_snap_pre_validate_use_rsp_dict (aggr, rsp); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to aggregate prevalidate " + "response dictionaries."); + goto out; + } + break; + default: + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Invalid op (%s)", gd_op_list[op]); + + break; + } +out: + return ret; +} + +int32_t +gd_mgmt_v3_pre_validate_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_pre_val_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + dict_t *rsp_dict = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_rsp); + if (ret < 0) + goto out; + + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + rsp_dict = dict_new (); + + ret = dict_unserialize (rsp.dict.dict_val, + rsp.dict.dict_len, + &rsp_dict); + if (ret < 0) { + GF_FREE (rsp.dict.dict_val); + goto out; + } else { + rsp_dict->extra_stdfree = rsp.dict.dict_val; + } + } + + uuid_copy (args->uuid, rsp.uuid); + pthread_mutex_lock (&args->lock_dict); + { + ret = glusterd_pre_validate_aggr_rsp_dict (rsp.op, args->dict, + rsp_dict); + } + pthread_mutex_unlock (&args->lock_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", + "Failed to aggregate response from " + " node/brick"); + if (!rsp.op_ret) + op_ret = ret; + else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + } else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + +out: + if (rsp_dict) + dict_unref (rsp_dict); + + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_PRE_VALIDATE, + peerinfo, rsp.uuid); + + if (rsp.op_errstr) + free (rsp.op_errstr); + + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_pre_validate_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_pre_validate_cbk_fn); +} + +int +gd_mgmt_v3_pre_validate_req (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_pre_val_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + synclock_unlock 
(&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_PRE_VALIDATE, + gd_mgmt_v3_pre_validate_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_pre_val_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_pre_validate (glusterd_conf_t *conf, glusterd_op_t op, + dict_t *req_dict, char **op_errstr, int npeers) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + struct list_head *peers = NULL; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (req_dict); + GF_ASSERT (op_errstr); + + peers = &conf->xaction_peers; + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create response dictionary"); + goto out; + } + + /* Pre Validation on local node */ + ret = gd_mgmt_v3_pre_validate_fn (op, req_dict, op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Pre Validation failed for " + "operation %s on local node", + gd_op_list[op]); + + if (*op_errstr == NULL) { + ret = gf_asprintf (op_errstr, + "Pre-validation failed " + "on localhost. Please " + "check log file for details"); + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + goto out; + } + + ret = glusterd_pre_validate_aggr_rsp_dict (op, req_dict, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", + "Failed to aggregate response from " + " node/brick"); + goto out; + } + + dict_unref (rsp_dict); + rsp_dict = NULL; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending Pre Validation req to other nodes in the cluster */ + gd_syncargs_init (&args, req_dict); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_pre_validate_req (op, req_dict, peerinfo, &args, + MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "Pre Validation failed on peers"); + + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + } + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent pre valaidation req for %s " + "to %d peers. 
Returning %d", gd_op_list[op], peer_cnt, ret); +out: + return ret; +} + +int +glusterd_mgmt_v3_build_payload (dict_t **req, char **op_errstr, dict_t *dict, + glusterd_op_t op) +{ + int32_t ret = -1; + dict_t *req_dict = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (op_errstr); + GF_ASSERT (dict); + + req_dict = dict_new (); + if (!req_dict) + goto out; + + switch (op) { + case GD_OP_SNAP: + dict_copy (dict, req_dict); + break; + default: + break; + } + + *req = req_dict; + ret = 0; +out: + return ret; +} + +int32_t +gd_mgmt_v3_brick_op_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_brick_op_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + /* If the operation failed, then iov can be NULL. So better check the + status of the operation and then worry about iov (if the status of + the command is success) + */ + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_BRICK_OP, + peerinfo, rsp.uuid); + + if (rsp.op_errstr) + free (rsp.op_errstr); + + free (rsp.dict.dict_val); + + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_brick_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_brick_op_cbk_fn); +} + +int +gd_mgmt_v3_brick_op_req (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_brick_op_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_BRICK_OP, + gd_mgmt_v3_brick_op_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_brick_op_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_brick_op (glusterd_conf_t *conf, glusterd_op_t op, + dict_t *req_dict, char **op_errstr, int npeers) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + struct list_head *peers = NULL; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (req_dict); + GF_ASSERT (op_errstr); + + peers = 
&conf->xaction_peers; + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create response dictionary"); + goto out; + } + + /* Perform brick op on local node */ + ret = gd_mgmt_v3_brick_op_fn (op, req_dict, op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Brick ops failed for " + "operation %s on local node", + gd_op_list[op]); + + if (*op_errstr == NULL) { + ret = gf_asprintf (op_errstr, + "Brick ops failed " + "on localhost. Please " + "check log file for details"); + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + goto out; + } + + dict_unref (rsp_dict); + rsp_dict = NULL; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending brick op req to other nodes in the cluster */ + gd_syncargs_init (&args, NULL); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_brick_op_req (op, req_dict, peerinfo, &args, + MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "Brick ops failed on peers"); + + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + } + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent brick op req for %s " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); +out: + return ret; +} + +int32_t +gd_mgmt_v3_commit_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_commit_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + dict_t *rsp_dict = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_commit_rsp); + if (ret < 0) + goto out; + + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + rsp_dict = dict_new (); + + ret = dict_unserialize (rsp.dict.dict_val, + rsp.dict.dict_len, + &rsp_dict); + if (ret < 0) { + GF_FREE (rsp.dict.dict_val); + goto out; + } else { + rsp_dict->extra_stdfree = rsp.dict.dict_val; + } + } + + uuid_copy (args->uuid, rsp.uuid); + pthread_mutex_lock (&args->lock_dict); + { + ret = glusterd_syncop_aggr_rsp_dict (rsp.op, args->dict, + rsp_dict); + } + pthread_mutex_unlock (&args->lock_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", + "Failed to aggregate response from " + " node/brick"); + if (!rsp.op_ret) + op_ret = ret; + else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + } else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + +out: + if (rsp_dict) + dict_unref (rsp_dict); + + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_COMMIT, + peerinfo, rsp.uuid); + + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_commit_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_commit_cbk_fn); +} + +int +gd_mgmt_v3_commit_req (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + 
struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_commit_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_COMMIT, + gd_mgmt_v3_commit_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_commit_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_commit (glusterd_conf_t *conf, glusterd_op_t op, + dict_t *op_ctx, dict_t *req_dict, + char **op_errstr, int npeers) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + struct list_head *peers = NULL; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (op_ctx); + GF_ASSERT (req_dict); + GF_ASSERT (op_errstr); + + peers = &conf->xaction_peers; + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create response dictionary"); + goto out; + } + + /* Commit on local node */ + ret = gd_mgmt_v3_commit_fn (op, req_dict, op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Commit failed for " + "operation %s on local node", + gd_op_list[op]); + + if (*op_errstr == NULL) { + ret = gf_asprintf (op_errstr, + "Commit failed " + "on localhost. Please " + "check log file for details."); + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + goto out; + } + + ret = glusterd_syncop_aggr_rsp_dict (op, op_ctx, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", + "Failed to aggregate response from " + "node/brick"); + goto out; + } + + dict_unref (rsp_dict); + rsp_dict = NULL; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending commit req to other nodes in the cluster */ + gd_syncargs_init (&args, op_ctx); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_commit_req (op, req_dict, peerinfo, &args, + MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "Commit failed on peers"); + + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + } + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent commit req for %s to %d " + "peers. 
Returning %d", gd_op_list[op], peer_cnt, ret); +out: + return ret; +} + +int32_t +gd_mgmt_v3_post_validate_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_post_val_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_post_val_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_POST_VALIDATE, + peerinfo, rsp.uuid); + if (rsp.op_errstr) + free (rsp.op_errstr); + + free (rsp.dict.dict_val); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_post_validate_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_post_validate_cbk_fn); +} + +int +gd_mgmt_v3_post_validate_req (glusterd_op_t op, int32_t op_ret, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_post_val_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + req.op_ret = op_ret; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_POST_VALIDATE, + gd_mgmt_v3_post_validate_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_post_val_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_post_validate (glusterd_conf_t *conf, glusterd_op_t op, + int32_t op_ret, dict_t *dict, dict_t *req_dict, + char **op_errstr, int npeers) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + struct list_head *peers = NULL; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (dict); + GF_ASSERT (req_dict); + GF_ASSERT (op_errstr); + + peers = &conf->xaction_peers; + GF_ASSERT (peers); + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create response dictionary"); + goto out; + } + + /* Copy the contents of dict like missed snaps info to req_dict */ + dict_copy (dict, req_dict); + + /* Post Validation on local node */ + ret = gd_mgmt_v3_post_validate_fn (op, op_ret, req_dict, op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Post Validation failed for " + "operation %s on local node", + gd_op_list[op]); + + if 
(*op_errstr == NULL) { + ret = gf_asprintf (op_errstr, + "Post-validation failed " + "on localhost. Please check " + "log file for details"); + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + goto out; + } + + dict_unref (rsp_dict); + rsp_dict = NULL; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending Post Validation req to other nodes in the cluster */ + gd_syncargs_init (&args, req_dict); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_post_validate_req (op, op_ret, req_dict, peerinfo, + &args, MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "Post Validation failed on peers"); + + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + } + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent post validation req for %s " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); +out: + return ret; +} + +int32_t +gd_mgmt_v3_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_unlock_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_UNLOCK, + peerinfo, rsp.uuid); + free (rsp.dict.dict_val); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_unlock_cbk_fn); +} + +int +gd_mgmt_v3_unlock (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_unlock_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_UNLOCK, + gd_mgmt_v3_unlock_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_unlock_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_release_peer_locks (glusterd_conf_t *conf, glusterd_op_t op, + dict_t *dict, int32_t op_ret, + char **op_errstr, int npeers, + gf_boolean_t is_acquired) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; 
+ glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + struct list_head *peers = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + peers = &conf->xaction_peers; + + /* If the lock has not been held during this + * transaction, do not send unlock requests */ + if (!is_acquired) + goto out; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending mgmt_v3 unlock req to other nodes in the cluster */ + gd_syncargs_init (&args, NULL); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_unlock (op, dict, peerinfo, &args, + MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unlock failed on peers"); + + if (!op_ret && args.errstr) + *op_errstr = gf_strdup (args.errstr); + } + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent unlock op req for %s " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); + +out: + return ret; +} + +int32_t +glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict) +{ + int32_t ret = -1; + int32_t op_ret = -1; + int32_t npeers = 0; + dict_t *req_dict = NULL; + dict_t *tmp_dict = NULL; + glusterd_conf_t *conf = NULL; + char *op_errstr = NULL; + xlator_t *this = NULL; + gf_boolean_t is_acquired = _gf_false; + uuid_t *originator_uuid = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (dict); + conf = this->private; + GF_ASSERT (conf); + + /* Save the MY_UUID as the originator_uuid. This originator_uuid + * will be used by is_origin_glusterd() to determine if a node + * is the originator node for a command. 
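+ * + * For illustration only (an editor's sketch with a hypothetical + * helper name, not code from this patch), such a check can read the + * uuid back from the dict and compare it against MY_UUID: + * + * gf_boolean_t + * is_local_originator (dict_t *dict) + * { + * uuid_t *uuid = NULL; + * + * if (dict_get_bin (dict, "originator_uuid", + * (void **)&uuid)) + * return _gf_false; + * + * return uuid_compare (MY_UUID, *uuid) ? _gf_false + * : _gf_true; + * } 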
*/ + originator_uuid = GF_CALLOC (1, sizeof(uuid_t), + gf_common_mt_uuid_t); + if (!originator_uuid) { + ret = -1; + goto out; + } + + uuid_copy (*originator_uuid, MY_UUID); + ret = dict_set_bin (dict, "originator_uuid", + originator_uuid, sizeof (uuid_t)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set originator_uuid."); + goto out; + } + + /* Marking the operation as complete synctasked */ + ret = dict_set_int32 (dict, "is_synctasked", _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set synctasked flag."); + goto out; + } + + /* Use a copy at local unlock as cli response will be sent before + * the unlock and the volname in the dict might be removed */ + tmp_dict = dict_new(); + if (!tmp_dict) { + gf_log (this->name, GF_LOG_ERROR, "Unable to create dict"); + goto out; + } + dict_copy (dict, tmp_dict); + + /* BUILD PEERS LIST */ + INIT_LIST_HEAD (&conf->xaction_peers); + npeers = gd_build_peers_list (&conf->peers, &conf->xaction_peers, op); + + /* LOCKDOWN PHASE - Acquire mgmt_v3 locks */ + ret = glusterd_mgmt_v3_initiate_lockdown (conf, op, dict, &op_errstr, + npeers, &is_acquired); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "mgmt_v3 lockdown failed."); + goto out; + } + + /* BUILD PAYLOAD */ + ret = glusterd_mgmt_v3_build_payload (&req_dict, &op_errstr, dict, op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, LOGSTR_BUILD_PAYLOAD, + gd_op_list[op]); + if (op_errstr == NULL) + gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD); + goto out; + } + + /* PRE-COMMIT VALIDATE PHASE */ + ret = glusterd_mgmt_v3_pre_validate (conf, op, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Pre Validation Failed"); + goto out; + } + + /* COMMIT OP PHASE */ + ret = glusterd_mgmt_v3_commit (conf, op, dict, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Commit Op Failed"); + goto out; + } + + /* POST-COMMIT VALIDATE PHASE */ + /* As of now, post_validate does not handle any command other + than snapshot, so op_ret is sent as 0 here. 
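+ glusterd_mgmt_v3_initiate_snap_phases below, in contrast, passes + the real op_ret of the commit phase to + glusterd_mgmt_v3_post_validate, so that a failed snapshot commit + can be cleaned up during post-validation. 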
+ */ + ret = glusterd_mgmt_v3_post_validate (conf, op, 0, dict, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Post Validation Failed"); + goto out; + } + + ret = 0; +out: + op_ret = ret; + /* UNLOCK PHASE FOR PEERS*/ + (void) glusterd_mgmt_v3_release_peer_locks (conf, op, dict, + op_ret, &op_errstr, + npeers, is_acquired); + + /* LOCAL VOLUME(S) UNLOCK */ + if (is_acquired) { + /* Trying to release multiple mgmt_v3 locks */ + ret = glusterd_multiple_mgmt_v3_unlock (tmp_dict, MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release mgmt_v3 locks on localhost"); + op_ret = ret; + } + } + + /* SEND CLI RESPONSE */ + glusterd_op_send_cli_response (op, op_ret, 0, req, dict, op_errstr); + + if (req_dict) + dict_unref (req_dict); + + if (tmp_dict) + dict_unref (tmp_dict); + + if (op_errstr) { + GF_FREE (op_errstr); + op_errstr = NULL; + } + + return 0; +} + +int32_t +glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict) +{ + int32_t ret = -1; + int32_t op_ret = -1; + int32_t npeers = 0; + dict_t *req_dict = NULL; + dict_t *tmp_dict = NULL; + glusterd_conf_t *conf = NULL; + char *op_errstr = NULL; + xlator_t *this = NULL; + gf_boolean_t is_acquired = _gf_false; + uuid_t *originator_uuid = NULL; + gf_boolean_t success = _gf_false; + char *tmp_errstr = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (dict); + conf = this->private; + GF_ASSERT (conf); + + /* Save the MY_UUID as the originator_uuid. This originator_uuid + * will be used by is_origin_glusterd() to determine if a node + * is the originator node for a command. */ + originator_uuid = GF_CALLOC (1, sizeof(uuid_t), + gf_common_mt_uuid_t); + if (!originator_uuid) { + ret = -1; + goto out; + } + + uuid_copy (*originator_uuid, MY_UUID); + ret = dict_set_bin (dict, "originator_uuid", + originator_uuid, sizeof (uuid_t)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set originator_uuid."); + goto out; + } + + /* Marking the operation as complete synctasked */ + ret = dict_set_int32 (dict, "is_synctasked", _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set synctasked flag."); + goto out; + } + + /* Use a copy at local unlock as cli response will be sent before + * the unlock and the volname in the dict might be removed */ + tmp_dict = dict_new(); + if (!tmp_dict) { + gf_log (this->name, GF_LOG_ERROR, "Unable to create dict"); + goto out; + } + dict_copy (dict, tmp_dict); + + /* BUILD PEERS LIST */ + INIT_LIST_HEAD (&conf->xaction_peers); + npeers = gd_build_peers_list (&conf->peers, &conf->xaction_peers, op); + + /* LOCKDOWN PHASE - Acquire mgmt_v3 locks */ + ret = glusterd_mgmt_v3_initiate_lockdown (conf, op, dict, &op_errstr, + npeers, &is_acquired); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "mgmt_v3 lockdown failed."); + goto out; + } + + /* BUILD PAYLOAD */ + ret = glusterd_mgmt_v3_build_payload (&req_dict, &op_errstr, dict, op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, LOGSTR_BUILD_PAYLOAD, + gd_op_list[op]); + if (op_errstr == NULL) + gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD); + goto out; + } + + /* PRE-COMMIT VALIDATE PHASE */ + ret = glusterd_mgmt_v3_pre_validate (conf, op, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Pre Validation Failed"); + goto out; + } + + /* BRICK OP PHASE for initiating barrier*/ + ret = dict_set_int32 (req_dict, "barrier", 1); + if (ret) + goto out; + ret = glusterd_mgmt_v3_brick_op 
(conf, op, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Brick Ops Failed"); + goto unbarrier; + } + + /* COMMIT OP PHASE */ + /* TODO: As of now, the plan is to do quorum check before sending the + commit fop and if the quorum succeeds, then commit is sent to all + the other glusterds. + snap create functionality now creates the in memory and on disk + objects for the snapshot (marking them as incomplete), takes the lvm + snapshot and then updates the status of the in memory and on disk + snap objects as complete. If one of the glusterds goes down + after taking the lvm snapshot, but before updating the snap object, + it is treated as a snapshot create failure and cleanup is triggered, + i.e. the number of commit responses received by the originator + glusterd should be the same as the number of peers it has sent the + request to (i.e. the npeers variable). If not, then the originator + glusterd will initiate cleanup in the post-validate fop. + Question: What if one of the other glusterds goes down as explained + above and along with it the originator glusterd also goes down? + Who will initiate the cleanup? + */ + ret = glusterd_mgmt_v3_commit (conf, op, dict, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Commit Op Failed"); + /* If the main op fails, save the error string, because + op_errstr will also be used for the unbarrier and + unlock ops and we might otherwise lose the actual + error that caused the failure. + */ + tmp_errstr = op_errstr; + op_errstr = NULL; + goto unbarrier; + } + + success = _gf_true; +unbarrier: + /* BRICK OP PHASE for removing the barrier*/ + ret = dict_set_int32 (req_dict, "barrier", 0); + if (ret) + goto out; + ret = glusterd_mgmt_v3_brick_op (conf, op, req_dict, + &op_errstr, npeers); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Brick Ops Failed"); + goto out; + } + + ret = 0; + +out: + op_ret = ret; + + if (success == _gf_false) + op_ret = -1; + + /* POST-COMMIT VALIDATE PHASE */ + ret = glusterd_mgmt_v3_post_validate (conf, op, op_ret, dict, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Post Validation Failed"); + op_ret = -1; + } + + /* UNLOCK PHASE FOR PEERS*/ + (void) glusterd_mgmt_v3_release_peer_locks (conf, op, dict, + op_ret, &op_errstr, + npeers, is_acquired); + + /* If the commit op (taking the snapshot) failed, the error is stored + in tmp_errstr and unbarrier is called. If unbarrier also fails, + the error that happened in unbarrier is logged and freed, and the + commit-op error stored in tmp_errstr is sent to the cli. 
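+ (The swap happens in the block right after this comment: the + unbarrier error held in op_errstr is logged and freed, and + tmp_errstr is moved back into op_errstr before + glusterd_op_send_cli_response runs.) 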
+ */ + if (tmp_errstr) { + if (op_errstr) { + gf_log (this->name, GF_LOG_ERROR, "unbarrier brick op " + "failed with the error %s", op_errstr); + GF_FREE (op_errstr); + op_errstr = NULL; + } + op_errstr = tmp_errstr; + } + + /* LOCAL VOLUME(S) UNLOCK */ + if (is_acquired) { + /* Trying to release multiple mgmt_v3 locks */ + ret = glusterd_multiple_mgmt_v3_unlock (tmp_dict, MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release mgmt_v3 locks on localhost"); + op_ret = ret; + } + } + + /* SEND CLI RESPONSE */ + glusterd_op_send_cli_response (op, op_ret, 0, req, dict, op_errstr); + + if (req_dict) + dict_unref (req_dict); + + if (tmp_dict) + dict_unref (tmp_dict); + + if (op_errstr) { + GF_FREE (op_errstr); + op_errstr = NULL; + } + + return 0; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-mgmt.h new file mode 100644 index 000000000..b185a9bec --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.h @@ -0,0 +1,45 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _GLUSTERD_MGMT_H_ +#define _GLUSTERD_MGMT_H_ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +int32_t +gd_mgmt_v3_pre_validate_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict); + +int32_t +gd_mgmt_v3_brick_op_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict); + +int32_t +gd_mgmt_v3_commit_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict); + +int32_t +gd_mgmt_v3_post_validate_fn (glusterd_op_t op, int32_t op_ret, dict_t *dict, + char **op_errstr, dict_t *rsp_dict); + +int32_t +glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict); + +int32_t +glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict); + +int +glusterd_snap_pre_validate_use_rsp_dict (dict_t *dst, dict_t *src); + +#endif /* _GLUSTERD_MGMT_H_ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 4e4f0394d..1666f5e4d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -37,6 +37,7 @@ #include "glusterd-store.h" #include "glusterd-hooks.h" #include "glusterd-volgen.h" +#include "glusterd-locks.h" #include "syscall.h" #include "cli1-xdr.h" #include "common-utils.h" @@ -67,6 +68,165 @@ static struct list_head gd_op_sm_queue; pthread_mutex_t gd_op_sm_lock; glusterd_op_info_t opinfo = {{0},}; +uuid_t global_txn_id = {"\0"}; /* To be used in + * heterogeneous + * cluster with no + * transaction ids */ + +static dict_t *txn_opinfo; + +struct txn_opinfo_object_ { + glusterd_op_info_t opinfo; +}; +typedef struct txn_opinfo_object_ txn_opinfo_obj; + +int32_t +glusterd_txn_opinfo_dict_init () +{ + int32_t ret = -1; + + txn_opinfo = dict_new (); + if (!txn_opinfo) { + ret = -1; + goto out; + } + + ret = 0; +out: + return ret; +} + +void +glusterd_txn_opinfo_dict_fini () +{ + if (txn_opinfo) + dict_destroy (txn_opinfo); +} + +void +glusterd_txn_opinfo_init (glusterd_op_info_t *opinfo, + glusterd_op_sm_state_info_t *state, + glusterd_op_t *op, + dict_t *op_ctx, + rpcsvc_request_t *req) +{ + GF_ASSERT 
(opinfo); + + if (state) + opinfo->state = *state; + + if (op) + opinfo->op = *op; + + opinfo->op_ctx = dict_ref(op_ctx); + + if (req) + opinfo->req = req; + + return; +} + +int32_t +glusterd_get_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo) +{ + int32_t ret = -1; + txn_opinfo_obj *opinfo_obj = NULL; + + if (!txn_id || !opinfo) { + gf_log ("", GF_LOG_ERROR, + "Empty transaction id or opinfo received."); + ret = -1; + goto out; + } + + ret = dict_get_bin(txn_opinfo, uuid_utoa (*txn_id), + (void **) &opinfo_obj); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get transaction opinfo"); + goto out; + } + + (*opinfo) = opinfo_obj->opinfo; + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_set_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo) +{ + int32_t ret = -1; + txn_opinfo_obj *opinfo_obj = NULL; + + if (!txn_id) { + gf_log ("", GF_LOG_ERROR, "Empty transaction id received."); + ret = -1; + goto out; + } + + ret = dict_get_bin(txn_opinfo, uuid_utoa (*txn_id), + (void **) &opinfo_obj); + if (ret) { + opinfo_obj = GF_CALLOC (1, sizeof(txn_opinfo_obj), + gf_common_mt_txn_opinfo_obj_t); + if (!opinfo_obj) { + ret = -1; + goto out; + } + + ret = dict_set_bin(txn_opinfo, uuid_utoa (*txn_id), opinfo_obj, + sizeof(txn_opinfo_obj)); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set opinfo for transaction ID : %s", + uuid_utoa (*txn_id)); + goto out; + } + } + + opinfo_obj->opinfo = (*opinfo); + + ret = 0; +out: + if (ret) + if (opinfo_obj) + GF_FREE (opinfo_obj); + + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_clear_txn_opinfo (uuid_t *txn_id) +{ + int32_t ret = -1; + glusterd_op_info_t txn_op_info = {{0},}; + + if (!txn_id) { + gf_log ("", GF_LOG_ERROR, "Empty transaction id received."); + ret = -1; + goto out; + } + + ret = glusterd_get_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Transaction opinfo not found"); + goto out; + } + + dict_unref (txn_op_info.op_ctx); + + dict_del(txn_opinfo, uuid_utoa (*txn_id)); + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + static int glusterfs_port = GLUSTERD_DEFAULT_PORT; static char *glusterd_op_sm_state_names[] = { "Default", @@ -147,10 +307,10 @@ glusterd_is_volume_started (glusterd_volinfo_t *volinfo) } static int -glusterd_op_sm_inject_all_acc () +glusterd_op_sm_inject_all_acc (uuid_t *txn_id) { int32_t ret = -1; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, txn_id, NULL); gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } @@ -235,20 +395,20 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin brick_req->name = gf_strdup (name); break; - -#ifdef HAVE_BD_XLATOR - case GD_OP_BD_OP: - { + case GD_OP_SNAP: brick_req = GF_CALLOC (1, sizeof (*brick_req), gf_gld_mt_mop_brick_req_t); if (!brick_req) goto out; - brick_req->op = GLUSTERD_BRICK_BD_OP; - brick_req->name = ""; - } + brick_req->op = GLUSTERD_VOLUME_BARRIER_OP; + ret = dict_get_str (dict, "volname", &volname); + if (ret) + goto out; + snprintf (name, 1024, "%s-server",volname); + brick_req->name = gf_strdup (name); + break; -#endif default: goto out; break; @@ -416,11 +576,17 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) if (!val_dict) goto out; + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); 
+ goto out; + } + /* Check if we can support the required op-version * This check is not done on the originator glusterd. The originator * glusterd sets this value. */ - origin_glusterd = is_origin_glusterd (); + origin_glusterd = is_origin_glusterd (dict); if (!origin_glusterd) { /* Check for v3.3.x origin glusterd */ @@ -828,7 +994,7 @@ glusterd_op_stage_sync_volume (dict_t *dict, char **op_errstr) goto out; } - if (glusterd_is_local_addr (hostname)) { + if (gf_is_local_addr (hostname)) { //volname is not present in case of sync all ret = dict_get_str (dict, "volname", &volname); if (!ret) { @@ -1101,14 +1267,17 @@ _delete_reconfig_opt (dict_t *this, char *key, data_t *value, void *data) GF_ASSERT (data); is_force = (int32_t*)data; - if (*is_force != 1 && - (_gf_true == glusterd_check_voloption_flags (key, - OPT_FLAG_FORCE))) { + if (*is_force != 1) { + if (_gf_true == glusterd_check_voloption_flags (key, + OPT_FLAG_FORCE)) { /* indicate to caller that we don't set the option * due to being protected */ - *is_force = -1; - goto out; + *is_force = *is_force | GD_OP_PROTECTED; + goto out; + } else { + *is_force = *is_force | GD_OP_UNPROTECTED; + } } gf_log ("", GF_LOG_DEBUG, "deleting dict with key=%s,value=%s", @@ -1160,8 +1329,9 @@ glusterd_options_reset (glusterd_volinfo_t *volinfo, char *key, _delete_reconfig_opt (volinfo->dict, key, value, is_force); } - ret = glusterd_create_volfiles_and_notify_services (volinfo); + gd_update_volume_op_versions (volinfo); + ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Unable to create volfile for" " 'volume reset'"); @@ -1276,7 +1446,7 @@ out: } static int -glusterd_op_reset_volume (dict_t *dict, char **op_errstr) +glusterd_op_reset_volume (dict_t *dict, char **op_rspstr) { glusterd_volinfo_t *volinfo = NULL; int ret = -1; @@ -1331,10 +1501,18 @@ glusterd_op_reset_volume (dict_t *dict, char **op_errstr) quorum_action = _gf_true; ret = glusterd_options_reset (volinfo, key, &is_force); - if (is_force == -1) { - ret = -1; - gf_asprintf(op_errstr, "'%s' is protected. To reset use 'force'.", - key); + if (ret == -1) { + gf_asprintf(op_rspstr, "Volume reset : failed"); + } else if (is_force & GD_OP_PROTECTED) { + if (is_force & GD_OP_UNPROTECTED) { + gf_asprintf (op_rspstr, "All unprotected fields were" + " reset. To reset the protected fields," + " use 'force'."); + } else { + ret = -1; + gf_asprintf (op_rspstr, "'%s' is protected. 
To reset" + " use 'force'.", key); + } } out: @@ -1364,14 +1542,25 @@ glusterd_stop_bricks (glusterd_volinfo_t *volinfo) int glusterd_start_bricks (glusterd_volinfo_t *volinfo) { - glusterd_brickinfo_t *brickinfo = NULL; + int ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; + + GF_ASSERT (volinfo); list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - if (glusterd_brick_start (volinfo, brickinfo, _gf_false)) - return -1; + ret = glusterd_brick_start (volinfo, brickinfo, _gf_false); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Failed to start %s:%s for %s", + brickinfo->hostname, brickinfo->path, + volinfo->volname); + goto out; + } } - return 0; + ret = 0; +out: + return ret; } static int @@ -1474,6 +1663,7 @@ glusterd_op_set_volume (dict_t *dict) char str[50] = {0, }; char *op_errstr = NULL; gf_boolean_t global_opt = _gf_false; + gf_boolean_t global_opts_set = _gf_false; glusterd_volinfo_t *voliter = NULL; int32_t dict_count = 0; gf_boolean_t check_op_version = _gf_false; @@ -1520,6 +1710,7 @@ glusterd_op_set_volume (dict_t *dict) goto out; } + // TODO: Remove this once v3.3 compatability is not required check_op_version = dict_get_str_boolean (dict, "check-op-version", _gf_false); @@ -1534,7 +1725,6 @@ glusterd_op_set_volume (dict_t *dict) for (count = 1; ret != -1 ; count++) { - global_opt = _gf_false; sprintf (str, "key%d", count); ret = dict_get_str (dict, str, &key); if (ret) @@ -1582,8 +1772,11 @@ glusterd_op_set_volume (dict_t *dict) } } - if (glusterd_check_globaloption (key)) + global_opt = _gf_false; + if (glusterd_check_globaloption (key)) { global_opt = _gf_true; + global_opts_set = _gf_true; + } if (!global_opt) value = gf_strdup (value); @@ -1626,7 +1819,21 @@ glusterd_op_set_volume (dict_t *dict) goto out; } - if (!global_opt) { + /* Update the cluster op-version before regenerating volfiles so that + * correct volfiles are generated + */ + if (new_op_version > priv->op_version) { + priv->op_version = new_op_version; + ret = glusterd_store_global_info (this); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to store op-version"); + goto out; + } + } + + if (!global_opts_set) { + gd_update_volume_op_versions (volinfo); ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { gf_log (this->name, GF_LOG_ERROR, @@ -1652,6 +1859,7 @@ glusterd_op_set_volume (dict_t *dict) } else { list_for_each_entry (voliter, &priv->volumes, vol_list) { volinfo = voliter; + gd_update_volume_op_versions (volinfo); ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { gf_log (this->name, GF_LOG_ERROR, @@ -1677,16 +1885,6 @@ glusterd_op_set_volume (dict_t *dict) } } - if (new_op_version > priv->op_version) { - priv->op_version = new_op_version; - ret = glusterd_store_global_info (this); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to store op-version"); - goto out; - } - } - out: GF_FREE (key_fixed); gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret); @@ -1723,7 +1921,7 @@ glusterd_op_sync_volume (dict_t *dict, char **op_errstr, goto out; } - if (!glusterd_is_local_addr (hostname)) { + if (!gf_is_local_addr (hostname)) { ret = 0; goto out; } @@ -1894,6 +2092,105 @@ out: } static int +_add_brick_name_to_dict (dict_t *dict, char *key, glusterd_brickinfo_t *brick) +{ + int ret = -1; + char tmp[1024] = {0,}; + char *brickname = NULL; + xlator_t *this = NULL; + + GF_ASSERT (dict); + GF_ASSERT (key); + GF_ASSERT (brick); + + this = THIS; + GF_ASSERT (this); + + snprintf (tmp, sizeof (tmp), "%s:%s", brick->hostname, 
brick->path); + brickname = gf_strdup (tmp); + if (!brickname) { + gf_log (this->name, GF_LOG_ERROR, "Failed to dup brick name"); + goto out; + } + + ret = dict_set_dynstr (dict, key, brickname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add brick name to dict"); + goto out; + } + brickname = NULL; +out: + if (brickname) + GF_FREE (brickname); + return ret; +} + +static int +_add_remove_bricks_to_dict (dict_t *dict, glusterd_volinfo_t *volinfo, + char *prefix) +{ + int ret = -1; + int count = 0; + int i = 0; + char brick_key[1024] = {0,}; + char dict_key[1024] ={0,}; + char *brick = NULL; + xlator_t *this = NULL; + + GF_ASSERT (dict); + GF_ASSERT (volinfo); + GF_ASSERT (prefix); + + this = THIS; + GF_ASSERT (this); + + ret = dict_get_int32 (volinfo->rebal.dict, "count", &count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get brick count"); + goto out; + } + + snprintf (dict_key, sizeof (dict_key), "%s.count", prefix); + ret = dict_set_int32 (dict, dict_key, count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set brick count in dict"); + goto out; + } + + for (i = 1; i <= count; i++) { + memset (brick_key, 0, sizeof (brick_key)); + snprintf (brick_key, sizeof (brick_key), "brick%d", i); + + ret = dict_get_str (volinfo->rebal.dict, brick_key, &brick); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get %s", brick_key); + goto out; + } + + memset (dict_key, 0, sizeof (dict_key)); + snprintf (dict_key, sizeof (dict_key), "%s.%s", prefix, + brick_key); + ret = dict_set_str (dict, dict_key, brick); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add brick to dict"); + goto out; + } + brick = NULL; + } + +out: + return ret; +} + +/* This adds the respective task-id and all available parameters of a task into + * a dictionary + */ +static int _add_task_to_dict (dict_t *dict, glusterd_volinfo_t *volinfo, int op, int index) { @@ -1910,13 +2207,34 @@ _add_task_to_dict (dict_t *dict, glusterd_volinfo_t *volinfo, int op, int index) GF_ASSERT (this); switch (op) { - case GD_OP_REBALANCE: case GD_OP_REMOVE_BRICK: + snprintf (key, sizeof (key), "task%d", index); + ret = _add_remove_bricks_to_dict (dict, volinfo, key); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add remove bricks to dict"); + goto out; + } + case GD_OP_REBALANCE: uuid_str = gf_strdup (uuid_utoa (volinfo->rebal.rebalance_id)); status = volinfo->rebal.defrag_status; break; case GD_OP_REPLACE_BRICK: + snprintf (key, sizeof (key), "task%d.src-brick", index); + ret = _add_brick_name_to_dict (dict, key, + volinfo->rep_brick.src_brick); + if (ret) + goto out; + memset (key, 0, sizeof (key)); + + snprintf (key, sizeof (key), "task%d.dst-brick", index); + ret = _add_brick_name_to_dict (dict, key, + volinfo->rep_brick.dst_brick); + if (ret) + goto out; + memset (key, 0, sizeof (key)); + uuid_str = gf_strdup (uuid_utoa (volinfo->rep_brick.rb_id)); status = volinfo->rep_brick.rb_status; break; @@ -1929,8 +2247,7 @@ _add_task_to_dict (dict_t *dict, glusterd_volinfo_t *volinfo, int op, int index) } snprintf (key, sizeof (key), "task%d.type", index); - ret = dict_set_str (dict, key, - (char *)gd_op_list[op]); + ret = dict_set_str (dict, key, (char *)gd_op_list[op]); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Error setting task type in dict"); @@ -1940,7 +2257,6 @@ _add_task_to_dict (dict_t *dict, glusterd_volinfo_t *volinfo, int op, int index) memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "task%d.id", index); - if (!uuid_str) goto out; 
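Note on the switch above: the GD_OP_REMOVE_BRICK case deliberately omits a break and falls through into GD_OP_REBALANCE, because remove-brick is driven by the rebalance process and therefore shares its task-id and defrag status. A minimal standalone sketch of that pattern (illustrative names, not glusterd code):

#include <stdio.h>

enum task_op { OP_REMOVE_BRICK, OP_REBALANCE, OP_REPLACE_BRICK };

static void
add_task_details (enum task_op op)
{
        switch (op) {
        case OP_REMOVE_BRICK:
                printf ("add removed-brick list\n");
                /* fall through: remove-brick runs as a rebalance
                 * task, so it reuses the rebalance id and status */
        case OP_REBALANCE:
                printf ("add rebalance task-id and status\n");
                break;
        case OP_REPLACE_BRICK:
                printf ("add src-brick and dst-brick names\n");
                break;
        }
}

int
main (void)
{
        add_task_details (OP_REMOVE_BRICK); /* prints both lines */
        return 0;
}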
ret = dict_set_dynstr (dict, key, uuid_str); @@ -1967,6 +2283,50 @@ out: } static int +glusterd_aggregate_task_status (dict_t *rsp_dict, glusterd_volinfo_t *volinfo) +{ + int ret = -1; + int tasks = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + if (!uuid_is_null (volinfo->rebal.rebalance_id)) { + ret = _add_task_to_dict (rsp_dict, volinfo, volinfo->rebal.op, + tasks); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add task details to dict"); + goto out; + } + tasks++; + } + + if (!uuid_is_null (volinfo->rep_brick.rb_id)) { + ret = _add_task_to_dict (rsp_dict, volinfo, GD_OP_REPLACE_BRICK, + tasks); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add task details to dict"); + goto out; + } + tasks++; + } + + ret = dict_set_int32 (rsp_dict, "tasks", tasks); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Error setting tasks count in dict"); + goto out; + } + ret = 0; + +out: + return ret; +} + +static int glusterd_op_status_volume (dict_t *dict, char **op_errstr, dict_t *rsp_dict) { @@ -1986,7 +2346,6 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, gf_boolean_t nfs_disabled = _gf_false; gf_boolean_t shd_enabled = _gf_true; gf_boolean_t origin_glusterd = _gf_false; - int tasks = 0; this = THIS; GF_ASSERT (this); @@ -1996,13 +2355,13 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, GF_ASSERT (dict); - origin_glusterd = is_origin_glusterd (); + origin_glusterd = is_origin_glusterd (dict); ret = dict_get_uint32 (dict, "cmd", &cmd); if (ret) goto out; - if (is_origin_glusterd ()) { + if (origin_glusterd) { ret = 0; if ((cmd & GF_CLI_STATUS_ALL)) { ret = glusterd_get_all_volnames (rsp_dict); @@ -2069,6 +2428,10 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, brick_index); node_count++; + } else if ((cmd & GF_CLI_STATUS_TASKS) != 0) { + ret = glusterd_aggregate_task_status (rsp_dict, volinfo); + goto out; + } else { list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { brick_index++; @@ -2142,35 +2505,18 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, } /* Active tasks */ + /* Tasks are added only for normal volume status request for either a + * single volume or all volumes, and only by the origin glusterd + */ if (((cmd & GF_CLI_STATUS_MASK) != GF_CLI_STATUS_NONE) || + !(cmd & (GF_CLI_STATUS_VOL | GF_CLI_STATUS_ALL)) || !origin_glusterd) goto out; - if (!uuid_is_null (volinfo->rebal.rebalance_id)) { - ret = _add_task_to_dict (rsp_dict, volinfo, volinfo->rebal.op, - tasks); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to add task details to dict"); - goto out; - } - tasks++; - } - if (!uuid_is_null (volinfo->rep_brick.rb_id)) { - ret = _add_task_to_dict (rsp_dict, volinfo, GD_OP_REPLACE_BRICK, - tasks); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to add task details to dict"); - goto out; - } - tasks++; - } - - ret = dict_set_int32 (rsp_dict, "tasks", tasks); + ret = glusterd_aggregate_task_status (rsp_dict, volinfo); if (ret) - gf_log (this->name, GF_LOG_ERROR, - "Error setting tasks count in dict"); + goto out; + ret = 0; out: gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -2197,6 +2543,7 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) xlator_t *this = NULL; glusterd_peerinfo_t *peerinfo = NULL; uint32_t pending_count = 0; + dict_t *dict = NULL; this = THIS; priv = this->private; @@ -2211,27 +2558,60 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) 
continue; - proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_LOCK]; - if (proc->fn) { - ret = proc->fn (NULL, this, peerinfo); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, "Failed to " - "send lock request for operation " - "'Volume %s' to peer %s", - gd_op_list[opinfo.op], - peerinfo->hostname); - continue; + /* Based on the op_version, acquire a cluster or mgmt_v3 lock */ + if (priv->op_version < 3) { + proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_LOCK]; + if (proc->fn) { + ret = proc->fn (NULL, this, peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to send lock request " + "for operation 'Volume %s' to " + "peer %s", + gd_op_list[opinfo.op], + peerinfo->hostname); + continue; + } + pending_count++; + } + } else { + dict = glusterd_op_get_ctx (); + dict_ref (dict); + + proc = &peerinfo->mgmt_v3->proctable + [GLUSTERD_MGMT_V3_LOCK]; + if (proc->fn) { + ret = dict_set_static_ptr (dict, "peerinfo", + peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set peerinfo"); + dict_unref (dict); + goto out; + } + + ret = proc->fn (NULL, this, dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to send mgmt_v3 lock " + "request for operation " + "'Volume %s' to peer %s", + gd_op_list[opinfo.op], + peerinfo->hostname); + dict_unref (dict); + continue; + } + pending_count++; } - pending_count++; } } opinfo.pending_count = pending_count; if (!opinfo.pending_count) - ret = glusterd_op_sm_inject_all_acc (); + ret = glusterd_op_sm_inject_all_acc (&event->txn_id); +out: gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); - return ret; } @@ -2244,17 +2624,12 @@ glusterd_op_ac_send_unlock (glusterd_op_sm_event_t *event, void *ctx) xlator_t *this = NULL; glusterd_peerinfo_t *peerinfo = NULL; uint32_t pending_count = 0; + dict_t *dict = NULL; this = THIS; priv = this->private; GF_ASSERT (priv); - /*ret = glusterd_unlock (MY_UUID); - - if (ret) - goto out; - */ - list_for_each_entry (peerinfo, &priv->peers, uuid_list) { GF_ASSERT (peerinfo); @@ -2264,29 +2639,62 @@ glusterd_op_ac_send_unlock (glusterd_op_sm_event_t *event, void *ctx) (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) continue; - proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_UNLOCK]; - if (proc->fn) { - ret = proc->fn (NULL, this, peerinfo); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, "Failed to " - "send unlock request for operation " - "'Volume %s' to peer %s", - gd_op_list[opinfo.op], - peerinfo->hostname); - continue; + /* Based on the op_version, release the * + * cluster or mgmt_v3 lock */ + if (priv->op_version < 3) { + proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_UNLOCK]; + if (proc->fn) { + ret = proc->fn (NULL, this, peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to send unlock request " + "for operation 'Volume %s' to " + "peer %s", + gd_op_list[opinfo.op], + peerinfo->hostname); + continue; + } + pending_count++; + } + } else { + dict = glusterd_op_get_ctx (); + dict_ref (dict); + + proc = &peerinfo->mgmt_v3->proctable + [GLUSTERD_MGMT_V3_UNLOCK]; + if (proc->fn) { + ret = dict_set_static_ptr (dict, "peerinfo", + peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set peerinfo"); + dict_unref (dict); + goto out; + } + + ret = proc->fn (NULL, this, dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to send mgmt_v3 unlock " + "request for operation " + "'Volume %s' to peer %s", + gd_op_list[opinfo.op], + peerinfo->hostname); + dict_unref (dict); + continue; + } 
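The send-lock path above and the matching send-unlock path here branch on priv->op_version: peers still at op-version < 3 only understand the legacy cluster-wide lock RPC, while newer peers get a volume-scoped mgmt_v3 lock, with the target peerinfo stashed in the request dict before the call. A minimal sketch of that dispatch, assuming an illustrative threshold constant and function names (not the real glusterd API):

#include <stdio.h>

#define OP_VERSION_MGMT_V3 3    /* assumed: the patch tests op_version < 3 */

static int
send_cluster_lock (const char *peer)
{
        printf ("cluster lock -> %s\n", peer);
        return 0;
}

static int
send_mgmt_v3_lock (const char *peer, const char *volname)
{
        printf ("mgmt_v3 lock on volume %s -> %s\n", volname, peer);
        return 0;
}

static int
send_lock (int op_version, const char *peer, const char *volname)
{
        if (op_version < OP_VERSION_MGMT_V3)
                return send_cluster_lock (peer);        /* legacy peer */
        return send_mgmt_v3_lock (peer, volname);       /* volume-scoped */
}

int
main (void)
{
        send_lock (2, "peer1", "vol0");
        send_lock (3, "peer2", "vol0");
        return 0;
}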
+ pending_count++; } - pending_count++; } } opinfo.pending_count = pending_count; if (!opinfo.pending_count) - ret = glusterd_op_sm_inject_all_acc (); + ret = glusterd_op_sm_inject_all_acc (&event->txn_id); +out: gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); - return ret; - } static int @@ -2298,7 +2706,8 @@ glusterd_op_ac_ack_drain (glusterd_op_sm_event_t *event, void *ctx) opinfo.pending_count--; if (!opinfo.pending_count) - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, NULL); gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret); @@ -2314,43 +2723,94 @@ glusterd_op_ac_send_unlock_drain (glusterd_op_sm_event_t *event, void *ctx) static int glusterd_op_ac_lock (glusterd_op_sm_event_t *event, void *ctx) { - glusterd_op_lock_ctx_t *lock_ctx = NULL; - int32_t ret = 0; + int32_t ret = 0; + char *volname = NULL; + glusterd_op_lock_ctx_t *lock_ctx = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; GF_ASSERT (event); GF_ASSERT (ctx); + this = THIS; + priv = this->private; + lock_ctx = (glusterd_op_lock_ctx_t *)ctx; - ret = glusterd_lock (lock_ctx->uuid); + /* If the req came from a node running on older op_version + * the dict won't be present. Based on it acquiring a cluster + * or mgmt_v3 lock */ + if (lock_ctx->dict == NULL) { + ret = glusterd_lock (lock_ctx->uuid); + glusterd_op_lock_send_resp (lock_ctx->req, ret); + } else { + ret = dict_get_str (lock_ctx->dict, "volname", &volname); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire volname"); + else { + ret = glusterd_mgmt_v3_lock (volname, lock_ctx->uuid, + "vol"); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock for %s", + volname); + } - gf_log (THIS->name, GF_LOG_DEBUG, "Lock Returned %d", ret); + glusterd_op_mgmt_v3_lock_send_resp (lock_ctx->req, + &event->txn_id, ret); - glusterd_op_lock_send_resp (lock_ctx->req, ret); + dict_unref (lock_ctx->dict); + } + gf_log (THIS->name, GF_LOG_DEBUG, "Lock Returned %d", ret); return ret; } static int glusterd_op_ac_unlock (glusterd_op_sm_event_t *event, void *ctx) { - int ret = 0; - glusterd_op_lock_ctx_t *lock_ctx = NULL; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; + int32_t ret = 0; + char *volname = NULL; + glusterd_op_lock_ctx_t *lock_ctx = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + GF_ASSERT (event); GF_ASSERT (ctx); this = THIS; priv = this->private; + lock_ctx = (glusterd_op_lock_ctx_t *)ctx; - ret = glusterd_unlock (lock_ctx->uuid); + /* If the req came from a node running on older op_version + * the dict won't be present. 
Based on it releasing the cluster + * or mgmt_v3 lock */ + if (lock_ctx->dict == NULL) { + ret = glusterd_unlock (lock_ctx->uuid); + glusterd_op_unlock_send_resp (lock_ctx->req, ret); + } else { + ret = dict_get_str (lock_ctx->dict, "volname", &volname); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire volname"); + else { + ret = glusterd_mgmt_v3_unlock (volname, lock_ctx->uuid, + "vol"); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to release lock for %s", volname); + } + + glusterd_op_mgmt_v3_unlock_send_resp (lock_ctx->req, + &event->txn_id, ret); - gf_log (this->name, GF_LOG_DEBUG, "Unlock Returned %d", ret); + dict_unref (lock_ctx->dict); + } - glusterd_op_unlock_send_resp (lock_ctx->req, ret); + gf_log (this->name, GF_LOG_DEBUG, "Unlock Returned %d", ret); if (priv->pending_quorum_action) glusterd_do_quorum_action (); @@ -2388,7 +2848,8 @@ glusterd_op_ac_rcvd_lock_acc (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, + &event->txn_id, NULL); gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -2496,12 +2957,13 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) } break; + case GD_OP_GSYNC_CREATE: case GD_OP_GSYNC_SET: { ret = glusterd_op_gsync_args_get (dict, &errstr, &volname, - NULL); + NULL, NULL); if (ret == 0) { ret = glusterd_dict_set_volid (dict, volname, op_errstr); @@ -2591,9 +3053,6 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) case GD_OP_STATEDUMP_VOLUME: case GD_OP_CLEARLOCKS_VOLUME: case GD_OP_DEFRAG_BRICK_VOLUME: -#ifdef HAVE_BD_XLATOR - case GD_OP_BD_OP: -#endif { ret = dict_get_str (dict, "volname", &volname); if (ret) { @@ -2614,6 +3073,18 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) } break; + case GD_OP_COPY_FILE: + { + dict_copy (dict, req_dict); + break; + } + + case GD_OP_SYS_EXEC: + { + dict_copy (dict, req_dict); + break; + } + default: break; } @@ -2810,7 +3281,8 @@ out: if (dict) dict_unref (dict); if (ret) { - glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL); + glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + &event->txn_id, NULL); opinfo.op_ret = ret; } @@ -2819,7 +3291,7 @@ out: opinfo.pending_count); if (!opinfo.pending_count) - ret = glusterd_op_sm_inject_all_acc (); + ret = glusterd_op_sm_inject_all_acc (&event->txn_id); gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); @@ -2828,10 +3300,10 @@ out: } static int32_t -glusterd_op_start_rb_timer (dict_t *dict) +glusterd_op_start_rb_timer (dict_t *dict, uuid_t *txn_id) { int32_t op = 0; - struct timeval timeout = {0, }; + struct timespec timeout = {0, }; glusterd_conf_t *priv = NULL; int32_t ret = -1; dict_t *rb_ctx = NULL; @@ -2847,12 +3319,12 @@ glusterd_op_start_rb_timer (dict_t *dict) } if (op != GF_REPLACE_OP_START) { - ret = glusterd_op_sm_inject_all_acc (); + ret = glusterd_op_sm_inject_all_acc (txn_id); goto out; } timeout.tv_sec = 5; - timeout.tv_usec = 0; + timeout.tv_nsec = 0; rb_ctx = dict_copy (dict, rb_ctx); @@ -2862,6 +3334,17 @@ glusterd_op_start_rb_timer (dict_t *dict) ret = -1; goto out; } + + ret = dict_set_bin (rb_ctx, "transaction_id", + txn_id, sizeof(*txn_id)); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Failed to set transaction id."); + goto out; + } else + gf_log ("", GF_LOG_DEBUG, + "transaction_id = %s", uuid_utoa (*txn_id)); + priv->timer = gf_timer_call_after (THIS->ctx, 
timeout, glusterd_do_replace_brick, (void *) rb_ctx); @@ -2930,6 +3413,97 @@ out: return ret; } +static int +reassign_defrag_status (dict_t *dict, char *key, gf_defrag_status_t *status) +{ + int ret = 0; + + if (!*status) + return ret; + + switch (*status) { + case GF_DEFRAG_STATUS_STARTED: + *status = GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED; + break; + + case GF_DEFRAG_STATUS_STOPPED: + *status = GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED; + break; + + case GF_DEFRAG_STATUS_COMPLETE: + *status = GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE; + break; + + case GF_DEFRAG_STATUS_FAILED: + *status = GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED; + break; + default: + break; + } + + ret = dict_set_int32(dict, key, *status); + if (ret) + gf_log (THIS->name, GF_LOG_WARNING, + "failed to reset defrag %s in dict", key); + + return ret; +} + +/* Check and reassign the defrag_status enum got from the rebalance process + * of all peers so that the rebalance-status CLI command can display if a + * full-rebalance or just a fix-layout was carried out. + */ +static int +glusterd_op_check_peer_defrag_status (dict_t *dict, int count) +{ + glusterd_volinfo_t *volinfo = NULL; + gf_defrag_status_t status = GF_DEFRAG_STATUS_NOT_STARTED; + char key[256] = {0,}; + char *volname = NULL; + int ret = -1; + int i = 1; + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (THIS->name, GF_LOG_WARNING, "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log (THIS->name, GF_LOG_WARNING, FMTSTR_CHECK_VOL_EXISTS, + volname); + goto out; + } + + if (volinfo->rebal.defrag_cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) { + /* Fix layout was not issued; we don't need to reassign + the status */ + ret = 0; + goto out; + } + + do { + memset (key, 0, 256); + snprintf (key, 256, "status-%d", i); + ret = dict_get_int32 (dict, key, (int32_t *)&status); + if (ret) { + gf_log (THIS->name, GF_LOG_WARNING, + "failed to get defrag %s", key); + goto out; + } + ret = reassign_defrag_status (dict, key, &status); + if (ret) + goto out; + i++; + } while (i <= count); + + ret = 0; +out: + return ret; + +} + /* This function is used to modify the op_ctx dict before sending it back * to cli. This is useful in situations like changing the peer uuids to * hostnames etc. @@ -3033,12 +3607,38 @@ glusterd_op_modify_op_ctx (glusterd_op_t op, void *ctx) goto out; } + /* add 'node-name-%d' into op_ctx with value uuid_str. 
+ this will be used to convert to hostname later */ + { + char key[1024]; + char *uuid_str = NULL; + int i; + + for (i = 1; i <= count; i++) { + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "node-uuid-%d", i); + ret = dict_get_str (op_ctx, key, &uuid_str); + if (!ret) { + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), + "node-name-%d", i); + ret = dict_set_str (op_ctx, key, + uuid_str); + } + } + } + ret = glusterd_op_volume_dict_uuid_to_hostname (op_ctx, - "node-uuid-%d", + "node-name-%d", 1, (count + 1)); if (ret) gf_log (this->name, GF_LOG_WARNING, "Failed uuid to hostname conversion"); + + ret = glusterd_op_check_peer_defrag_status (op_ctx, count); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to reset defrag status for fix-layout"); break; default: @@ -3191,17 +3791,19 @@ out: if (dict) dict_unref (dict); if (ret) { - glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL); + glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + &event->txn_id, NULL); opinfo.op_ret = ret; } if (!opinfo.pending_count) { if (op == GD_OP_REPLACE_BRICK) { - ret = glusterd_op_start_rb_timer (op_dict); + ret = glusterd_op_start_rb_timer (op_dict, + &event->txn_id); } else { glusterd_op_modify_op_ctx (op, NULL); - ret = glusterd_op_sm_inject_all_acc (); + ret = glusterd_op_sm_inject_all_acc (&event->txn_id); } goto err; } @@ -3226,7 +3828,8 @@ glusterd_op_ac_rcvd_stage_op_acc (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_ACC, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_ACC, + &event->txn_id, NULL); out: gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -3247,7 +3850,8 @@ glusterd_op_ac_stage_op_failed (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, NULL); out: gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -3268,7 +3872,8 @@ glusterd_op_ac_commit_op_failed (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, NULL); out: gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -3311,7 +3916,8 @@ glusterd_op_ac_brick_op_failed (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.brick_pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, ev_ctx->commit_ctx); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, ev_ctx->commit_ctx); out: if (ev_ctx->rsp_dict) @@ -3353,7 +3959,7 @@ glusterd_op_ac_rcvd_commit_op_acc (glusterd_op_sm_event_t *event, void *ctx) goto out; } - ret = glusterd_op_start_rb_timer (op_ctx); + ret = glusterd_op_start_rb_timer (op_ctx, &event->txn_id); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Couldn't start " "replace-brick operation."); @@ -3368,10 +3974,12 @@ glusterd_op_ac_rcvd_commit_op_acc (glusterd_op_sm_event_t *event, void *ctx) out: if (commit_ack_inject) { if (ret) - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + &event->txn_id, NULL); else if (!opinfo.pending_count) { glusterd_op_modify_op_ctx (op, NULL); - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC, 
+ &event->txn_id, NULL); } /*else do nothing*/ } @@ -3392,7 +4000,8 @@ glusterd_op_ac_rcvd_unlock_acc (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, + &event->txn_id, NULL); gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -3426,7 +4035,7 @@ glusterd_op_reset_ctx () } int32_t -glusterd_op_txn_complete () +glusterd_op_txn_complete (uuid_t *txn_id) { int32_t ret = -1; glusterd_conf_t *priv = NULL; @@ -3436,6 +4045,7 @@ glusterd_op_txn_complete () rpcsvc_request_t *req = NULL; void *ctx = NULL; char *op_errstr = NULL; + char *volname = NULL; xlator_t *this = NULL; this = THIS; @@ -3458,14 +4068,29 @@ glusterd_op_txn_complete () glusterd_op_reset_ctx (); glusterd_op_clear_errstr (); - ret = glusterd_unlock (MY_UUID); - - /* unlock cant/shouldnt fail here!! */ - if (ret) { - gf_log (this->name, GF_LOG_CRITICAL, - "Unable to clear local lock, ret: %d", ret); + /* Based on the op-version, we release the cluster or mgmt_v3 lock */ + if (priv->op_version < 3) { + ret = glusterd_unlock (MY_UUID); + /* unlock cant/shouldnt fail here!! */ + if (ret) + gf_log (this->name, GF_LOG_CRITICAL, + "Unable to clear local lock, ret: %d", ret); + else + gf_log (this->name, GF_LOG_DEBUG, "Cleared local lock"); } else { - gf_log (this->name, GF_LOG_DEBUG, "Cleared local lock"); + ret = dict_get_str (ctx, "volname", &volname); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire volname"); + + if (volname) { + ret = glusterd_mgmt_v3_unlock (volname, MY_UUID, + "vol"); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to release lock for %s", + volname); + } } ret = glusterd_op_send_cli_response (op, op_ret, @@ -3484,6 +4109,13 @@ glusterd_op_txn_complete () if (priv->pending_quorum_action) glusterd_do_quorum_action (); + + /* Clearing the transaction opinfo */ + ret = glusterd_clear_txn_opinfo (txn_id); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to clear transaction's opinfo"); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } @@ -3495,7 +4127,7 @@ glusterd_op_ac_unlocked_all (glusterd_op_sm_event_t *event, void *ctx) GF_ASSERT (event); - ret = glusterd_op_txn_complete (); + ret = glusterd_op_txn_complete (&event->txn_id); gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -3512,6 +4144,7 @@ glusterd_op_ac_stage_op (glusterd_op_sm_event_t *event, void *ctx) char *op_errstr = NULL; dict_t *dict = NULL; xlator_t *this = NULL; + uuid_t *txn_id = NULL; this = THIS; GF_ASSERT (this); @@ -3537,6 +4170,19 @@ glusterd_op_ac_stage_op (glusterd_op_sm_event_t *event, void *ctx) status); } + txn_id = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); + + if (txn_id) + uuid_copy (*txn_id, event->txn_id); + else + gf_log (this->name, GF_LOG_ERROR, "Out of Memory"); + + ret = dict_set_bin (rsp_dict, "transaction_id", + txn_id, sizeof(*txn_id)); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to set transaction id."); + ret = glusterd_op_stage_send_resp (req_ctx->req, req_ctx->op, status, op_errstr, rsp_dict); @@ -3601,6 +4247,7 @@ glusterd_op_ac_commit_op (glusterd_op_sm_event_t *event, void *ctx) dict_t *dict = NULL; dict_t *rsp_dict = NULL; xlator_t *this = NULL; + uuid_t *txn_id = NULL; this = THIS; GF_ASSERT (this); @@ -3630,10 +4277,22 @@ glusterd_op_ac_commit_op (glusterd_op_sm_event_t *event, void *ctx) "'Volume %s' failed: %d", gd_op_list[req_ctx->op], status); + txn_id = GF_CALLOC 
(1, sizeof(uuid_t), gf_common_mt_uuid_t); + + if (txn_id) + uuid_copy (*txn_id, event->txn_id); + else + gf_log (this->name, GF_LOG_ERROR, "Out of Memory"); + + ret = dict_set_bin (rsp_dict, "transaction_id", + txn_id, sizeof(*txn_id)); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to set transaction id."); + ret = glusterd_op_commit_send_resp (req_ctx->req, req_ctx->op, status, op_errstr, rsp_dict); - glusterd_op_fini_ctx (); if (op_errstr && (strcmp (op_errstr, ""))) GF_FREE (op_errstr); @@ -3662,7 +4321,6 @@ glusterd_op_ac_send_commit_failed (glusterd_op_sm_event_t *event, void *ctx) opinfo.op_ret, opinfo.op_errstr, op_ctx); - glusterd_op_fini_ctx (); if (opinfo.op_errstr && (strcmp (opinfo.op_errstr, ""))) { GF_FREE (opinfo.op_errstr); opinfo.op_errstr = NULL; @@ -3747,6 +4405,10 @@ glusterd_op_stage_validate (glusterd_op_t op, dict_t *dict, char **op_errstr, ret = glusterd_op_stage_sync_volume (dict, op_errstr); break; + case GD_OP_GSYNC_CREATE: + ret = glusterd_op_stage_gsync_create (dict, op_errstr); + break; + case GD_OP_GSYNC_SET: ret = glusterd_op_stage_gsync_set (dict, op_errstr); break; @@ -3780,18 +4442,21 @@ glusterd_op_stage_validate (glusterd_op_t op, dict_t *dict, char **op_errstr, ret = glusterd_op_stage_clearlocks_volume (dict, op_errstr); break; -#ifdef HAVE_BD_XLATOR - case GD_OP_BD_OP: - ret = glusterd_op_stage_bd (dict, op_errstr); + + case GD_OP_COPY_FILE: + ret = glusterd_op_stage_copy_file (dict, op_errstr); break; -#endif + + case GD_OP_SYS_EXEC: + ret = glusterd_op_stage_sys_exec (dict, op_errstr); + break; + default: gf_log (this->name, GF_LOG_ERROR, "Unknown op %s", gd_op_list[op]); } - gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); - + gf_log (this->name, GF_LOG_DEBUG, "OP = %d. Returning %d", op, ret); return ret; } @@ -3849,6 +4514,11 @@ glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr, ret = glusterd_op_sync_volume (dict, op_errstr, rsp_dict); break; + case GD_OP_GSYNC_CREATE: + ret = glusterd_op_gsync_create (dict, op_errstr, + rsp_dict); + break; + case GD_OP_GSYNC_SET: ret = glusterd_op_gsync_set (dict, op_errstr, rsp_dict); break; @@ -3883,11 +4553,15 @@ glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr, ret = glusterd_op_clearlocks_volume (dict, op_errstr, rsp_dict); break; -#ifdef HAVE_BD_XLATOR - case GD_OP_BD_OP: - ret = 0; + + case GD_OP_COPY_FILE: + ret = glusterd_op_copy_file (dict, op_errstr); break; -#endif + + case GD_OP_SYS_EXEC: + ret = glusterd_op_sys_exec (dict, op_errstr, rsp_dict); + break; + default: gf_log (this->name, GF_LOG_ERROR, "Unknown op %s", gd_op_list[op]); @@ -3896,11 +4570,12 @@ glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr, if (ret == 0) glusterd_op_commit_hook (op, dict, GD_COMMIT_HOOK_POST); - gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } + static int glusterd_bricks_select_stop_volume (dict_t *dict, char **op_errstr, struct list_head *selected) @@ -4217,24 +4892,95 @@ out: } int +get_replica_index_for_per_replica_cmd (glusterd_volinfo_t *volinfo, + dict_t *dict) { + int ret = 0; + char *hostname = NULL; + char *path = NULL; + int index = 0; + glusterd_brickinfo_t *brickinfo = NULL; + int cmd_replica_index = -1; + int replica_count = -1; + + + if (!dict) { + ret = -1; + goto out; + } + + ret = dict_get_str (dict, "per-replica-cmd-hostname", &hostname); + if (ret) + goto out; + ret = dict_get_str (dict, "per-replica-cmd-path", &path); + 
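The lookup that continues below depends on the brick list being kept in replica-set order: every consecutive run of replica_count bricks forms one replica group, so integer division of a brick's position by replica_count yields its replica index. A standalone sketch of that arithmetic (illustrative data, not glusterd code):

#include <stdio.h>

int
main (void)
{
        /* bricks stored in replica-set order, replica_count = 2 */
        const char *bricks[] = { "h1:/b0", "h2:/b0",   /* set 0 */
                                 "h1:/b1", "h2:/b1" }; /* set 1 */
        int         replica_count = 2;
        int         pos;

        for (pos = 0; pos < 4; pos++)
                printf ("%s -> replica set %d\n",
                        bricks[pos], pos / replica_count);
        return 0;
}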
if (ret) + goto out; + + replica_count = volinfo->replica_count; + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_is_null (brickinfo->uuid)) + (void)glusterd_resolve_brick (brickinfo); + if (!strcmp (brickinfo->path, path) && + !strcmp (brickinfo->hostname, hostname)) { + cmd_replica_index = index/(replica_count); + goto out; + } + index++; + } + + +out: + if (ret) + cmd_replica_index = -1; + + return cmd_replica_index; +} + +int _select_rxlators_with_local_bricks (xlator_t *this, glusterd_volinfo_t *volinfo, - dict_t *dict) + dict_t *dict, cli_cmd_type type) { glusterd_brickinfo_t *brickinfo = NULL; glusterd_conf_t *priv = NULL; - int index = 1; + int index = 0; int rxlator_count = 0; int replica_count = 0; gf_boolean_t add = _gf_false; + int ret = 0; + int cmd_replica_index = -1; priv = this->private; replica_count = volinfo->replica_count; + + if (type == PER_REPLICA) { + + cmd_replica_index = get_replica_index_for_per_replica_cmd + (volinfo, dict); + if (cmd_replica_index == -1) { + ret = -1; + goto err; + } + } + + index = 1; + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { if (uuid_is_null (brickinfo->uuid)) (void)glusterd_resolve_brick (brickinfo); - if (!uuid_compare (MY_UUID, brickinfo->uuid)) - add = _gf_true; + switch (type) { + case ALL_REPLICA: + if (!uuid_compare (MY_UUID, brickinfo->uuid)) + add = _gf_true; + break; + case PER_REPLICA: + if (!uuid_compare (MY_UUID, brickinfo->uuid) && + ((index-1)/replica_count == cmd_replica_index)) + + add = _gf_true; + break; + } + if (index % replica_count == 0) { if (add) { _add_rxlator_to_dict (dict, volinfo->volname, @@ -4247,6 +4993,10 @@ _select_rxlators_with_local_bricks (xlator_t *this, glusterd_volinfo_t *volinfo, index++; } +err: + if (ret) + rxlator_count = -1; + return rxlator_count; } @@ -4287,9 +5037,10 @@ _select_rxlators_for_full_self_heal (xlator_t *this, return rxlator_count; } -#ifdef HAVE_BD_XLATOR + static int -glusterd_bricks_select_bd (dict_t *dict, char **op_errstr) +glusterd_bricks_select_snap (dict_t *dict, char **op_errstr, + struct list_head *selected) { int ret = -1; glusterd_conf_t *priv = NULL; @@ -4307,31 +5058,31 @@ glusterd_bricks_select_bd (dict_t *dict, char **op_errstr) ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Unable to get volname"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get" + " volname"); goto out; } ret = glusterd_volinfo_find (volname, &volinfo); if (ret) goto out; - pending_node = GF_CALLOC (1, sizeof (*pending_node), - gf_gld_mt_pending_node_t); - if (!pending_node) { - ret = -1; - goto out; - } - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { brick_index++; if (uuid_compare (brickinfo->uuid, MY_UUID) || !glusterd_is_brick_started (brickinfo)) { continue; } + pending_node = GF_CALLOC (1, sizeof (*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } pending_node->node = brickinfo; pending_node->type = GD_NODE_BRICK; pending_node->index = brick_index; list_add_tail (&pending_node->list, - &opinfo.pending_bricks); + selected); pending_node = NULL; } @@ -4341,10 +5092,10 @@ out: gf_log (THIS->name, GF_LOG_DEBUG, "Returning ret %d", ret); return ret; } -#endif static int -fill_shd_status_for_local_bricks (dict_t *dict, glusterd_volinfo_t *volinfo) +fill_shd_status_for_local_bricks (dict_t *dict, glusterd_volinfo_t *volinfo, + cli_cmd_type type, dict_t *req_dict) { glusterd_brickinfo_t *brickinfo = NULL; char msg[1024] = {0,}; @@ 
-4353,10 +5104,22 @@ fill_shd_status_for_local_bricks (dict_t *dict, glusterd_volinfo_t *volinfo) int index = 0; int ret = 0; xlator_t *this = NULL; + int cmd_replica_index = -1; this = THIS; snprintf (msg, sizeof (msg), "self-heal-daemon is not running on"); + if (type == PER_REPLICA) { + cmd_replica_index = get_replica_index_for_per_replica_cmd + (volinfo, req_dict); + if (cmd_replica_index == -1) { + gf_log (THIS->name, GF_LOG_ERROR, "Could not find the " + "replica index for per replica type command"); + ret = -1; + goto out; + } + } + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { if (uuid_is_null (brickinfo->uuid)) (void)glusterd_resolve_brick (brickinfo); @@ -4365,6 +5128,14 @@ fill_shd_status_for_local_bricks (dict_t *dict, glusterd_volinfo_t *volinfo) index++; continue; } + + if (type == PER_REPLICA) { + if (cmd_replica_index != (index/volinfo->replica_count)) { + index++; + continue; + } + + } snprintf (key, sizeof (key), "%d-status",index); snprintf (value, sizeof (value), "%s %s",msg, uuid_utoa(MY_UUID)); @@ -4433,21 +5204,49 @@ glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr, goto out; } + switch (heal_op) { + case GF_AFR_OP_INDEX_SUMMARY: + case GF_AFR_OP_STATISTICS_HEAL_COUNT: + if (!glusterd_is_nodesvc_online ("glustershd")) { + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, "Received " + "empty ctx."); + goto out; + } - if (!glusterd_is_nodesvc_online ("glustershd") && - (heal_op == GF_AFR_OP_INDEX_SUMMARY)) { - - if (!rsp_dict) { - gf_log (this->name, GF_LOG_ERROR, "Received empty " - "ctx."); + ret = fill_shd_status_for_local_bricks (rsp_dict, + volinfo, + ALL_REPLICA, + dict); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Unable to " + "fill the shd status for the local " + "bricks"); goto out; + } + break; + case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA: + if (!glusterd_is_nodesvc_online ("glustershd")) { + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, "Received " + "empty ctx."); + goto out; + } + ret = fill_shd_status_for_local_bricks (rsp_dict, + volinfo, + PER_REPLICA, + dict); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Unable to " + "fill the shd status for the local" + " bricks."); + goto out - ret = fill_shd_status_for_local_bricks (rsp_dict, volinfo); - if (ret) - gf_log (this->name, GF_LOG_ERROR, "Unable to fill the shd" - " status for the local bricks"); - goto out + } + break; + default: + break; } @@ -4457,14 +5256,28 @@ glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr, volinfo, dict); break; + case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA: + rxlator_count = _select_rxlators_with_local_bricks (this, + volinfo, + dict, + PER_REPLICA); + break; default: rxlator_count = _select_rxlators_with_local_bricks (this, volinfo, - dict); + dict, + ALL_REPLICA); break; } if (!rxlator_count) goto out; + if (rxlator_count == -1) { + gf_log (this->name, GF_LOG_ERROR, "Could not determine the " + "translator count"); + ret = -1; + goto out; + } + ret = dict_set_int32 (dict, "count", rxlator_count); if (ret) goto out; @@ -4726,7 +5539,8 @@ glusterd_op_ac_send_brick_op (glusterd_op_sm_event_t *event, void *ctx) if (!opinfo.pending_count && !opinfo.brick_pending_count) { glusterd_clear_pending_nodes (&opinfo.pending_bricks); - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, req_ctx); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, req_ctx); } out: @@ -4780,7 +5594,8 @@ glusterd_op_ac_rcvd_brick_op_acc (glusterd_op_sm_event_t *event, void *ctx) if
(opinfo.brick_pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, ev_ctx->commit_ctx); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, &event->txn_id, + ev_ctx->commit_ctx); out: if (ev_ctx->rsp_dict) @@ -4832,11 +5647,9 @@ glusterd_op_bricks_select (glusterd_op_t op, dict_t *dict, char **op_errstr, ret = glusterd_bricks_select_rebalance_volume (dict, op_errstr, selected); break; -#ifdef HAVE_BD_XLATOR - case GD_OP_BD_OP: - ret = glusterd_bricks_select_bd (dict, op_errstr); + case GD_OP_SNAP: + ret = glusterd_bricks_select_snap (dict, op_errstr, selected); break; -#endif default: break; } @@ -5158,7 +5971,7 @@ glusterd_op_sm_new_event (glusterd_op_sm_event_type_t event_type, int glusterd_op_sm_inject_event (glusterd_op_sm_event_type_t event_type, - void *ctx) + uuid_t *txn_id, void *ctx) { int32_t ret = -1; glusterd_op_sm_event_t *event = NULL; @@ -5173,6 +5986,9 @@ glusterd_op_sm_inject_event (glusterd_op_sm_event_type_t event_type, event->ctx = ctx; + if (txn_id) + uuid_copy (event->txn_id, *txn_id); + gf_log (THIS->name, GF_LOG_DEBUG, "Enqueue event: '%s'", glusterd_op_sm_event_name_get (event->event)); list_add_tail (&event->list, &gd_op_sm_queue); @@ -5233,6 +6049,7 @@ glusterd_op_sm () glusterd_op_sm_t *state = NULL; glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; xlator_t *this = NULL; + glusterd_op_info_t txn_op_info; this = THIS; GF_ASSERT (this); @@ -5253,6 +6070,20 @@ glusterd_op_sm () "type: '%s'", glusterd_op_sm_event_name_get(event_type)); + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", + uuid_utoa (event->txn_id)); + + ret = glusterd_get_txn_opinfo (&event->txn_id, + &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get transaction's opinfo"); + glusterd_destroy_op_event_ctx (event); + GF_FREE (event); + continue; + } else + opinfo = txn_op_info; + state = glusterd_op_state_table[opinfo.state.state]; GF_ASSERT (state); @@ -5283,8 +6114,27 @@ glusterd_op_sm () return ret; } + if ((state[event_type].next_state == + GD_OP_STATE_DEFAULT) && + (event_type == GD_OP_EVENT_UNLOCK)) { + /* Clearing the transaction opinfo */ + ret = glusterd_clear_txn_opinfo(&event->txn_id); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to clear " + "transaction's opinfo"); + } else { + ret = glusterd_set_txn_opinfo (&event->txn_id, + &opinfo); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to set " + "transaction's opinfo"); + } + glusterd_destroy_op_event_ctx (event); GF_FREE (event); + } } @@ -5338,52 +6188,6 @@ glusterd_op_clear_op (glusterd_op_t op) } int32_t -glusterd_op_init_ctx (glusterd_op_t op) -{ - int ret = 0; - dict_t *dict = NULL; - xlator_t *this = NULL; - - this = THIS; - GF_ASSERT (this); - GF_ASSERT (GD_OP_NONE < op && op < GD_OP_MAX); - - if (_gf_false == glusterd_need_brick_op (op)) { - gf_log (this->name, GF_LOG_DEBUG, "Received op: %s, returning", - gd_op_list[op]); - goto out; - } - dict = dict_new (); - if (dict == NULL) { - ret = -1; - goto out; - } - ret = glusterd_op_set_ctx (dict); - if (ret) - goto out; -out: - gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} - - - -int32_t -glusterd_op_fini_ctx () -{ - dict_t *dict = NULL; - - dict = glusterd_op_get_ctx (); - if (dict) - dict_unref (dict); - - glusterd_op_reset_ctx (); - return 0; -} - - - -int32_t glusterd_op_free_ctx (glusterd_op_t op, void *ctx) { @@ -5409,9 +6213,6 @@ glusterd_op_free_ctx (glusterd_op_t op, void *ctx) case GD_OP_STATEDUMP_VOLUME: case GD_OP_CLEARLOCKS_VOLUME: case 
GD_OP_DEFRAG_BRICK_VOLUME: -#ifdef HAVE_BD_XLATOR - case GD_OP_BD_OP: -#endif dict_unref (ctx); break; default: @@ -5440,4 +6241,3 @@ glusterd_op_sm_init () pthread_mutex_init (&gd_op_sm_lock, NULL); return 0; } - diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h index df8b8c141..cf57b78e0 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h @@ -15,8 +15,8 @@ #include "config.h" #endif -#ifndef GSYNC_CONF -#define GSYNC_CONF GEOREP"/gsyncd.conf" +#ifndef GSYNC_CONF_TEMPLATE +#define GSYNC_CONF_TEMPLATE GEOREP"/gsyncd_template.conf" #endif #include <pthread.h> @@ -32,6 +32,8 @@ #include "protocol-common.h" #define GD_VOLUME_NAME_MAX 256 +#define GD_OP_PROTECTED (0x02) +#define GD_OP_UNPROTECTED (0x04) typedef enum glusterd_op_sm_state_ { GD_OP_STATE_DEFAULT = 0, @@ -75,6 +77,7 @@ struct glusterd_op_sm_event_ { struct list_head list; void *ctx; glusterd_op_sm_event_type_t event; + uuid_t txn_id; }; typedef struct glusterd_op_sm_event_ glusterd_op_sm_event_t; @@ -117,6 +120,7 @@ typedef struct glusterd_op_log_filename_ctx_ glusterd_op_log_filename_ctx_t; struct glusterd_op_lock_ctx_ { uuid_t uuid; + dict_t *dict; rpcsvc_request_t *req; }; @@ -162,12 +166,18 @@ typedef struct glusterd_gsync_status_temp { glusterd_volinfo_t *volinfo; char *node; }glusterd_gsync_status_temp_t; + +typedef enum cli_cmd_type_ { + PER_REPLICA, + ALL_REPLICA, + } cli_cmd_type; + int glusterd_op_sm_new_event (glusterd_op_sm_event_type_t event_type, glusterd_op_sm_event_t **new_event); int glusterd_op_sm_inject_event (glusterd_op_sm_event_type_t event_type, - void *ctx); + uuid_t *txn_id, void *ctx); int glusterd_op_sm_init (); @@ -251,10 +261,7 @@ glusterd_op_init_commit_rsp_dict (glusterd_op_t op); void glusterd_op_modify_op_ctx (glusterd_op_t op, void *op_ctx); -int32_t -glusterd_op_init_ctx (glusterd_op_t op); -int32_t -glusterd_op_fini_ctx (); + int32_t glusterd_volume_stats_read_perf (char *brick_path, int32_t blk_size, int32_t blk_count, double *throughput, double *time); @@ -270,7 +277,7 @@ glusterd_are_all_volumes_stopped (); int glusterd_stop_bricks (glusterd_volinfo_t *volinfo); int -gsync_status (char *master, char *slave, int *status); +gsync_status (char *master, char *slave, char *conf_path, int *status); int glusterd_check_gsync_running (glusterd_volinfo_t *volinfo, gf_boolean_t *flag); @@ -278,4 +285,15 @@ glusterd_check_gsync_running (glusterd_volinfo_t *volinfo, gf_boolean_t *flag); int glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict, dict_t *op_ctx); +int +glusterd_is_valid_vg (glusterd_brickinfo_t *brick, int check_tag, char *msg); + +int32_t +glusterd_get_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo); + +int32_t +glusterd_set_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo); + +int32_t +glusterd_clear_txn_opinfo (uuid_t *txn_id); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c index cb29462c9..a153ca1a9 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.c +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com> + Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. 
This file is licensed to you under your choice of the GNU Lesser @@ -52,8 +52,8 @@ pmap_port_isfree (int port) } -struct pmap_registry * -pmap_registry_new (void) +static struct pmap_registry * +pmap_registry_new (xlator_t *this) { struct pmap_registry *pmap = NULL; int i = 0; @@ -69,8 +69,8 @@ pmap_registry_new (void) pmap->ports[i].type = GF_PMAP_PORT_FOREIGN; } - pmap->base_port = GF_IANA_PRIV_PORTS_START; - pmap->last_alloc = GF_IANA_PRIV_PORTS_START; + pmap->base_port = pmap->last_alloc = + ((glusterd_conf_t *)(this->private))->base_port; return pmap; } @@ -86,7 +86,7 @@ pmap_registry_get (xlator_t *this) pmap = priv->pmap; if (!pmap) { - pmap = pmap_registry_new (); + pmap = pmap_registry_new (this); if (!pmap) return NULL; priv->pmap = pmap; @@ -280,7 +280,7 @@ out: } int -gluster_pmap_portbybrick (rpcsvc_request_t *req) +__gluster_pmap_portbybrick (rpcsvc_request_t *req) { pmap_port_by_brick_req args = {0,}; pmap_port_by_brick_rsp rsp = {0,}; @@ -314,7 +314,14 @@ fail: int -gluster_pmap_brickbyport (rpcsvc_request_t *req) +gluster_pmap_portbybrick (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __gluster_pmap_portbybrick); +} + + +int +__gluster_pmap_brickbyport (rpcsvc_request_t *req) { pmap_brick_by_port_req args = {0,}; pmap_brick_by_port_rsp rsp = {0,}; @@ -340,6 +347,14 @@ fail: return 0; } + +int +gluster_pmap_brickbyport (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __gluster_pmap_brickbyport); +} + + static int glusterd_brick_update_signin (glusterd_brickinfo_t *brickinfo, gf_boolean_t value) @@ -350,7 +365,7 @@ glusterd_brick_update_signin (glusterd_brickinfo_t *brickinfo, } int -gluster_pmap_signup (rpcsvc_request_t *req) +__gluster_pmap_signup (rpcsvc_request_t *req) { pmap_signup_req args = {0,}; pmap_signup_rsp rsp = {0,}; @@ -376,7 +391,13 @@ fail: } int -gluster_pmap_signin (rpcsvc_request_t *req) +gluster_pmap_signup (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __gluster_pmap_signup); +} + +int +__gluster_pmap_signin (rpcsvc_request_t *req) { pmap_signin_req args = {0,}; pmap_signin_rsp rsp = {0,}; @@ -407,9 +428,15 @@ fail: } +int +gluster_pmap_signin (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __gluster_pmap_signin); +} + int -gluster_pmap_signout (rpcsvc_request_t *req) +__gluster_pmap_signout (rpcsvc_request_t *req) { pmap_signout_req args = {0,}; pmap_signout_rsp rsp = {0,}; @@ -440,18 +467,19 @@ fail: return 0; } +int +gluster_pmap_signout (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __gluster_pmap_signout); +} + rpcsvc_actor_t gluster_pmap_actors[] = { - [GF_PMAP_NULL] = {"NULL", GF_PMAP_NULL, NULL, NULL, 0}, - [GF_PMAP_PORTBYBRICK] = {"PORTBYBRICK", GF_PMAP_PORTBYBRICK, - gluster_pmap_portbybrick, NULL, 0}, - [GF_PMAP_BRICKBYPORT] = {"BRICKBYPORT", GF_PMAP_BRICKBYPORT, - gluster_pmap_brickbyport, NULL, 0}, - [GF_PMAP_SIGNIN] = {"SIGNIN", GF_PMAP_SIGNIN, - gluster_pmap_signin, NULL, 0}, - [GF_PMAP_SIGNOUT] = {"SIGNOUT", GF_PMAP_SIGNOUT, - gluster_pmap_signout, NULL, 0}, - [GF_PMAP_SIGNUP] = {"SIGNUP", GF_PMAP_SIGNUP, - gluster_pmap_signup, NULL, 0}, + [GF_PMAP_NULL] = {"NULL", GF_PMAP_NULL, NULL, NULL, 0, DRC_NA}, + [GF_PMAP_PORTBYBRICK] = {"PORTBYBRICK", GF_PMAP_PORTBYBRICK, gluster_pmap_portbybrick, NULL, 0, DRC_NA}, + [GF_PMAP_BRICKBYPORT] = {"BRICKBYPORT", GF_PMAP_BRICKBYPORT, gluster_pmap_brickbyport, NULL, 0, DRC_NA}, + [GF_PMAP_SIGNIN] = {"SIGNIN", GF_PMAP_SIGNIN, gluster_pmap_signin, NULL, 0, DRC_NA}, + [GF_PMAP_SIGNOUT] = 
{"SIGNOUT", GF_PMAP_SIGNOUT, gluster_pmap_signout, NULL, 0, DRC_NA}, + [GF_PMAP_SIGNUP] = {"SIGNUP", GF_PMAP_SIGNUP, gluster_pmap_signup, NULL, 0, DRC_NA}, }; diff --git a/xlators/mgmt/glusterd/src/glusterd-quota.c b/xlators/mgmt/glusterd/src/glusterd-quota.c index e049982a2..318267199 100644 --- a/xlators/mgmt/glusterd/src/glusterd-quota.c +++ b/xlators/mgmt/glusterd/src/glusterd-quota.c @@ -25,7 +25,7 @@ #include <sys/wait.h> int -glusterd_handle_quota (rpcsvc_request_t *req) +__glusterd_handle_quota (rpcsvc_request_t *req) { int32_t ret = -1; gf_cli_req cli_req = {{0,}}; @@ -112,6 +112,12 @@ out: return ret; } +int +glusterd_handle_quota (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_quota); +} + int32_t glusterd_check_if_quota_trans_enabled (glusterd_volinfo_t *volinfo) { @@ -257,7 +263,9 @@ glusterd_quota_initiate_fs_crawl (glusterd_conf_t *priv, char *volname) "-l", DEFAULT_LOG_FILE_DIRECTORY"/quota-crawl.log", mountdir, NULL); + synclock_unlock (&priv->big_lock); ret = runner_run_reuse (&runner); + synclock_lock (&priv->big_lock); if (ret == -1) { runner_log (&runner, "glusterd", GF_LOG_DEBUG, "command failed"); runner_end (&runner); diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index 271de6111..b7b974c68 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -42,12 +42,27 @@ glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe); int glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr, - size_t len) + size_t len, glusterd_op_t op) { - int ret = -1; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + /* Check only if operation is not remove-brick */ + if ((GD_OP_REMOVE_BRICK != op) && + !gd_is_remove_brick_committed (volinfo)) { + gf_log (this->name, GF_LOG_DEBUG, "A remove-brick task on " + "volume %s is not yet committed", volinfo->volname); + snprintf (op_errstr, len, "A remove-brick task on volume %s is" + " not yet committed. 
Either commit or stop the " + "remove-brick task.", volinfo->volname); + goto out; + } if (glusterd_is_defrag_on (volinfo)) { - gf_log ("glusterd", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "rebalance on volume %s already started", volinfo->volname); snprintf (op_errstr, len, "Rebalance on %s is already started", @@ -57,7 +72,7 @@ glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr, if (glusterd_is_rb_started (volinfo) || glusterd_is_rb_paused (volinfo)) { - gf_log ("glusterd", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "Rebalance failed as replace brick is in progress on volume %s", volinfo->volname); snprintf (op_errstr, len, "Rebalance failed as replace brick is in progress on " @@ -66,13 +81,14 @@ glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr, } ret = 0; out: - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } + int32_t -glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, - rpc_clnt_event_t event, void *data) +__glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) { glusterd_volinfo_t *volinfo = NULL; glusterd_defrag_info_t *defrag = NULL; @@ -130,8 +146,6 @@ glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, GF_DEFRAG_STATUS_STARTED) { volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_FAILED; - } else { - volinfo->rebal.defrag_cmd = 0; } } @@ -160,6 +174,14 @@ glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, return ret; } +int32_t +glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) +{ + return glusterd_big_locked_notify (rpc, mydata, event, + data, __glusterd_defrag_notify); +} + int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, size_t len, int cmd, defrag_cbk_fn_t cbk, @@ -174,15 +196,14 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, char pidfile[PATH_MAX] = {0,}; char logfile[PATH_MAX] = {0,}; dict_t *options = NULL; -#ifdef DEBUG char valgrind_logfile[PATH_MAX] = {0,}; -#endif + priv = THIS->private; GF_ASSERT (volinfo); GF_ASSERT (op_errstr); - ret = glusterd_defrag_start_validate (volinfo, op_errstr, len); + ret = glusterd_defrag_start_validate (volinfo, op_errstr, len, op); if (ret) goto out; if (!volinfo->rebal.defrag) @@ -196,6 +217,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, defrag->cmd = cmd; + volinfo->rebal.defrag_cmd = cmd; volinfo->rebal.op = op; LOCK_INIT (&defrag->lock); @@ -218,7 +240,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, snprintf (logfile, PATH_MAX, "%s/%s-rebalance.log", DEFAULT_LOG_FILE_DIRECTORY, volinfo->volname); runinit (&runner); -#ifdef DEBUG + if (priv->valgrind) { snprintf (valgrind_logfile, PATH_MAX, "%s/valgrind-%s-rebalance.log", @@ -226,10 +248,10 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, volinfo->volname); runner_add_args (&runner, "valgrind", "--leak-check=full", - "--trace-children=yes", NULL); + "--trace-children=yes", "--track-origins=yes", + NULL); runner_argprintf (&runner, "--log-file=%s", valgrind_logfile); } -#endif runner_add_args (&runner, SBIN_DIR"/glusterfs", "-s", "localhost", "--volfile-id", volinfo->volname, @@ -241,6 +263,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, "*replicate*.metadata-self-heal=off", "--xlator-option", "*replicate*.entry-self-heal=off", 
"--xlator-option", "*replicate*.readdir-failover=off", + "--xlator-option", "*dht.readdir-optimize=on", NULL); runner_add_arg (&runner, "--xlator-option"); runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd); @@ -268,14 +291,16 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, * default timeout of 30mins used for unreliable network connections is * too long for unix domain socket connections. */ - ret = rpc_clnt_transport_unix_options_build (&options, sockfile, 600); + ret = rpc_transport_unix_options_build (&options, sockfile, 600); if (ret) { gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed"); goto out; } + synclock_unlock (&priv->big_lock); ret = glusterd_rpc_create (&defrag->rpc, options, glusterd_defrag_notify, volinfo); + synclock_lock (&priv->big_lock); if (ret) { gf_log (THIS->name, GF_LOG_ERROR, "RPC create failed"); goto out; @@ -320,14 +345,16 @@ glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo, * default timeout of 30mins used for unreliable network connections is * too long for unix domain socket connections. */ - ret = rpc_clnt_transport_unix_options_build (&options, sockfile, 600); + ret = rpc_transport_unix_options_build (&options, sockfile, 600); if (ret) { gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed"); goto out; } + synclock_unlock (&priv->big_lock); ret = glusterd_rpc_create (&defrag->rpc, options, glusterd_defrag_notify, volinfo); + synclock_lock (&priv->big_lock); if (ret) { gf_log (THIS->name, GF_LOG_ERROR, "RPC create failed"); goto out; @@ -376,7 +403,7 @@ out: } int -glusterd_handle_defrag_volume (rpcsvc_request_t *req) +__glusterd_handle_defrag_volume (rpcsvc_request_t *req) { int32_t ret = -1; gf_cli_req cli_req = {{0,}}; @@ -461,6 +488,12 @@ out: return 0; } +int +glusterd_handle_defrag_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_defrag_volume); +} + int glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) @@ -499,7 +532,7 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) case GF_DEFRAG_CMD_START: case GF_DEFRAG_CMD_START_LAYOUT_FIX: case GF_DEFRAG_CMD_START_FORCE: - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { op_ctx = glusterd_op_get_ctx (); if (!op_ctx) { ret = -1; @@ -525,8 +558,9 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) ret = 0; } } - ret = glusterd_defrag_start_validate (volinfo, - msg, sizeof (msg)); + ret = glusterd_defrag_start_validate (volinfo, msg, + sizeof (msg), + GD_OP_REBALANCE); if (ret) { gf_log (this->name, GF_LOG_DEBUG, "start validate failed"); @@ -629,15 +663,18 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) ret = 0; } else { uuid_parse (task_id_str, volinfo->rebal.rebalance_id) ; + volinfo->rebal.op = GD_OP_REBALANCE; } ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg), cmd, NULL, GD_OP_REBALANCE); break; case GF_DEFRAG_CMD_STOP: - /* Clear task-id only on explicitly stopping the - * rebalance process. + /* Clear task-id only on explicitly stopping rebalance. 
+ * Also clear the stored operation, so it doesn't cause trouble + * with future rebalance/remove-brick starts */ uuid_clear (volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_NONE; /* Fall back to the old volume file in case of decommission*/ list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c index ec69b3637..54b830870 100644 --- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c @@ -31,6 +31,7 @@ DEFAULT_VAR_RUN_DIRECTORY"/%s-"RB_CLIENT_MOUNTPOINT, \ volinfo->volname); +extern uuid_t global_txn_id; int glusterd_get_replace_op_str (gf1_cli_replace_op op, char *op_str) @@ -70,7 +71,7 @@ out: } int -glusterd_handle_replace_brick (rpcsvc_request_t *req) +__glusterd_handle_replace_brick (rpcsvc_request_t *req) { int32_t ret = -1; gf_cli_req cli_req = {{0,}}; @@ -171,6 +172,13 @@ out: return ret; } +int +glusterd_handle_replace_brick (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_replace_brick); +} + static int glusterd_get_rb_dst_brickinfo (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t **brickinfo) @@ -210,10 +218,10 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, dict_t *ctx = NULL; glusterd_conf_t *priv = NULL; char *savetok = NULL; - char voldir[PATH_MAX] = {0}; char pidfile[PATH_MAX] = {0}; char *task_id_str = NULL; xlator_t *this = NULL; + gf_boolean_t is_force = _gf_false; this = THIS; GF_ASSERT (this); @@ -261,13 +269,6 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, goto out; } - if (volinfo->backend == GD_VOL_BK_BD) { - snprintf (msg, sizeof (msg), "replace brick not supported " - "for Block backend volume"); - *op_errstr = gf_strdup (msg); - goto out; - } - if (GLUSTERD_STATUS_STARTED != volinfo->status) { ret = -1; snprintf (msg, sizeof (msg), "volume: %s is not started", @@ -325,7 +326,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, ret = -1; goto out; } - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { if (!ctx) { ret = -1; gf_log (this->name, GF_LOG_ERROR, @@ -350,6 +351,8 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, ret = 0; } } + is_force = dict_get_str_boolean (dict, "force", _gf_false); + break; case GF_REPLACE_OP_PAUSE: @@ -384,7 +387,9 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, } break; - case GF_REPLACE_OP_COMMIT_FORCE: break; + case GF_REPLACE_OP_COMMIT_FORCE: + is_force = _gf_true; + break; case GF_REPLACE_OP_STATUS: @@ -430,7 +435,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, } } - if (glusterd_is_local_addr (src_brickinfo->hostname)) { + if (gf_is_local_addr (src_brickinfo->hostname)) { gf_log (this->name, GF_LOG_DEBUG, "I AM THE SOURCE HOST"); if (src_brickinfo->port && rsp_dict) { @@ -443,10 +448,8 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, } } - GLUSTERD_GET_VOLUME_DIR (voldir, volinfo, priv); - GLUSTERD_GET_BRICK_PIDFILE (pidfile, voldir, - src_brickinfo->hostname, - src_brickinfo->path); + GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, src_brickinfo, + priv); if ((replace_op != GF_REPLACE_OP_COMMIT_FORCE) && !glusterd_is_service_running (pidfile, NULL)) { snprintf(msg, sizeof(msg), "Source brick %s:%s " @@ -509,15 +512,15 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, } if (!glusterd_is_rb_ongoing (volinfo) && - glusterd_is_local_addr 
(host)) { - ret = glusterd_brick_create_path (host, path, + gf_is_local_addr (host)) { + ret = glusterd_validate_and_create_brickpath (dst_brickinfo, volinfo->volume_id, - op_errstr); + op_errstr, is_force); if (ret) goto out; } - if (!glusterd_is_local_addr (host)) { + if (!gf_is_local_addr (host)) { ret = glusterd_friend_find (NULL, host, &peerinfo); if (ret) { snprintf (msg, sizeof (msg), "%s, is not a friend", @@ -544,7 +547,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, } if (replace_op == GF_REPLACE_OP_START && - glusterd_is_local_addr (volinfo->rep_brick.dst_brick->hostname)) { + gf_is_local_addr (volinfo->rep_brick.dst_brick->hostname)) { port = pmap_registry_alloc (THIS); if (!port) { gf_log (THIS->name, GF_LOG_CRITICAL, @@ -685,7 +688,7 @@ rb_src_brick_restart (glusterd_volinfo_t *volinfo, sleep (2); ret = glusterd_volume_start_glusterfs (volinfo, src_brickinfo, - _gf_false); + _gf_false); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to start " "glusterfs, ret: %d", ret); @@ -748,7 +751,7 @@ rb_spawn_dst_brick (glusterd_volinfo_t *volinfo, if (volinfo->memory_accounting) runner_add_arg (&runner, "--mem-accounting"); - ret = runner_run (&runner); + ret = runner_run_nowait (&runner); if (ret) { pmap_registry_remove (THIS, 0, brickinfo->path, GF_PMAP_PORT_BRICKSERVER, NULL); @@ -1405,7 +1408,7 @@ rb_update_srcbrick_port (glusterd_brickinfo_t *src_brickinfo, dict_t *rsp_dict, if (src_port) src_brickinfo->port = src_port; - if (glusterd_is_local_addr (src_brickinfo->hostname)) { + if (gf_is_local_addr (src_brickinfo->hostname)) { gf_log ("", GF_LOG_INFO, "adding src-brick port no"); @@ -1459,7 +1462,7 @@ rb_update_dstbrick_port (glusterd_brickinfo_t *dst_brickinfo, dict_t *rsp_dict, dst_brickinfo->port = dst_port; - if (glusterd_is_local_addr (dst_brickinfo->hostname)) { + if (gf_is_local_addr (dst_brickinfo->hostname)) { gf_log ("", GF_LOG_INFO, "adding dst-brick port no"); @@ -1629,7 +1632,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) /* Set task-id, if available, in op_ctx dict for operations * other than start */ - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { ctx = glusterd_op_get_ctx(); if (!ctx) { gf_log (this->name, GF_LOG_ERROR, "Failed to " @@ -1667,7 +1670,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) uuid_parse (task_id_str, volinfo->rep_brick.rb_id); } - if (glusterd_is_local_addr (dst_brickinfo->hostname)) { + if (gf_is_local_addr (dst_brickinfo->hostname)) { gf_log (this->name, GF_LOG_INFO, "I AM THE DESTINATION HOST"); if (!glusterd_is_rb_paused (volinfo)) { @@ -1687,7 +1690,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) } - if (glusterd_is_local_addr (src_brickinfo->hostname)) { + if (gf_is_local_addr (src_brickinfo->hostname)) { ret = rb_src_brick_restart (volinfo, src_brickinfo, 1); if (ret) { @@ -1697,7 +1700,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) } } - if (glusterd_is_local_addr (dst_brickinfo->hostname)) { + if (gf_is_local_addr (dst_brickinfo->hostname)) { gf_log (this->name, GF_LOG_INFO, "adding dst-brick port no"); @@ -1728,7 +1731,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) /* fall through */ case GF_REPLACE_OP_COMMIT_FORCE: { - if (glusterd_is_local_addr (dst_brickinfo->hostname)) { + if (gf_is_local_addr (dst_brickinfo->hostname)) { gf_log (this->name, GF_LOG_DEBUG, "I AM THE DESTINATION HOST"); ret = rb_kill_destination_brick (volinfo, @@ -1809,7 +1812,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) } } 
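rb_spawn_dst_brick above switches from runner_run to runner_run_nowait, presumably so glusterd does not sit waiting on the spawned brick process while holding the big lock. A self-contained sketch of what a no-wait spawn amounts to: fork, exec in the child, return in the parent without waiting.

#include <sys/types.h>
#include <unistd.h>

/* Minimal fork/exec sketch of a "no-wait" runner: the parent returns
 * as soon as the child is forked instead of blocking in waitpid(). */
static int
spawn_nowait (char *const argv[])
{
        pid_t pid = fork ();

        if (pid < 0)
                return -1;               /* fork failed                  */
        if (pid == 0) {
                execvp (argv[0], argv);  /* child becomes the new process */
                _exit (127);             /* only reached if exec failed   */
        }
        return 0;                        /* parent: do not wait           */
}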
- if (glusterd_is_local_addr (src_brickinfo->hostname)) { + if (gf_is_local_addr (src_brickinfo->hostname)) { ret = rb_src_brick_restart (volinfo, src_brickinfo, 0); if (ret) { @@ -1820,7 +1823,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) } } - if (glusterd_is_local_addr (dst_brickinfo->hostname)) { + if (gf_is_local_addr (dst_brickinfo->hostname)) { gf_log (this->name, GF_LOG_INFO, "I AM THE DESTINATION HOST"); ret = rb_kill_destination_brick (volinfo, dst_brickinfo); @@ -1892,6 +1895,7 @@ glusterd_do_replace_brick (void *data) glusterd_brickinfo_t *src_brickinfo = NULL; glusterd_brickinfo_t *dst_brickinfo = NULL; glusterd_conf_t *priv = NULL; + uuid_t *txn_id = &global_txn_id; int ret = 0; @@ -1911,6 +1915,10 @@ glusterd_do_replace_brick (void *data) gf_log ("", GF_LOG_DEBUG, "Replace brick operation detected"); + ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + ret = dict_get_int32 (dict, "operation", &op); if (ret) { gf_log ("", GF_LOG_DEBUG, @@ -2006,9 +2014,11 @@ glusterd_do_replace_brick (void *data) out: if (ret) - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + txn_id, NULL); else - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC, + txn_id, NULL); glusterd_op_sm (); } diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c index 124972857..d5200a4ae 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c @@ -33,6 +33,7 @@ extern glusterd_op_info_t opinfo; +extern uuid_t global_txn_id; int32_t glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret, @@ -84,6 +85,7 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret, } break; } + case GD_OP_GSYNC_CREATE: case GD_OP_GSYNC_SET: { if (ctx) { @@ -141,11 +143,26 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret, case GD_OP_LIST_VOLUME: case GD_OP_CLEARLOCKS_VOLUME: case GD_OP_HEAL_VOLUME: - case GD_OP_BD_OP: + case GD_OP_SNAP: { /*nothing specific to be done*/ break; } + case GD_OP_COPY_FILE: + { + if (ctx) + ret = dict_get_str (ctx, "errstr", &errstr); + break; + } + case GD_OP_SYS_EXEC: + { + if (ctx) { + ret = dict_get_str (ctx, "errstr", &errstr); + ret = dict_set_str (ctx, "glusterd_workdir", + conf->workdir); + } + break; + } } rsp.op_ret = op_ret; @@ -185,7 +202,21 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret, } int -glusterd_probe_cbk (struct rpc_req *req, struct iovec *iov, +glusterd_big_locked_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe, fop_cbk_fn_t fn) +{ + glusterd_conf_t *priv = THIS->private; + int ret = -1; + + synclock_lock (&priv->big_lock); + ret = fn (req, iov, count, myframe); + synclock_unlock (&priv->big_lock); + + return ret; +} + +int +__glusterd_probe_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { gd1_mgmt_probe_rsp rsp = {{0},}; @@ -219,7 +250,8 @@ glusterd_probe_cbk (struct rpc_req *req, struct iovec *iov, glusterd_xfer_cli_probe_resp (ctx->req, rsp.op_ret, rsp.op_errno, rsp.op_errstr, - ctx->hostname, ctx->port); + ctx->hostname, ctx->port, + ctx->dict); } glusterd_destroy_probe_ctx (ctx); @@ -247,7 +279,8 @@ glusterd_probe_cbk (struct rpc_req *req, struct iovec *iov, glusterd_xfer_cli_probe_resp (ctx->req, rsp.op_ret, rsp.op_errno, 
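glusterd_big_locked_cbk above, like the glusterd_big_locked_handler wrapper used for the CLI handlers in this series, serializes every callback under the process-wide big lock: the real logic moves into a __prefixed function and the unprefixed name becomes a thin locking shim. The shape of both wrappers, sketched with a pthread mutex:

#include <pthread.h>

/* handler_fn_t stands in for the rpcsvc handler and fop_cbk_fn_t
 * callback signatures; the shim only adds lock/unlock around fn. */
typedef int (*handler_fn_t) (void *arg);

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

static int
big_locked (void *arg, handler_fn_t fn)
{
        int ret;

        pthread_mutex_lock (&big_lock);
        ret = fn (arg);                  /* the real __handler/__cbk body */
        pthread_mutex_unlock (&big_lock);

        return ret;
}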
rsp.op_errstr, - ctx->hostname, ctx->port); + ctx->hostname, ctx->port, + ctx->dict); } glusterd_destroy_probe_ctx (ctx); @@ -287,7 +320,16 @@ out: } int -glusterd_friend_add_cbk (struct rpc_req * req, struct iovec *iov, +glusterd_probe_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_probe_cbk); +} + + +int +__glusterd_friend_add_cbk (struct rpc_req * req, struct iovec *iov, int count, void *myframe) { gd1_mgmt_friend_rsp rsp = {{0},}; @@ -367,7 +409,7 @@ out: if (ctx->req)//reverse probe doesn't have req ret = glusterd_xfer_cli_probe_resp (ctx->req, op_ret, op_errno, NULL, ctx->hostname, - ctx->port); + ctx->port, ctx->dict); if (!ret) { glusterd_friend_sm (); glusterd_op_sm (); @@ -380,7 +422,15 @@ out: } int -glusterd_friend_remove_cbk (struct rpc_req * req, struct iovec *iov, +glusterd_friend_add_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_friend_add_cbk); +} + +int +__glusterd_friend_remove_cbk (struct rpc_req * req, struct iovec *iov, int count, void *myframe) { gd1_mgmt_friend_rsp rsp = {{0},}; @@ -457,7 +507,7 @@ inject: respond: ret = glusterd_xfer_cli_deprobe_resp (ctx->req, op_ret, op_errno, NULL, - ctx->hostname); + ctx->hostname, ctx->dict); if (!ret && move_sm_now) { glusterd_friend_sm (); glusterd_op_sm (); @@ -473,8 +523,16 @@ respond: return ret; } +int +glusterd_friend_remove_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_friend_remove_cbk); +} + int32_t -glusterd_friend_update_cbk (struct rpc_req *req, struct iovec *iov, +__glusterd_friend_update_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { int ret = -1; @@ -506,8 +564,16 @@ out: return ret; } +int +glusterd_friend_update_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_friend_update_cbk); +} + int32_t -glusterd_cluster_lock_cbk (struct rpc_req *req, struct iovec *iov, +__glusterd_cluster_lock_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { gd1_mgmt_cluster_lock_rsp rsp = {{0},}; @@ -516,6 +582,7 @@ glusterd_cluster_lock_cbk (struct rpc_req *req, struct iovec *iov, glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; glusterd_peerinfo_t *peerinfo = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; this = THIS; GF_ASSERT (this); @@ -560,7 +627,7 @@ out: event_type = GD_OP_EVENT_RCVD_ACC; } - ret = glusterd_op_sm_inject_event (event_type, NULL); + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); if (!ret) { glusterd_friend_sm (); @@ -572,7 +639,173 @@ out: } int32_t -glusterd_cluster_unlock_cbk (struct rpc_req *req, struct iovec *iov, +glusterd_cluster_lock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_cluster_lock_cbk); +} + +static int32_t +glusterd_mgmt_v3_lock_peers_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + gd1_mgmt_v3_lock_rsp rsp = {{0},}; + int ret = -1; + int32_t op_ret = -1; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + if (-1 
== req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to decode mgmt_v3 lock " + "response received from peer"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + +out: + op_ret = rsp.op_ret; + + txn_id = &rsp.txn_id; + + gf_log (this->name, (op_ret) ? GF_LOG_ERROR : GF_LOG_DEBUG, + "Received mgmt_v3 lock %s from uuid: %s", + (op_ret) ? "RJT" : "ACC", uuid_utoa (rsp.uuid)); + + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "mgmt_v3 lock response received " + "from unknown peer: %s", uuid_utoa (rsp.uuid)); + } + + if (op_ret) { + event_type = GD_OP_EVENT_RCVD_RJT; + opinfo.op_ret = op_ret; + opinfo.op_errstr = gf_strdup ("Another transaction could be in " + "progress. Please try again after" + " sometime."); + } else { + event_type = GD_OP_EVENT_RCVD_ACC; + } + + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); + + if (!ret) { + glusterd_friend_sm (); + glusterd_op_sm (); + } + + GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); + return ret; +} + +int32_t +glusterd_mgmt_v3_lock_peers_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + glusterd_mgmt_v3_lock_peers_cbk_fn); +} + +static int32_t +glusterd_mgmt_v3_unlock_peers_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + gd1_mgmt_v3_unlock_rsp rsp = {{0},}; + int ret = -1; + int32_t op_ret = -1; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to decode mgmt_v3 unlock " + "response received from peer"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + +out: + op_ret = rsp.op_ret; + + txn_id = &rsp.txn_id; + + gf_log (this->name, (op_ret) ? GF_LOG_ERROR : GF_LOG_DEBUG, + "Received mgmt_v3 unlock %s from uuid: %s", + (op_ret) ? "RJT" : "ACC", + uuid_utoa (rsp.uuid)); + + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); + + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "mgmt_v3 unlock response received " + "from unknown peer: %s", uuid_utoa (rsp.uuid)); + } + + if (op_ret) { + event_type = GD_OP_EVENT_RCVD_RJT; + opinfo.op_ret = op_ret; + opinfo.op_errstr = gf_strdup ("Another transaction could be in " + "progress. 
Please try again after" + " sometime."); + } else { + event_type = GD_OP_EVENT_RCVD_ACC; + } + + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); + + if (!ret) { + glusterd_friend_sm (); + glusterd_op_sm (); + } + + GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); + return ret; +} + +int32_t +glusterd_mgmt_v3_unlock_peers_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + glusterd_mgmt_v3_unlock_peers_cbk_fn); +} + +int32_t +__glusterd_cluster_unlock_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { gd1_mgmt_cluster_lock_rsp rsp = {{0},}; @@ -581,6 +814,7 @@ glusterd_cluster_unlock_cbk (struct rpc_req *req, struct iovec *iov, glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; glusterd_peerinfo_t *peerinfo = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; this = THIS; GF_ASSERT (this); @@ -622,7 +856,7 @@ out: event_type = GD_OP_EVENT_RCVD_ACC; } - ret = glusterd_op_sm_inject_event (event_type, NULL); + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); if (!ret) { glusterd_friend_sm (); @@ -634,7 +868,15 @@ out: } int32_t -glusterd_stage_op_cbk (struct rpc_req *req, struct iovec *iov, +glusterd_cluster_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_cluster_unlock_cbk); +} + +int32_t +__glusterd_stage_op_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { gd1_mgmt_stage_op_rsp rsp = {{0},}; @@ -646,6 +888,7 @@ glusterd_stage_op_cbk (struct rpc_req *req, struct iovec *iov, char err_str[2048] = {0}; char *peer_str = NULL; xlator_t *this = NULL; + uuid_t *txn_id = NULL; this = THIS; GF_ASSERT (this); @@ -698,6 +941,10 @@ out: "Received stage %s from uuid: %s", (op_ret) ? 
"RJT" : "ACC", uuid_utoa (rsp.uuid)); + ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); if (ret) { @@ -733,7 +980,7 @@ out: break; } - ret = glusterd_op_sm_inject_event (event_type, NULL); + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); if (!ret) { glusterd_friend_sm (); @@ -753,7 +1000,15 @@ out: } int32_t -glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov, +glusterd_stage_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_stage_op_cbk); +} + +int32_t +__glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { gd1_mgmt_commit_op_rsp rsp = {{0},}; @@ -765,6 +1020,8 @@ glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov, char err_str[2048] = {0}; char *peer_str = NULL; xlator_t *this = NULL; + uuid_t *txn_id = NULL; + this = THIS; GF_ASSERT (this); @@ -818,6 +1075,10 @@ glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov, "Received commit %s from uuid: %s", (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid)); + ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); if (ret) { @@ -897,7 +1158,7 @@ glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov, } out: - ret = glusterd_op_sm_inject_event (event_type, NULL); + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); if (!ret) { glusterd_friend_sm (); @@ -911,7 +1172,13 @@ out: return ret; } - +int32_t +glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_commit_op_cbk); +} int32_t glusterd_rpc_probe (call_frame_t *frame, xlator_t *this, @@ -1128,6 +1395,134 @@ out: } int32_t +glusterd_mgmt_v3_lock_peers (call_frame_t *frame, xlator_t *this, + void *data) +{ + gd1_mgmt_v3_lock_req req = {{0},}; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + call_frame_t *dummy_frame = NULL; + dict_t *dict = NULL; + uuid_t *txn_id = NULL; + + if (!this) + goto out; + + dict = data; + + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo)); + if (ret) + goto out; + + //peerinfo should not be in payload + dict_del (dict, "peerinfo"); + + glusterd_get_uuid (&req.uuid); + + ret = dict_allocate_and_serialize (dict, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to serialize dict " + "to request buffer"); + goto out; + } + + /* Sending valid transaction ID to peers */ + ret = dict_get_bin (dict, "transaction_id", + (void **)&txn_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get transaction id."); + goto out; + } else { + gf_log (this->name, GF_LOG_DEBUG, + "Transaction_id = %s", uuid_utoa (*txn_id)); + uuid_copy (req.txn_id, *txn_id); + } + + dummy_frame = create_frame (this, this->ctx->pool); + if (!dummy_frame) + goto out; + + ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, + peerinfo->mgmt_v3, + GLUSTERD_MGMT_V3_LOCK, NULL, + this, glusterd_mgmt_v3_lock_peers_cbk, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_req); +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t 
+glusterd_mgmt_v3_unlock_peers (call_frame_t *frame, xlator_t *this, + void *data) +{ + gd1_mgmt_v3_unlock_req req = {{0},}; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + call_frame_t *dummy_frame = NULL; + dict_t *dict = NULL; + uuid_t *txn_id = NULL; + + if (!this) + goto out; + + dict = data; + + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo)); + if (ret) + goto out; + + //peerinfo should not be in payload + dict_del (dict, "peerinfo"); + + glusterd_get_uuid (&req.uuid); + + ret = dict_allocate_and_serialize (dict, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to serialize dict " + "to request buffer"); + goto out; + } + + /* Sending valid transaction ID to peers */ + ret = dict_get_bin (dict, "transaction_id", + (void **)&txn_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get transaction id."); + goto out; + } else { + gf_log (this->name, GF_LOG_DEBUG, + "Transaction_id = %s", uuid_utoa (*txn_id)); + uuid_copy (req.txn_id, *txn_id); + } + + dummy_frame = create_frame (this, this->ctx->pool); + if (!dummy_frame) + goto out; + + ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, + peerinfo->mgmt_v3, + GLUSTERD_MGMT_V3_UNLOCK, NULL, + this, glusterd_mgmt_v3_unlock_peers_cbk, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_req); +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t glusterd_cluster_unlock (call_frame_t *frame, xlator_t *this, void *data) { @@ -1276,7 +1671,7 @@ out: } int32_t -glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov, +__glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { gd1_mgmt_brick_op_rsp rsp = {0}; @@ -1290,6 +1685,7 @@ glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov, glusterd_req_ctx_t *req_ctx = NULL; glusterd_pending_node_t *node = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; this = THIS; GF_ASSERT (this); @@ -1352,6 +1748,11 @@ glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov, } } out: + + ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + ev_ctx = GF_CALLOC (1, sizeof (*ev_ctx), gf_gld_mt_brick_rsp_ctx_t); GF_ASSERT (ev_ctx); if (op_ret) { @@ -1364,7 +1765,7 @@ out: ev_ctx->pending_node = frame->cookie; ev_ctx->rsp_dict = dict; ev_ctx->commit_ctx = frame->local; - ret = glusterd_op_sm_inject_event (event_type, ev_ctx); + ret = glusterd_op_sm_inject_event (event_type, txn_id, ev_ctx); if (!ret) { glusterd_friend_sm (); glusterd_op_sm (); @@ -1378,9 +1779,18 @@ out: } int32_t +glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_brick_op_cbk); +} + +int32_t glusterd_brick_op (call_frame_t *frame, xlator_t *this, void *data) { + gd1_mgmt_brick_op_req *req = NULL; int ret = 0; glusterd_conf_t *priv = NULL; @@ -1391,6 +1801,7 @@ glusterd_brick_op (call_frame_t *frame, xlator_t *this, glusterd_req_ctx_t *req_ctx = NULL; struct rpc_clnt *rpc = NULL; dict_t *op_ctx = NULL; + uuid_t *txn_id = &global_txn_id; if (!this) { ret = -1; @@ -1413,6 +1824,11 @@ glusterd_brick_op (call_frame_t *frame, xlator_t *this, goto out; } + + ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", 
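Both glusterd_mgmt_v3_lock_peers and glusterd_mgmt_v3_unlock_peers delete "peerinfo" from the dict before serializing it, since it carries a process-local pointer that must not go on the wire. The shared send-side preparation, sketched against the dict API used here (prepare_payload itself is hypothetical):

/* Strip local-only entries, then flatten the dict into the request
 * buffer exactly as the two senders above do. */
static int
prepare_payload (dict_t *dict, char **buf, unsigned int *len)
{
        dict_del (dict, "peerinfo");    /* local pointer, not payload */

        return dict_allocate_and_serialize (dict, buf, len);
}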
uuid_utoa (*txn_id)); + list_for_each_entry (pending_node, &opinfo.pending_bricks, list) { dummy_frame = create_frame (this, this->ctx->pool); if (!dummy_frame) @@ -1490,7 +1906,8 @@ glusterd_brick_op (call_frame_t *frame, xlator_t *this, out: if (ret) { - glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, data); + glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + txn_id, data); opinfo.op_ret = ret; } gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -1518,6 +1935,12 @@ struct rpc_clnt_procedure gd_mgmt_actors[GLUSTERD_MGMT_MAXVALUE] = { [GLUSTERD_MGMT_COMMIT_OP] = {"COMMIT_OP", glusterd_commit_op}, }; +struct rpc_clnt_procedure gd_mgmt_v3_actors[GLUSTERD_MGMT_V3_MAXVALUE] = { + [GLUSTERD_MGMT_V3_NULL] = {"NULL", NULL }, + [GLUSTERD_MGMT_V3_LOCK] = {"MGMT_V3_LOCK", glusterd_mgmt_v3_lock_peers}, + [GLUSTERD_MGMT_V3_UNLOCK] = {"MGMT_V3_UNLOCK", glusterd_mgmt_v3_unlock_peers}, +}; + struct rpc_clnt_program gd_mgmt_prog = { .progname = "glusterd mgmt", .prognum = GD_MGMT_PROGRAM, @@ -1542,3 +1965,10 @@ struct rpc_clnt_program gd_peer_prog = { .numproc = GLUSTERD_FRIEND_MAXVALUE, }; +struct rpc_clnt_program gd_mgmt_v3_prog = { + .progname = "glusterd mgmt v3", + .prognum = GD_MGMT_V3_PROGRAM, + .progver = GD_MGMT_V3_VERSION, + .proctable = gd_mgmt_v3_actors, + .numproc = GLUSTERD_MGMT_V3_MAXVALUE, +}; diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c index 5a38fdfec..c671edf68 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-sm.c @@ -400,7 +400,8 @@ glusterd_ac_send_friend_remove_req (glusterd_friend_sm_event_t *event, if (ctx) ret = glusterd_xfer_cli_deprobe_resp (ctx->req, ret, 0, NULL, - ctx->hostname); + ctx->hostname, + ctx->dict); glusterd_friend_sm (); glusterd_op_sm (); @@ -650,7 +651,8 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx) uuid_copy (peerinfo->uuid, ev_ctx->uuid); //Build comparison logic here. - ret = glusterd_compare_friend_data (ev_ctx->vols, &status); + ret = glusterd_compare_friend_data (ev_ctx->vols, &status, + peerinfo->hostname); if (ret) goto out; @@ -687,7 +689,8 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx) glusterd_friend_sm_inject_event (new_event); ret = glusterd_xfer_friend_add_resp (ev_ctx->req, ev_ctx->hostname, - ev_ctx->port, op_ret, op_errno); + peerinfo->hostname, ev_ctx->port, + op_ret, op_errno); out: gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); @@ -1075,8 +1078,25 @@ glusterd_friend_sm () ret = 0; out: - if (quorum_action) + if (quorum_action) { + /* When glusterd is restarted, it needs to wait until the 'friends' view + * of the volumes settle, before it starts any of the internal daemons. + * + * Every friend that was part of the cluster, would send its + * cluster-view, 'our' way. For every friend, who belongs to + * a partition which has a different cluster-view from our + * partition, we may update our cluster-view. For subsequent + * friends from that partition would agree with us, if the first + * friend wasn't rejected. For every first friend, whom we agreed with, + * we would need to start internal daemons/bricks belonging to the + * new volumes. + * glusterd_spawn_daemons calls functions that are idempotent. ie, + * the functions spawn process(es) only if they are not started yet. 
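The comment above leans on glusterd_spawn_daemons being idempotent: a daemon is started only if it is not already running, so calling it once per settled friend is harmless. glusterd makes that check through pidfiles, as glusterd_is_service_running does elsewhere in this series; a self-contained sketch of such a liveness test:

#include <sys/types.h>
#include <signal.h>
#include <stdio.h>

/* A readable pidfile whose pid answers kill(pid, 0) counts as
 * "already running", so the caller can skip the spawn. */
static int
service_running (const char *pidfile)
{
        FILE *fp  = fopen (pidfile, "r");
        int   pid = 0;
        int   ret = 0;

        if (!fp)
                return 0;
        if (fscanf (fp, "%d", &pid) == 1 && pid > 0)
                ret = (kill ((pid_t)pid, 0) == 0);  /* signal 0: existence test */
        fclose (fp);

        return ret;
}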
+ * + * */ + glusterd_spawn_daemons (NULL); glusterd_do_quorum_action (); + } return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.h b/xlators/mgmt/glusterd/src/glusterd-sm.h index 0af45deb6..b9bedbe69 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-sm.h @@ -27,14 +27,7 @@ #include "byte-order.h" //#include "glusterd.h" #include "rpcsvc.h" - -struct glusterd_store_handle_ { - char *path; - int fd; - FILE *read; -}; - -typedef struct glusterd_store_handle_ glusterd_store_handle_t; +#include "store.h" typedef enum gd_quorum_contribution_ { QUORUM_NONE, @@ -93,7 +86,9 @@ typedef struct glusterd_sm_tr_log_ { struct glusterd_peerinfo_ { uuid_t uuid; - char uuid_str[50]; + char uuid_str[50]; /* Retrieve this using + * gd_peer_uuid_str () + */ glusterd_peer_state_info_t state; char *hostname; int port; @@ -102,11 +97,13 @@ struct glusterd_peerinfo_ { struct rpc_clnt *rpc; rpc_clnt_prog_t *mgmt; rpc_clnt_prog_t *peer; + rpc_clnt_prog_t *mgmt_v3; int connected; - glusterd_store_handle_t *shandle; + gf_store_handle_t *shandle; glusterd_sm_tr_log_t sm_log; gf_boolean_t quorum_action; gd_quorum_contrib_t quorum_contrib; + gf_boolean_t locked; }; typedef struct glusterd_peerinfo_ glusterd_peerinfo_t; @@ -120,6 +117,7 @@ typedef enum glusterd_ev_gen_mode_ { typedef struct glusterd_peer_ctx_args_ { rpcsvc_request_t *req; glusterd_ev_gen_mode_t mode; + dict_t *dict; } glusterd_peerctx_args_t; typedef struct glusterd_peer_ctx_ { @@ -186,6 +184,7 @@ typedef struct glusterd_probe_ctx_ { char *hostname; rpcsvc_request_t *req; int port; + dict_t *dict; } glusterd_probe_ctx_t; int glusterd_friend_sm_new_event (glusterd_friend_sm_event_type_t event_type, diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c new file mode 100644 index 000000000..9b811cd05 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c @@ -0,0 +1,5590 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <inttypes.h> +#include <sys/types.h> +#include <unistd.h> +#include <sys/resource.h> +#include <sys/statvfs.h> +#include <sys/mount.h> + +#include "globals.h" +#include "compat.h" +#include "protocol-common.h" +#include "xlator.h" +#include "logging.h" +#include "timer.h" +#include "glusterd-mem-types.h" +#include "glusterd.h" +#include "glusterd-sm.h" +#include "glusterd-op-sm.h" +#include "glusterd-utils.h" +#include "glusterd-store.h" +#include "run.h" +#include "glusterd-volgen.h" +#include "glusterd-mgmt.h" +#include "glusterd-syncop.h" + +#include "syscall.h" +#include "cli1-xdr.h" +#include "xdr-generic.h" + +#ifdef GF_LINUX_HOST_OS +#include <mntent.h> +#endif + +char snap_mount_folder[PATH_MAX]; + +static int32_t +glusterd_find_missed_snap (dict_t *rsp_dict, glusterd_volinfo_t *vol, + char *snap_uuid, struct list_head *peers, + int32_t op); + +/* This function will restore a snapshot volumes + * + * @param dict dictionary containing snapshot restore request + * @param op_errstr In case of any failure error message will be returned + * in this variable + * @return Negative value on Failure and 0 in success + */ +int +glusterd_snapshot_restore (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int ret = -1; + char *volname = NULL; + char *snapname = NULL; + xlator_t *this = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "snap name"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (NULL == snap) { + ret = gf_asprintf (op_errstr, "Snap (%s) not found", + snapname); + if (ret < 0) { + goto out; + } + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); + ret = -1; + goto out; + } + + /* TODO : As of now there is only volume in snapshot. + * Change this when multiple volume snapshot is introduced + */ + snap_volinfo = list_entry (snap->volumes.next, glusterd_volinfo_t, + vol_list); + + ret = glusterd_volinfo_find (snap_volinfo->parent_volname, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not get volinfo of " + "%s", snap_volinfo->parent_volname); + goto out; + } + + if (is_origin_glusterd (dict) == _gf_true) { + /* From origin glusterd check if * + * any peers with snap bricks is down */ + ret = glusterd_find_missed_snap (rsp_dict, snap_volinfo, + snap_volinfo->volname, + &priv->peers, + GF_SNAP_OPTION_TYPE_RESTORE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to find missed snap restores"); + goto out; + } + } + + ret = gd_restore_snap_volume (rsp_dict, volinfo, snap_volinfo); + if (ret) { + /* No need to update op_errstr because it is assumed + * that the called function will do that in case of + * failure. + */ + gf_log (this->name, GF_LOG_ERROR, "Failed to restore " + "snap for %s volume", volname); + goto out; + } + + ret = 0; + + /* TODO: Need to check if we need to delete the snap after the + * operation is successful or not. Also need to persist the state + * of restore operation in the store. + */ +out: + return ret; +} + +/* This function is called before actual restore is taken place. 
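glusterd passes variable-length lists through dicts using numbered keys, as the restore prevalidation below does with "volname%d" and the create path does with "vol%ld_brickcount" and friends. A sketch of the write side of that convention, assuming the dict_set_str/dict_set_int32 calls used throughout this file:

/* Flatten an array into numbered keys ("volname1", "volname2", ...);
 * the count travels under its own key so the reader knows how far to
 * iterate. set_volname_list itself is hypothetical. */
static int
set_volname_list (dict_t *dict, char **names, int count)
{
        char key[64] = {0,};
        int  i       = 0;
        int  ret     = dict_set_int32 (dict, "volcount", count);

        for (i = 1; !ret && i <= count; i++) {
                snprintf (key, sizeof (key), "volname%d", i);
                ret = dict_set_str (dict, key, names[i - 1]);
        }

        return ret;
}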
This function + * will validate whether the snapshot volumes are ready to be restored or not. + * + * @param dict dictionary containing snapshot restore request + * @param op_errstr In case of any failure error message will be returned + * in this variable + * @param rsp_dict response dictionary + * @return Negative value on Failure and 0 in success + */ +int +glusterd_snapshot_restore_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int ret = -1; + int32_t i = 0; + int32_t volcount = 0; + gf_boolean_t snap_restored = _gf_false; + char key[PATH_MAX] = {0, }; + char *volname = NULL; + char *snapname = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "snap name"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (NULL == snap) { + ret = gf_asprintf (op_errstr, "Snap (%s) not found", + snapname); + if (ret < 0) { + goto out; + } + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); + ret = -1; + goto out; + } + + snap_restored = snap->snap_restored; + + if (snap_restored) { + ret = gf_asprintf (op_errstr, "Snap (%s) is already " + "restored", snapname); + if (ret < 0) { + goto out; + } + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); + ret = -1; + goto out; + } + + ret = dict_set_str (rsp_dict, "snapname", snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "snap name"); + goto out; + } + + ret = dict_get_int32 (dict, "volcount", &volcount); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get volume count"); + goto out; + } + + /* Snapshot restore will only work if all the volumes, + that are part of the snapshot, are stopped. */ + for (i = 1; i <= volcount; ++i) { + snprintf (key, sizeof (key), "volname%d", i); + ret = dict_get_str (dict, key, &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + ret = gf_asprintf (op_errstr, "Volume (%s) not found", + volname); + if (ret < 0) { + goto out; + } + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); + ret = -1; + goto out; + } + + if (glusterd_is_volume_started (volinfo)) { + ret = gf_asprintf (op_errstr, "Volume (%s) has been " + "started. Volume needs to be stopped before restoring " + "a snapshot.", volname); + if (ret < 0) { + goto out; + } + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); + ret = -1; + goto out; + } + } + + ret = 0; +out: + return ret; +} + +int +snap_max_hard_limits_validate (dict_t *dict, char *volname, + uint64_t value, char **op_errstr) +{ + char err_str[PATH_MAX] = ""; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = -1; + uint64_t max_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + conf = this->private; + + GF_ASSERT (conf); + + if (volname) { + ret = glusterd_volinfo_find (volname, &volinfo); + if (!ret) { + if (volinfo->is_snap_volume) { + ret = -1; + snprintf (err_str, PATH_MAX, + "%s is a snap volume. 
Configuring " + "snap-max-hard-limit for a snap " + "volume is prohibited.", volname); + goto out; + } + } + } + + if (value) { + /* Max limit for the system is GLUSTERD_SNAPS_MAX_HARD_LIMIT + * but max limit for a volume is conf->snap_max_hard_limit. + */ + if (volname) { + max_limit = conf->snap_max_hard_limit; + } else { + max_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + } + } + + if ((value < 0) || (value > max_limit)) { + ret = -1; + snprintf (err_str, PATH_MAX, "Invalid snap-max-hard-limit" + "%"PRIu64 ". Expected range 0 - %"PRIu64, + value, max_limit); + goto out; + } + + ret = 0; +out: + if (ret) { + *op_errstr = gf_strdup (err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + } + return ret; +} + +int +glusterd_snapshot_config_prevalidate (dict_t *dict, char **op_errstr) +{ + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + int ret = -1; + int config_command = 0; + char err_str[PATH_MAX] = {0,}; + glusterd_conf_t *conf = NULL; + uint64_t value = 0; + uint64_t hard_limit = 0; + uint64_t soft_limit = 0; + gf_loglevel_t loglevel = GF_LOG_ERROR; + uint64_t max_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + conf = this->private; + + GF_ASSERT (conf); + + ret = dict_get_int32 (dict, "config-command", &config_command); + if (ret) { + snprintf (err_str, sizeof (err_str), + "failed to get config-command type"); + goto out; + } + + ret = dict_get_uint64 (dict, "snap-max-hard-limit", &hard_limit); + + ret = dict_get_uint64 (dict, "snap-max-soft-limit", &soft_limit); + + ret = dict_get_str (dict, "volname", &volname); + + if (volname) { + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, sizeof (err_str), + "Volume %s does not exist.", volname); + goto out; + } + } + + switch (config_command) { + case GF_SNAP_CONFIG_TYPE_SET: + if (hard_limit) { + /* Validations for snap-max-hard-limits */ + ret = snap_max_hard_limits_validate (dict, volname, + hard_limit, op_errstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "snap-max-hard-limit validation " + "failed."); + goto out; + } + } + + if (soft_limit) { + max_limit = GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT; + if ((soft_limit < 0) || (soft_limit > max_limit)) { + ret = -1; + snprintf (err_str, PATH_MAX, "Invalid " + "snap-max-soft-limit ""%" + PRIu64 ". 
Expected range 0 - %"PRIu64, + value, max_limit); + goto out; + } + break; + } + default: + break; + } + + ret = 0; +out: + + if (ret && err_str[0] != '\0') { + gf_log (this->name, loglevel, "%s", err_str); + *op_errstr = gf_strdup (err_str); + } + + return ret; +} + +int +glusterd_snap_create_pre_val_use_rsp_dict (dict_t *dst, dict_t *src) +{ + char *snap_brick_dir = NULL; + char *snap_device = NULL; + char *tmpstr = NULL; + char key[PATH_MAX] = ""; + char snapbrckcnt[PATH_MAX] = ""; + char snapbrckord[PATH_MAX] = ""; + int ret = -1; + int64_t i = -1; + int64_t j = -1; + int64_t volume_count = 0; + int64_t brick_count = 0; + int64_t brick_order = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dst); + GF_ASSERT (src); + + ret = dict_get_int64 (src, "volcount", &volume_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "get the volume count"); + goto out; + } + + for (i = 0; i < volume_count; i++) { + memset (snapbrckcnt, '\0', sizeof(snapbrckcnt)); + ret = snprintf (snapbrckcnt, sizeof(snapbrckcnt) - 1, + "vol%ld_brickcount", i+1); + ret = dict_get_int64 (src, snapbrckcnt, &brick_count); + if (ret) { + gf_log (this->name, GF_LOG_TRACE, + "No bricks for this volume in this dict"); + continue; + } + + for (j = 0; j < brick_count; j++) { + /* Fetching data from source dict */ + snprintf (key, sizeof(key) - 1, + "vol%ld.brickdir%ld", i+1, j); + + ret = dict_get_ptr (src, key, + (void **)&snap_brick_dir); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Unable to fetch %s", key); + continue; + } + + snprintf (key, sizeof(key) - 1, + "vol%ld.brick_snapdevice%ld", i+1, j); + + ret = dict_get_ptr (src, key, + (void **)&snap_device); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch snap_device"); + goto out; + } + + snprintf (snapbrckord, sizeof(snapbrckord) - 1, + "vol%ld.brick%ld.order", i+1, j); + + ret = dict_get_int64 (src, snapbrckord, &brick_order); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get brick order"); + goto out; + } + + /* Adding the data in the dst dict */ + snprintf (key, sizeof(key) - 1, + "vol%ld.brickdir%ld", i+1, brick_order); + + tmpstr = gf_strdup (snap_brick_dir); + if (!tmpstr) { + gf_log (this->name, GF_LOG_ERROR, + "Out Of Memory"); + ret = -1; + goto out; + } + ret = dict_set_dynstr (dst, key, tmpstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set %s", key); + GF_FREE (tmpstr); + goto out; + } + + snprintf (key, sizeof(key) - 1, + "vol%ld.brick_snapdevice%ld", + i+1, brick_order); + + tmpstr = gf_strdup (snap_device); + if (!tmpstr) { + ret = -1; + goto out; + } + ret = dict_set_dynstr (dst, key, tmpstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set %s", key); + GF_FREE (tmpstr); + goto out; + } + + } + } + + ret = 0; +out: + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_snap_pre_validate_use_rsp_dict (dict_t *dst, dict_t *src) +{ + int ret = -1; + int32_t snap_command = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + if (!dst || !src) { + gf_log (this->name, GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (dst, "type", &snap_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + ret = glusterd_snap_create_pre_val_use_rsp_dict (dst, src); + if (ret) { + gf_log (this->name, 
GF_LOG_ERROR, "Unable to use " + "rsp dict"); + goto out; + } + break; + default: + break; + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_snapshot_create_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + char *volname = NULL; + char *snapname = NULL; + char *device = NULL; + char *tmpstr = NULL; + char *brick_dir = NULL; + char snap_brick_dir[PATH_MAX] = ""; + char *mnt_pt = NULL; + char key[PATH_MAX] = ""; + char snap_mount[PATH_MAX] = ""; + char snap_volname[64] = ""; + char err_str[PATH_MAX] = ""; + int ret = -1; + int64_t i = 0; + int64_t volcount = 0; + int64_t brick_count = 0; + int64_t brick_order = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + uuid_t *snap_volid = NULL; + gf_loglevel_t loglevel = GF_LOG_ERROR; + glusterd_conf_t *conf = NULL; + int64_t effective_max_limit = 0; + + this = THIS; + GF_ASSERT (op_errstr); + conf = this->private; + GF_ASSERT (conf); + + ret = dict_get_int64 (dict, "volcount", &volcount); + if (ret) { + snprintf (err_str, sizeof (err_str), "Failed to " + "get the volume count"); + goto out; + } + if (volcount <= 0) { + snprintf (err_str, sizeof (err_str), "Invalid volume count %ld " + "supplied", volcount); + ret = -1; + goto out; + } + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + snprintf (err_str, sizeof (err_str), "Failed to get snapname"); + goto out; + } + + if (glusterd_find_snap_by_name (snapname)) { + ret = -1; + snprintf (err_str, sizeof (err_str), "Snap %s already exists", + snapname); + goto out; + } + + for (i = 1; i <= volcount; i++) { + snprintf (key, sizeof (key), "volname%ld", i); + ret = dict_get_str (dict, key, &volname); + if (ret) { + snprintf (err_str, sizeof (err_str), + "failed to get volume name"); + goto out; + } + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, sizeof (err_str), + "Volume (%s) does not exist ", volname); + goto out; + } + + ret = -1; + if (!glusterd_is_volume_started (volinfo)) { + snprintf (err_str, sizeof (err_str), "volume %s is " + "not started", volinfo->volname); + loglevel = GF_LOG_WARNING; + goto out; + } + if (glusterd_is_defrag_on (volinfo)) { + snprintf (err_str, sizeof (err_str), + "rebalance process is running for the " + "volume %s", volname); + loglevel = GF_LOG_WARNING; + goto out; + } + /* TODO: Also check whether geo replication is running */ + + if (volinfo->is_snap_volume == _gf_true) { + snprintf (err_str, sizeof (err_str), + "Volume %s is a snap volume", volname); + loglevel = GF_LOG_WARNING; + goto out; + } + + if (volinfo->snap_max_hard_limit < conf->snap_max_hard_limit) + effective_max_limit = volinfo->snap_max_hard_limit; + else + effective_max_limit = conf->snap_max_hard_limit; + + if (volinfo->snap_count >= effective_max_limit) { + snprintf (err_str, sizeof (err_str), + "The number of existing snaps has reached " + "the effective maximum limit of %"PRIu64" ," + "for the volume %s", effective_max_limit, + volname); + loglevel = GF_LOG_WARNING; + goto out; + } + + snprintf (key, sizeof(key) - 1, "vol%ld_volid", i); + ret = dict_get_bin (dict, key, (void **)&snap_volid); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch snap_volid"); + goto out; + } + + /* snap volume uuid is used as lvm snapshot name. 
+ This will avoid restrictions on snapshot names + provided by user */ + GLUSTERD_GET_UUID_NOHYPHEN (snap_volname, *snap_volid); + + brick_count = 0; + brick_order = 0; + /* Adding snap bricks mount paths to the dict */ + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) { + brick_order++; + continue; + } + + if (!glusterd_is_brick_started (brickinfo)) { + gf_log (this->name, GF_LOG_WARNING, + "brick %s:%s is not started", + brickinfo->hostname, + brickinfo->path); + brick_order++; + brick_count++; + continue; + } + + device = glusterd_get_brick_mount_details (brickinfo); + if (!device) { + snprintf (err_str, sizeof (err_str), + "getting device name for the brick " + "%s:%s failed", brickinfo->hostname, + brickinfo->path); + ret = -1; + goto out; + } + + device = glusterd_build_snap_device_path (device, + snap_volname); + if (!device) { + snprintf (err_str, sizeof (err_str), + "cannot copy the snapshot device " + "name (volname: %s, snapname: %s)", + volinfo->volname, snapname); + loglevel = GF_LOG_WARNING; + ret = -1; + goto out; + } + + snprintf (key, sizeof(key), + "vol%ld.brick_snapdevice%ld", i, + brick_count); + ret = dict_set_dynstr (rsp_dict, key, device); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set %s", key); + GF_FREE (device); + goto out; + } + + ret = glusterd_get_brick_root (brickinfo->path, + &mnt_pt); + if (ret) { + snprintf (err_str, sizeof (err_str), + "could not get the root of the brick path %s", + brickinfo->path); + loglevel = GF_LOG_WARNING; + goto out; + } + if (strncmp (brickinfo->path, mnt_pt, strlen(mnt_pt))) { + snprintf (err_str, sizeof (err_str), + "brick: %s brick mount: %s", + brickinfo->path, mnt_pt); + loglevel = GF_LOG_WARNING; + goto out; + } + + brick_dir = &brickinfo->path[strlen (mnt_pt)]; + brick_dir++; + + snprintf (snap_brick_dir, sizeof (snap_brick_dir), + "/%s", brick_dir); + + tmpstr = gf_strdup (snap_brick_dir); + if (!tmpstr) { + ret = -1; + goto out; + } + snprintf (key, sizeof(key), "vol%ld.brickdir%ld", i, + brick_count); + ret = dict_set_dynstr (rsp_dict, key, tmpstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set %s", snap_mount); + goto out; + } + tmpstr = NULL; + + snprintf (key, sizeof(key) - 1, "vol%ld.brick%ld.order", + i, brick_count); + ret = dict_set_int64 (rsp_dict, key, brick_order); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set %s", key); + goto out; + } + + brick_count++; + brick_order++; + } + snprintf (key, sizeof(key) - 1, "vol%ld_brickcount", i); + ret = dict_set_int64 (rsp_dict, key, brick_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set %s", + key); + goto out; + } + } + + ret = dict_set_int64 (rsp_dict, "volcount", volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set volcount"); + goto out; + } + + ret = 0; +out: + if (ret) + GF_FREE (tmpstr); + + if (ret && err_str[0] != '\0') { + gf_log (this->name, loglevel, "%s", err_str); + *op_errstr = gf_strdup (err_str); + } + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +glusterd_snap_t* +glusterd_new_snap_object() +{ + glusterd_snap_t *snap = NULL; + + snap = GF_CALLOC (1, sizeof (*snap), gf_gld_mt_snap_t); + + if (snap) { + if (LOCK_INIT (&snap->lock)) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed initiating" + " snap lock"); + GF_FREE (snap); + return NULL; + } + + INIT_LIST_HEAD (&snap->snap_list); + INIT_LIST_HEAD (&snap->volumes); + snap->snapname[0] = 0; + snap->snap_status = 
GD_SNAP_STATUS_INIT; + } + + return snap; + +}; + +/* Function glusterd_list_add_snapvol adds the volinfo object (snapshot volume) + to the snapshot object list and to the parent volume list */ +int32_t +glusterd_list_add_snapvol (glusterd_volinfo_t *origin_vol, + glusterd_volinfo_t *snap_vol) +{ + int ret = -1; + glusterd_snap_t *snap = NULL; + + GF_VALIDATE_OR_GOTO ("glusterd", origin_vol, out); + GF_VALIDATE_OR_GOTO ("glusterd", snap_vol, out); + + snap = snap_vol->snapshot; + GF_ASSERT (snap); + + list_add_tail (&snap_vol->vol_list, &snap->volumes); + LOCK (&origin_vol->lock); + { + list_add_order (&snap_vol->snapvol_list, + &origin_vol->snap_volumes, + glusterd_compare_snap_vol_time); + origin_vol->snap_count++; + } + UNLOCK (&origin_vol->lock); + + gf_log (THIS->name, GF_LOG_DEBUG, "Snap %s added to the list", + snap->snapname); + ret = 0; +out: + return ret; +} + +glusterd_snap_t* +glusterd_find_snap_by_name (char *snapname) +{ + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (snapname); + + + list_for_each_entry (snap, &priv->snapshots, snap_list) { + if (!strcmp (snap->snapname, snapname)) { + gf_log (THIS->name, GF_LOG_DEBUG, "Found " + "snap %s (%s)", snap->snapname, + uuid_utoa (snap->snap_id)); + goto out; + } + } + snap = NULL; +out: + return snap; +} + +glusterd_snap_t* +glusterd_find_snap_by_id (uuid_t snap_id) +{ + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + + if (uuid_is_null(snap_id)) + goto out; + + list_for_each_entry (snap, &priv->snapshots, snap_list) { + if (!uuid_compare (snap->snap_id, snap_id)) { + gf_log (THIS->name, GF_LOG_DEBUG, "Found " + "snap %s (%s)", snap->snapname, + uuid_utoa (snap->snap_id)); + goto out; + } + } + snap = NULL; +out: + return snap; +} + +int +glusterd_do_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol, + glusterd_brickinfo_t *brickinfo, + const char *mount_pt, const char *snap_device) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + runner_t runner = {0,}; + char msg[1024] = {0, }; + char pidfile[PATH_MAX] = {0, }; + pid_t pid = -1; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + if (!brickinfo) { + gf_log (this->name, GF_LOG_ERROR, "brickinfo NULL"); + goto out; + } + + GF_ASSERT (snap_vol); + GF_ASSERT (mount_pt); + GF_ASSERT (snap_device); + + GLUSTERD_GET_BRICK_PIDFILE (pidfile, snap_vol, brickinfo, priv); + if (glusterd_is_service_running (pidfile, &pid)) { + ret = kill (pid, SIGKILL); + if (ret && errno != ESRCH) { + gf_log (this->name, GF_LOG_ERROR, "Unable to kill pid " + "%d reason : %s", pid, strerror(errno)); + goto out; + } + } + + runinit (&runner); + snprintf (msg, sizeof (msg), "umount the snapshot mounted path %s", + mount_pt); + runner_add_args (&runner, "umount", mount_pt, NULL); + runner_log (&runner, "", GF_LOG_DEBUG, msg); + + /* We need not do synclock_unlock => runner_run => synclock_lock here. + Because it is needed if we are running a glusterfs process in + runner_run, so that when the glusterfs process started wants to + communicate to glusterd, glusterd wont be able to respond if it + has held the big lock. So we do unlock, run glusterfs process + (thus communicate to glusterd), lock. But since this is not a + glusterfs command that is being run, unlocking and then relocking + is not needed. 
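The comment above states the rule plainly: the big lock is dropped around runner_run only when the child is a glusterfs process that will call back into glusterd; plain commands such as umount and lvremove run under the lock. The rule as a sketch, using the synclock_t and runner_t types already used in this tree; talks_to_glusterd is the caller's judgment of whether the child re-enters glusterd.

static int
run_external (runner_t *runner, synclock_t *big_lock,
              int talks_to_glusterd)
{
        int ret;

        if (talks_to_glusterd)
                synclock_unlock (big_lock);   /* let the child's RPCs in */

        ret = runner_run (runner);

        if (talks_to_glusterd)
                synclock_lock (big_lock);

        return ret;
}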
+ */ + ret = runner_run (&runner); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "unmounting the " + "path %s (brick: %s) failed (%s)", mount_pt, + brickinfo->path, strerror (errno)); + goto out; + } + + runinit (&runner); + snprintf (msg, sizeof(msg), "remove snapshot of the brick %s:%s, " + "device: %s", brickinfo->hostname, brickinfo->path, + snap_device); + runner_add_args (&runner, "/sbin/lvremove", "-f", snap_device, NULL); + runner_log (&runner, "", GF_LOG_DEBUG, msg); + + ret = runner_run (&runner); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "removing snapshot of the " + "brick (%s:%s) of device %s failed", + brickinfo->hostname, brickinfo->path, snap_device); + goto out; + } + +out: + return ret; +} + +int32_t +glusterd_lvm_snapshot_remove (dict_t *rsp_dict, glusterd_volinfo_t *snap_vol) +{ + char *mnt_pt = NULL; + struct mntent *entry = NULL; + int32_t brick_count = -1; + int32_t ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = NULL; + FILE *mtab = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap_vol); + + if (!snap_vol) { + gf_log (this->name, GF_LOG_ERROR, "snap volinfo is NULL"); + goto out; + } + + brick_count = -1; + list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) { + brick_count++; + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + if (brickinfo->snap_status == -1) { + gf_log (this->name, GF_LOG_INFO, + "snapshot was pending. lvm not present " + "for brick %s:%s of the snap %s.", + brickinfo->hostname, brickinfo->path, + snap_vol->snapshot->snapname); + + /* Adding missed delete to the dict */ + ret = glusterd_add_missed_snaps_to_dict + (rsp_dict, + snap_vol->volname, + brickinfo, + brick_count + 1, + GF_SNAP_OPTION_TYPE_DELETE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snapshot info " + "for %s:%s in the rsp_dict", + brickinfo->hostname, + brickinfo->path); + goto out; + } + + continue; + } + + ret = glusterd_get_brick_root (brickinfo->path, &mnt_pt); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "getting the root " + "of the brick for volume %s (snap %s) failed ", + snap_vol->volname, snap_vol->snapshot->snapname); + goto out; + } + + entry = glusterd_get_mnt_entry_info (mnt_pt, mtab); + if (!entry) { + gf_log (this->name, GF_LOG_WARNING, "getting the mount" + " entry for the brick %s:%s of the snap %s " + "(volume: %s) failed", brickinfo->hostname, + brickinfo->path, snap_vol->snapshot->snapname, + snap_vol->volname); + ret = -1; + goto out; + } + ret = glusterd_do_lvm_snapshot_remove (snap_vol, brickinfo, + mnt_pt, + entry->mnt_fsname); + if (mtab) + endmntent (mtab); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "remove the snapshot %s (%s)", + brickinfo->path, entry->mnt_fsname); + goto out; + } + + } + + ret = 0; +out: + return ret; +} + +int32_t +glusterd_snap_volume_remove (dict_t *rsp_dict, + glusterd_volinfo_t *snap_vol, + gf_boolean_t remove_lvm, + gf_boolean_t force) +{ + int ret = -1; + int save_ret = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *origin_vol = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap_vol); + + if (!snap_vol) { + gf_log(this->name, GF_LOG_WARNING, "snap_vol in NULL"); + ret = -1; + goto out; + } + + list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + ret = glusterd_brick_stop (snap_vol, brickinfo, _gf_false); + if (ret) { + 
gf_log(this->name, GF_LOG_WARNING, "Failed to stop " + "brick for volume %s", snap_vol->volname); + save_ret = ret; + + /* Continue to cleaning up the snap in case of error + if force flag is enabled */ + if (!force) + goto out; + } + } + + /* Only remove the backend lvm when required */ + if (remove_lvm) { + ret = glusterd_lvm_snapshot_remove (rsp_dict, snap_vol); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "Failed to remove " + "lvm snapshot volume %s", snap_vol->volname); + save_ret = ret; + if (!force) + goto out; + } + } + + ret = glusterd_store_delete_volume (snap_vol); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "Failed to remove volume %s " + "from store", snap_vol->volname); + save_ret = ret; + if (!force) + goto out; + } + + if (!list_empty(&snap_vol->snapvol_list)) { + ret = glusterd_volinfo_find (snap_vol->parent_volname, + &origin_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "parent volinfo %s for volume %s", + snap_vol->parent_volname, snap_vol->volname); + save_ret = ret; + if (!force) + goto out; + } + origin_vol->snap_count--; + } + + ret = glusterd_volinfo_delete (snap_vol); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "Failed to remove volinfo " + "%s ", snap_vol->volname); + save_ret = ret; + if (!force) + goto out; + } + + if (save_ret) + ret = save_ret; +out: + gf_log (this->name, GF_LOG_TRACE, "returning %d", ret); + return ret; +} + +int32_t +glusterd_snapobject_delete (glusterd_snap_t *snap) +{ + if (snap == NULL) { + gf_log(THIS->name, GF_LOG_WARNING, "snap is NULL"); + return -1; + } + + list_del_init (&snap->snap_list); + list_del_init (&snap->volumes); + if (LOCK_DESTROY(&snap->lock)) + gf_log (THIS->name, GF_LOG_WARNING, "Failed destroying lock" + "of snap %s", snap->snapname); + + GF_FREE (snap->description); + GF_FREE (snap); + + return 0; +} + +int32_t +glusterd_snap_remove (dict_t *rsp_dict, + glusterd_snap_t *snap, + gf_boolean_t remove_lvm, + gf_boolean_t force) +{ + int ret = -1; + int save_ret = 0; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap); + + if (!snap) { + gf_log(this->name, GF_LOG_WARNING, "snap is NULL"); + ret = -1; + goto out; + } + + list_for_each_entry_safe (snap_vol, tmp, &snap->volumes, vol_list) { + ret = glusterd_snap_volume_remove (rsp_dict, snap_vol, + remove_lvm, force); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "Failed to remove " + "volinfo %s for snap %s", snap_vol->volname, + snap->snapname); + save_ret = ret; + + /* Continue to cleaning up the snap in case of error + if force flag is enabled */ + if (!force) + goto out; + } + } + + ret = glusterd_store_delete_snap (snap); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "Failed to remove snap %s " + "from store", snap->snapname); + save_ret = ret; + if (!force) + goto out; + } + + ret = glusterd_snapobject_delete (snap); + if (ret) + gf_log (this->name, GF_LOG_WARNING, "Failed to delete " + "snap object %s", snap->snapname); + + if (save_ret) + ret = save_ret; +out: + gf_log (THIS->name, GF_LOG_TRACE, "returning %d", ret); + return ret; +} + +static int +glusterd_snapshot_get_snapvol_detail (dict_t *dict, + glusterd_volinfo_t *snap_vol, + char *keyprefix, int detail) +{ + int ret = -1; + int snap_limit = 0; + char key[PATH_MAX] = {0,}; + char *value = NULL; + glusterd_volinfo_t *origin_vol = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + + this = THIS; + conf = this->private; 
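glusterd_snap_volume_remove and glusterd_snap_remove above share a cleanup idiom: on failure, record the error in save_ret, bail out immediately unless force is set, and surface the saved error once all steps have run. The idiom sketched over an abstract step list; step_fn_t is a hypothetical stand-in for the individual stop/remove/delete steps.

/* Keep tearing down on error when force is set, remember that
 * something failed, and report it at the end. */
typedef int (*step_fn_t) (void *ctx);

static int
teardown (step_fn_t *steps, int nsteps, void *ctx, int force)
{
        int i = 0, ret = 0, save_ret = 0;

        for (i = 0; i < nsteps; i++) {
                ret = steps[i] (ctx);
                if (ret) {
                        save_ret = ret;
                        if (!force)
                                break;    /* stop at the first failure */
                }
        }

        return save_ret ? save_ret : ret;
}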
+ GF_ASSERT (conf); + + GF_ASSERT (dict); + GF_ASSERT (snap_vol); + GF_ASSERT (keyprefix); + + /* Volume Name */ + value = gf_strdup (snap_vol->volname); + if (!value) + goto out; + + snprintf (key, sizeof (key), "%s.volname", keyprefix); + ret = dict_set_dynstr (dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "volume name in dictionary: %s", key); + goto out; + } + + /* Volume ID */ + value = gf_strdup (uuid_utoa (snap_vol->volume_id)); + if (NULL == value) { + ret = -1; + goto out; + } + + snprintf (key, sizeof (key), "%s.vol-id", keyprefix); + ret = dict_set_dynstr (dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "volume id in dictionary: %s", key); + goto out; + } + value = NULL; + + /* volume status */ + snprintf (key, sizeof (key), "%s.vol-status", keyprefix); + switch (snap_vol->status) { + case GLUSTERD_STATUS_STARTED: + ret = dict_set_str (dict, key, "Started"); + break; + case GLUSTERD_STATUS_STOPPED: + ret = dict_set_str (dict, key, "Stopped"); + break; + case GD_SNAP_STATUS_NONE: + ret = dict_set_str (dict, key, "None"); + break; + default: + gf_log (this->name, GF_LOG_ERROR, "Invalid volume status"); + ret = -1; + goto out; + } + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set volume status" + " in dictionary: %s", key); + goto out; + } + + + ret = glusterd_volinfo_find (snap_vol->parent_volname, &origin_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to get the parent " + "volinfo for the volume %s", snap_vol->volname); + goto out; + } + + /* Snaps available */ + if (conf->snap_max_hard_limit < origin_vol->snap_max_hard_limit) { + snap_limit = conf->snap_max_hard_limit; + gf_log(this->name, GF_LOG_DEBUG, "system snap-max-hard-limit is" + " lesser than volume snap-max-hard-limit, " + "snap-max-hard-limit value is set to %d", snap_limit); + } else { + snap_limit = origin_vol->snap_max_hard_limit; + gf_log(this->name, GF_LOG_DEBUG, "volume snap-max-hard-limit is" + " lesser than system snap-max-hard-limit, " + "snap-max-hard-limit value is set to %d", snap_limit); + } + + snprintf (key, sizeof (key), "%s.snaps-available", keyprefix); + if (snap_limit > origin_vol->snap_count) + ret = dict_set_int32 (dict, key, + snap_limit - origin_vol->snap_count); + else + ret = dict_set_int32 (dict, key, 0); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set available snaps"); + goto out; + } + + snprintf (key, sizeof (key), "%s.snapcount", keyprefix); + ret = dict_set_int32 (dict, key, origin_vol->snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not save snapcount"); + goto out; + } + + if (!detail) + goto out; + + /* Parent volume name */ + value = gf_strdup (snap_vol->parent_volname); + if (!value) + goto out; + + snprintf (key, sizeof (key), "%s.origin-volname", keyprefix); + ret = dict_set_dynstr (dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set parent " + "volume name in dictionary: %s", key); + goto out; + } + value = NULL; + + ret = 0; +out: + if (value) + GF_FREE (value); + + return ret; +} + +static int +glusterd_snapshot_get_snap_detail (dict_t *dict, glusterd_snap_t *snap, + char *keyprefix, glusterd_volinfo_t *volinfo) +{ + int ret = -1; + int volcount = 0; + char key[PATH_MAX] = {0,}; + char *value = NULL; + char *timestr = NULL; + struct tm *tmptr = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp_vol = NULL; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (dict); + GF_ASSERT 
(snap);
+        GF_ASSERT (keyprefix);
+
+        /* Snap Name */
+        value = gf_strdup (snap->snapname);
+        if (!value)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s.snapname", keyprefix);
+        ret = dict_set_dynstr (dict, key, value);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to set "
+                        "snap name in dictionary");
+                goto out;
+        }
+
+        /* Snap ID */
+        value = gf_strdup (uuid_utoa (snap->snap_id));
+        if (NULL == value) {
+                ret = -1;
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "%s.snap-id", keyprefix);
+        ret = dict_set_dynstr (dict, key, value);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to set "
+                        "snap id in dictionary");
+                goto out;
+        }
+        value = NULL;
+
+        tmptr = localtime (&(snap->time_stamp));
+        if (NULL == tmptr) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to convert "
+                        "time_t to *tm");
+                ret = -1;
+                goto out;
+        }
+
+        timestr = GF_CALLOC (1, PATH_MAX, gf_gld_mt_char);
+        if (NULL == timestr) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = strftime (timestr, PATH_MAX, "%Y-%m-%d %H:%M:%S", tmptr);
+        if (0 == ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to convert time_t "
+                        "to string");
+                ret = -1;
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "%s.snap-time", keyprefix);
+        ret = dict_set_dynstr (dict, key, timestr);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to set "
+                        "snap time stamp in dictionary");
+                goto out;
+        }
+        timestr = NULL;
+
+        /* If snap description is provided then add that into dictionary */
+        if (NULL != snap->description) {
+                value = gf_strdup (snap->description);
+                if (NULL == value) {
+                        ret = -1;
+                        goto out;
+                }
+
+                snprintf (key, sizeof (key), "%s.snap-desc", keyprefix);
+                ret = dict_set_dynstr (dict, key, value);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR, "Failed to set "
+                                "snap description in dictionary");
+                        goto out;
+                }
+                value = NULL;
+        }
+
+        snprintf (key, sizeof (key), "%s.snap-status", keyprefix);
+        switch (snap->snap_status) {
+        case GD_SNAP_STATUS_INIT:
+                ret = dict_set_str (dict, key, "Init");
+                break;
+        case GD_SNAP_STATUS_IN_USE:
+                ret = dict_set_str (dict, key, "In-use");
+                break;
+        case GD_SNAP_STATUS_DECOMMISSION:
+                ret = dict_set_str (dict, key, "Decommissioned");
+                break;
+        case GD_SNAP_STATUS_RESTORED:
+                ret = dict_set_str (dict, key, "Restored");
+                break;
+        case GD_SNAP_STATUS_NONE:
+                ret = dict_set_str (dict, key, "None");
+                break;
+        default:
+                gf_log (this->name, GF_LOG_ERROR, "Invalid snap status");
+                ret = -1;
+                goto out;
+        }
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to set snap status "
+                        "in dictionary");
+                goto out;
+        }
+
+        if (volinfo) {
+                volcount = 1;
+                snprintf (key, sizeof (key), "%s.vol%d", keyprefix, volcount);
+                ret = glusterd_snapshot_get_snapvol_detail (dict,
+                                                            volinfo, key, 0);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR, "Failed to "
+                                "get volume detail %s for snap %s",
+                                volinfo->volname, snap->snapname);
+                        goto out;
+                }
+                goto done;
+        }
+
+        list_for_each_entry_safe (snap_vol, tmp_vol, &snap->volumes, vol_list) {
+                volcount++;
+                snprintf (key, sizeof (key), "%s.vol%d", keyprefix, volcount);
+                ret = glusterd_snapshot_get_snapvol_detail (dict,
+                                                            snap_vol, key, 1);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR, "Failed to "
+                                "get volume detail %s for snap %s",
+                                snap_vol->volname, snap->snapname);
+                        goto out;
+                }
+        }
+
+done:
+        snprintf (key, sizeof (key), "%s.vol-count", keyprefix);
+        ret = dict_set_int32 (dict, key, volcount);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to set %s",
+                        key);
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if (value)
+                GF_FREE
(value); + + if (timestr) + GF_FREE(timestr); + + return ret; +} + +static int +glusterd_snapshot_get_all_snap_info (dict_t *dict) +{ + int ret = -1; + int snapcount = 0; + char key[PATH_MAX] = {0,}; + glusterd_snap_t *snap = NULL; + glusterd_snap_t *tmp_snap = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + + /* General parameter validation */ + GF_ASSERT (dict); + + list_for_each_entry_safe (snap, tmp_snap, &priv->snapshots, snap_list) { + snapcount++; + snprintf (key, sizeof (key), "snap%d", snapcount); + ret = glusterd_snapshot_get_snap_detail (dict, snap, key, NULL); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "snapdetail for snap %s", snap->snapname); + goto out; + } + } + + ret = dict_set_int32 (dict, "snap-count", snapcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snapcount"); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_snapshot_get_info_by_volume (dict_t *dict, char *volname, + char *err_str, size_t len) +{ + int ret = -1; + int snapcount = 0; + int snap_limit = 0; + char *value = NULL; + char key[PATH_MAX] = ""; + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp_vol = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + + this = THIS; + conf = this->private; + GF_ASSERT (conf); + + GF_ASSERT (dict); + GF_ASSERT (volname); + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, len, "Volume (%s) does not exist", volname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + /* Snaps available */ + if (conf->snap_max_hard_limit < volinfo->snap_max_hard_limit) { + snap_limit = conf->snap_max_hard_limit; + gf_log(this->name, GF_LOG_DEBUG, "system snap-max-hard-limit is" + " lesser than volume snap-max-hard-limit, " + "snap-max-hard-limit value is set to %d", snap_limit); + } else { + snap_limit = volinfo->snap_max_hard_limit; + gf_log(this->name, GF_LOG_DEBUG, "volume snap-max-hard-limit is" + " lesser than system snap-max-hard-limit, " + "snap-max-hard-limit value is set to %d", snap_limit); + } + + if (snap_limit > volinfo->snap_count) + ret = dict_set_int32 (dict, "snaps-available", + snap_limit - volinfo->snap_count); + else + ret = dict_set_int32 (dict, "snaps-available", 0); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set available snaps"); + goto out; + } + + /* Origin volume name */ + value = gf_strdup (volinfo->volname); + if (!value) + goto out; + + ret = dict_set_dynstr (dict, "origin-volname", value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set parent " + "volume name in dictionary: %s", key); + goto out; + } + value = NULL; + + list_for_each_entry_safe (snap_vol, tmp_vol, &volinfo->snap_volumes, + snapvol_list) { + snapcount++; + snprintf (key, sizeof (key), "snap%d", snapcount); + ret = glusterd_snapshot_get_snap_detail (dict, + snap_vol->snapshot, + key, snap_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "snapdetail for snap %s", + snap_vol->snapshot->snapname); + goto out; + } + } + ret = dict_set_int32 (dict, "snap-count", snapcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snapcount"); + goto out; + } + + ret = 0; +out: + if (value) + GF_FREE (value); + + return ret; +} + +/* This function will be called from RPC handler routine. 
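+ * (Editor's note: the "cmd" key in the request dictionary selects among
+ * GF_SNAP_INFO_TYPE_ALL, GF_SNAP_INFO_TYPE_SNAP and GF_SNAP_INFO_TYPE_VOL,
+ * which the switch statement below dispatches on.)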
+ * This function is responsible for getting the requested + * snapshot info into the dictionary. + * + * @param req RPC request object. Required for sending a response back. + * @param op glusterd operation. Required for sending a response back. + * @param dict pointer to dictionary which will contain both + * request and response key-pair values. + * @return -1 on error and 0 on success + */ +int +glusterd_handle_snapshot_info (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + int8_t snap_driven = 1; + char *volname = NULL; + char *snapname = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + int32_t cmd = GF_SNAP_INFO_TYPE_ALL; + + this = THIS; + GF_ASSERT (this); + + GF_VALIDATE_OR_GOTO (this->name, req, out); + GF_VALIDATE_OR_GOTO (this->name, dict, out); + + + ret = dict_get_int32 (dict, "cmd", &cmd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get type " + "of snapshot info"); + goto out; + } + + switch (cmd) { + case GF_SNAP_INFO_TYPE_ALL: + { + ret = glusterd_snapshot_get_all_snap_info (dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get info of all snaps"); + goto out; + } + break; + } + + case GF_SNAP_INFO_TYPE_SNAP: + { + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get snap name"); + goto out; + } + + ret = dict_set_int32 (dict, "snap-count", 1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set snapcount"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + snprintf (err_str, len, + "Snap (%s) does not exist", snapname); + gf_log (this->name, GF_LOG_ERROR, + "%s", err_str); + ret = -1; + goto out; + } + ret = glusterd_snapshot_get_snap_detail (dict, snap, + "snap1", NULL); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get snap detail of snap " + "%s", snap->snapname); + goto out; + } + break; + } + + case GF_SNAP_INFO_TYPE_VOL: + { + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get volname"); + goto out; + } + ret = glusterd_snapshot_get_info_by_volume (dict, + volname, err_str, len); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get volume info of volume " + "%s", volname); + goto out; + } + snap_driven = 0; + break; + } + } + + ret = dict_set_int8 (dict, "snap-driven", snap_driven); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snap-driven"); + goto out; + } + + /* If everything is successful then send the response back to cli. 
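+ * (Editor's note: since the success response is sent from this handler,
+ * the RPC caller must reply only on the failure path; ret is set to 0 and
+ * the function returns straight after a successful send.)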
+ * In case of failure the caller of this function will take care + of the response */ + ret = glusterd_op_send_cli_response (op, 0, 0, req, dict, err_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to send cli " + "response"); + goto out; + } + + ret = 0; + +out: + return ret; +} + +/* This function sets all the snapshot names in the dictionary */ +int +glusterd_snapshot_get_all_snapnames (dict_t *dict) +{ + int ret = -1; + int snapcount = 0; + char *snapname = NULL; + char key[PATH_MAX] = {0,}; + glusterd_snap_t *snap = NULL; + glusterd_snap_t *tmp_snap = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (dict); + + list_for_each_entry_safe (snap, tmp_snap, &priv->snapshots, snap_list) { + snapcount++; + snapname = gf_strdup (snap->snapname); + if (!snapname) { + gf_log (this->name, GF_LOG_ERROR, "strdup failed"); + ret = -1; + goto out; + } + snprintf (key, sizeof (key), "snapname%d", snapcount); + ret = dict_set_dynstr (dict, key, snapname); + if (ret) { + GF_FREE (snapname); + gf_log (this->name, GF_LOG_ERROR, "Failed to set %s", + key); + goto out; + } + } + + ret = dict_set_int32 (dict, "snap-count", snapcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snapcount"); + goto out; + } + + ret = 0; +out: + + return ret; +} + +/* This function sets all the snapshot names + under a given volume in the dictionary */ +int +glusterd_snapshot_get_vol_snapnames (dict_t *dict, glusterd_volinfo_t *volinfo) +{ + int ret = -1; + int snapcount = 0; + char *snapname = NULL; + char key[PATH_MAX] = {0,}; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp_vol = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (dict); + GF_ASSERT (volinfo); + + list_for_each_entry_safe (snap_vol, tmp_vol, + &volinfo->snap_volumes, snapvol_list) { + snapcount++; + snapname = gf_strdup (snap_vol->snapshot->snapname); + if (!snapname) { + gf_log (this->name, GF_LOG_ERROR, + "strdup failed"); + ret = -1; + goto out; + } + snprintf (key, sizeof (key), "snapname%d", snapcount); + ret = dict_set_dynstr (dict, key, snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "set %s", key); + GF_FREE (snapname); + goto out; + } + } + + ret = dict_set_int32 (dict, "snap-count", snapcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snapcount"); + goto out; + } + + ret = 0; +out: + + return ret; +} + +int +glusterd_handle_snapshot_list (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + + GF_VALIDATE_OR_GOTO (this->name, req, out); + GF_VALIDATE_OR_GOTO (this->name, dict, out); + + /* Ignore error for getting volname as it is optional */ + ret = dict_get_str (dict, "volname", &volname); + + if (NULL == volname) { + ret = glusterd_snapshot_get_all_snapnames (dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get snapshot list"); + goto out; + } + } else { + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, len, + "Volume (%s) does not exist", volname); + gf_log (this->name, GF_LOG_ERROR, + "%s", err_str); + goto out; + } + + ret = glusterd_snapshot_get_vol_snapnames (dict, volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get snapshot list for volume %s", + volname); + goto out; + } + } + + /* If everything is successful 
then send the response back to cli. + In case of failure the caller of this function will take of response.*/ + ret = glusterd_op_send_cli_response (op, 0, 0, req, dict, err_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to send cli " + "response"); + goto out; + } + + ret = 0; + +out: + return ret; +} + +/* This is a snapshot create handler function. This function will be + * executed in the originator node. This function is responsible for + * calling mgmt_v3 framework to do the actual snap creation on all the bricks + * + * @param req RPC request object + * @param op gluster operation + * @param dict dictionary containing snapshot restore request + * @param err_str In case of an err this string should be populated + * @param len length of err_str buffer + * + * @return Negative value on Failure and 0 in success + */ +int +glusterd_handle_snapshot_create (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + char *volname = NULL; + char *snapname = NULL; + int64_t volcount = 0; + xlator_t *this = NULL; + char key[PATH_MAX] = ""; + char *username = NULL; + char *password = NULL; + uuid_t *uuid_ptr = NULL; + uuid_t tmp_uuid = {0}; + int i = 0; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (dict); + GF_ASSERT (err_str); + + ret = dict_get_int64 (dict, "volcount", &volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "get the volume count"); + goto out; + } + if (volcount <= 0) { + gf_log (this->name, GF_LOG_ERROR, "Invalid volume count %ld " + "supplied", volcount); + ret = -1; + goto out; + } + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to get the snapname"); + goto out; + } + + if (strlen(snapname) >= GLUSTERD_MAX_SNAP_NAME) { + snprintf (err_str, len, "snapname cannot exceed 255 " + "characters"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + ret = -1; + goto out; + } + + uuid_ptr = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); + if (!uuid_ptr) { + gf_log (this->name, GF_LOG_ERROR, "Out Of Memory"); + ret = -1; + goto out; + } + + uuid_generate (*uuid_ptr); + ret = dict_set_bin (dict, "snap-id", uuid_ptr, sizeof(uuid_t)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to set snap-id"); + GF_FREE (uuid_ptr); + goto out; + } + uuid_ptr = NULL; + + ret = dict_set_int64 (dict, "snap-time", (int64_t)time(NULL)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to set snap-time"); + goto out; + } + + for (i = 1; i <= volcount; i++) { + snprintf (key, sizeof (key), "volname%d", i); + ret = dict_get_str (dict, key, &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get volume name"); + goto out; + } + + /* generate internal username and password for the snap*/ + uuid_generate (tmp_uuid); + username = gf_strdup (uuid_utoa (tmp_uuid)); + snprintf (key, sizeof(key), "volume%d_username", i); + ret = dict_set_dynstr (dict, key, username); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snap " + "username for volume %s", volname); + GF_FREE (username); + goto out; + } + + uuid_generate (tmp_uuid); + password = gf_strdup (uuid_utoa (tmp_uuid)); + snprintf (key, sizeof(key), "volume%d_password", i); + ret = dict_set_dynstr (dict, key, password); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snap " + "password for volume %s", volname); + GF_FREE (password); + goto out; + } + + uuid_ptr = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); + if 
(!uuid_ptr) { + gf_log (this->name, GF_LOG_ERROR, "Out Of Memory"); + ret = -1; + goto out; + } + + snprintf (key, sizeof(key) - 1, "vol%d_volid", i); + uuid_generate (*uuid_ptr); + ret = dict_set_bin (dict, key, uuid_ptr, sizeof(uuid_t)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set snap_volid"); + GF_FREE (uuid_ptr); + goto out; + } + } + + ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to initiate snap " + "phases"); + } + +out: + return ret; +} + +/* This is a snapshot status handler function. This function will be + * executed in a originator node. This function is responsible for + * calling mgmt v3 framework to get the actual snapshot status from + * all the bricks + * + * @param req RPC request object + * @param op gluster operation + * @param dict dictionary containing snapshot status request + * @param err_str In case of an err this string should be populated + * @param len length of err_str buffer + * + * return : 0 in case of success. + * -1 in case of failure. + * + */ +int +glusterd_handle_snapshot_status (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + char *volname = NULL; + char *snapname = NULL; + char *buf = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + int32_t cmd = -1; + int i = 0; + dict_t *voldict = NULL; + char key[PATH_MAX] = ""; + glusterd_volinfo_t *volinfo = NULL; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + + GF_ASSERT (conf); + GF_ASSERT (req); + GF_ASSERT (dict); + GF_ASSERT (err_str); + + ret = dict_get_int32 (dict, "cmd", &cmd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not get status type"); + goto out; + } + switch (cmd) { + case GF_SNAP_STATUS_TYPE_ALL: + { + /* IF we give "gluster snapshot status" + * then lock is held on all snaps. + * This is the place where necessary information + * (snapname and snapcount)is populated in dictionary + * for locking. + */ + ++i; + list_for_each_entry (snap, &conf->snapshots, snap_list) + { + snprintf (key, sizeof (key), "snapname%d", i); + buf = gf_strdup (snap->snapname); + if (!buf) { + ret = -1; + goto out; + } + ret = dict_set_dynstr (dict, key, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save snapname (%s) " + "in the dictionary", + snap->snapname); + GF_FREE (buf); + goto out; + } + + buf = NULL; + i++; + } + + ret = dict_set_int32 (dict, "snapcount", i - 1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not " + "save snapcount in the dictionary"); + goto out; + } + break; + } + + case GF_SNAP_STATUS_TYPE_SNAP: + { + /* IF we give "gluster snapshot status <snapname>" + * then lock is held on single snap. + * This is the place where necessary information + * (snapname)is populated in dictionary + * for locking. 
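+ * (Editor's note: in the TYPE_ALL branch above the populated keys are
+ * "snapname1" .. "snapnameN" plus "snapcount"; in this branch the
+ * pre-existing "snapname" key from the CLI request is used as-is.)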
+ */ + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to fetch snap name"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + snprintf (err_str, len, "Snap (%s)" + "does not exist", snapname); + gf_log(this->name, GF_LOG_ERROR, + "%s", err_str); + ret = -1; + goto out; + } + break; + } + case GF_SNAP_STATUS_TYPE_VOL: + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to fetch volname"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, len, "Volume (%s) " + "does not exist", volname); + gf_log (this->name, GF_LOG_ERROR, + "%s", err_str); + goto out; + } + + i = 1; + list_for_each_entry (snap_volinfo, + &volinfo->snap_volumes, snapvol_list) { + snprintf (key, sizeof (key), "snapname%d", i); + + buf = gf_strdup + (snap_volinfo->snapshot->snapname); + if (!buf) { + ret = -1; + goto out; + } + + ret = dict_set_dynstr (dict, key, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save snapname"); + GF_FREE (buf); + goto out; + } + + buf = NULL; + i++; + } + + ret = dict_set_int32 (dict, "snapcount", i-1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save snapcount"); + goto out; + } + break; + default: + { + gf_log (this->name, GF_LOG_ERROR, "Unknown type"); + ret = -1; + goto out; + } + } + + /* Volume lock is not necessary for snapshot status, hence + * turning it off + */ + ret = dict_set_int8 (dict, "hold_vol_locks", 0); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Setting volume lock " + "flag failed"); + goto out; + } + + ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to initiate " + "snap phases"); + goto out; + } + + ret = 0; + +out: + if (voldict) { + dict_unref (voldict); + } + return ret; +} + + +/* This is a snapshot restore handler function. This function will be + * executed in the originator node. 
This function is responsible for + * calling mgmt_v3 framework to do the actual restore on all the bricks + * + * @param req RPC request object + * @param op gluster operation + * @param dict dictionary containing snapshot restore request + * @param err_str In case of an err this string should be populated + * @param len length of err_str buffer + * + * @return Negative value on Failure and 0 in success + */ +int +glusterd_handle_snapshot_restore (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + char *snapname = NULL; + char *buf = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + int32_t i = 0; + char key[PATH_MAX] = ""; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + + GF_ASSERT (conf); + GF_ASSERT (req); + GF_ASSERT (dict); + GF_ASSERT (err_str); + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "get snapname"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + snprintf (err_str, len, "Snap (%s) does not exist", snapname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + ret = -1; + goto out; + } + + list_for_each_entry (snap_volinfo, &snap->volumes, vol_list) { + i++; + snprintf (key, sizeof (key), "volname%d", i); + buf = gf_strdup (snap_volinfo->parent_volname); + if (!buf) { + ret = -1; + goto out; + } + ret = dict_set_dynstr (dict, key, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not set " + "parent volume name %s in the dict", + snap_volinfo->parent_volname); + GF_FREE (buf); + goto out; + } + buf = NULL; + } + + ret = dict_set_int32 (dict, "volcount", i); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save volume count"); + goto out; + } + + ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to initiate snap " + "phases"); + goto out; + } + + ret = 0; + +out: + return ret; +} + +glusterd_snap_t* +glusterd_create_snap_object (dict_t *dict, dict_t *rsp_dict) +{ + char *snapname = NULL; + uuid_t *snap_id = NULL; + char *description = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + int64_t time_stamp = 0; + + this = THIS; + priv = this->private; + + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + /* Fetch snapname, description, id and time from dict */ + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to fetch snapname"); + goto out; + } + + /* Ignore ret value for description*/ + ret = dict_get_str (dict, "description", &description); + + ret = dict_get_bin (dict, "snap-id", (void **)&snap_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to fetch snap_id"); + goto out; + } + + ret = dict_get_int64 (dict, "snap-time", &time_stamp); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to fetch snap-time"); + goto out; + } + if (time_stamp <= 0) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Invalid time-stamp: %ld", + time_stamp); + goto out; + } + + list_for_each_entry (snap, &priv->snapshots, snap_list) { + if (!strcmp (snap->snapname, snapname) || + !uuid_compare (snap->snap_id, *snap_id)) { + gf_log (THIS->name, GF_LOG_ERROR, + "Found duplicate snap %s (%s)", + snap->snapname, uuid_utoa (snap->snap_id)); + ret = -1; + break; + } + } + if (ret) { + snap = NULL; + goto out; + } + + 
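+        /* Editor's note: control reaches this point only when the loop
+         * above found no snapshot with a matching name or snap-id; a
+         * duplicate takes the goto-out path with snap reset to NULL. */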
snap = glusterd_new_snap_object (); + if (!snap) { + gf_log (this->name, GF_LOG_ERROR, "Could not create " + "the snap object for snap %s", snapname); + goto out; + } + + strcpy (snap->snapname, snapname); + uuid_copy (snap->snap_id, *snap_id); + snap->time_stamp = (time_t)time_stamp; + /* Set the status as GD_SNAP_STATUS_INIT and once the backend snapshot + is taken and snap is really ready to use, set the status to + GD_SNAP_STATUS_IN_USE. This helps in identifying the incomplete + snapshots and cleaning them up. + */ + snap->snap_status = GD_SNAP_STATUS_INIT; + if (description) { + snap->description = gf_strdup (description); + if (snap->description == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "Saving the Snap Description Failed"); + ret = -1; + goto out; + } + } + + ret = glusterd_store_snap (snap); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Could not store snap" + "object %s", snap->snapname); + goto out; + } + + list_add_order (&snap->snap_list, &priv->snapshots, + glusterd_compare_snap_time); + + gf_log (this->name, GF_LOG_TRACE, "Snap %s added to the list", + snap->snapname); + + ret = 0; + +out: + if (ret) { + if (snap) + glusterd_snap_remove (rsp_dict, snap, + _gf_true, _gf_true); + snap = NULL; + } + + return snap; +} + +/* This function is called to get the device path of the snap lvm. Usually + if /dev/mapper/<group-name>-<lvm-name> is the device for the lvm, + then the snap device will be /dev/<group-name>/<snapname>. + This function takes care of building the path for the snap device. +*/ +char * +glusterd_build_snap_device_path (char *device, char *snapname) +{ + char snap[PATH_MAX] = ""; + char msg[1024] = ""; + char volgroup[PATH_MAX] = ""; + char *snap_device = NULL; + xlator_t *this = NULL; + runner_t runner = {0,}; + char *ptr = NULL; + int ret = -1; + + this = THIS; + GF_ASSERT (this); + if (!device) { + gf_log (this->name, GF_LOG_ERROR, "device is NULL"); + goto out; + } + if (!snapname) { + gf_log (this->name, GF_LOG_ERROR, "snapname is NULL"); + goto out; + } + + runinit (&runner); + runner_add_args (&runner, "/sbin/lvs", "--noheadings", "-o", "vg_name", + device, NULL); + runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); + snprintf (msg, sizeof (msg), "Get volume group for device %s", device); + runner_log (&runner, this->name, GF_LOG_DEBUG, msg); + ret = runner_start (&runner); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get volume group " + "for device %s", device); + runner_end (&runner); + goto out; + } + ptr = fgets(volgroup, sizeof(volgroup), + runner_chio (&runner, STDOUT_FILENO)); + if (!ptr || !strlen(volgroup)) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get volume group " + "for snap %s", snapname); + runner_end (&runner); + ret = -1; + goto out; + } + runner_end (&runner); + + snprintf (snap, sizeof(snap), "/dev/%s/%s", gf_trim(volgroup), + snapname); + snap_device = gf_strdup (snap); + if (!snap_device) { + gf_log (this->name, GF_LOG_WARNING, "Cannot copy the " + "snapshot device name for snapname: %s)", snapname); + } + +out: + return snap_device; +} + +/* This function actually calls the command (or the API) for taking the + snapshot of the backend brick filesystem. 
If this is successful, + then call the glusterd_snap_create function to create the snap object + for glusterd +*/ +char * +glusterd_take_lvm_snapshot (glusterd_volinfo_t *snap_vol, + glusterd_brickinfo_t *brickinfo) +{ + char msg[NAME_MAX] = ""; + char buf[PATH_MAX] = ""; + char *snap_device = NULL; + char *ptr = NULL; + char *device = NULL; + int ret = -1; + gf_boolean_t match = _gf_false; + runner_t runner = {0,}; + xlator_t *this = NULL; + + this = THIS; + + if (!brickinfo) { + gf_log (this->name, GF_LOG_ERROR, "brickinfo NULL"); + goto out; + } + + device = glusterd_get_brick_mount_details (brickinfo); + if (!device) { + gf_log (this->name, GF_LOG_ERROR, "getting device name for " + "the brick %s:%s failed", brickinfo->hostname, + brickinfo->path); + goto out; + } + + /* Figuring out if setactivationskip flag is supported or not */ + runinit (&runner); + snprintf (msg, sizeof (msg), "running lvcreate help"); + runner_add_args (&runner, "/sbin/lvcreate", "--help", NULL); + runner_log (&runner, "", GF_LOG_DEBUG, msg); + runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); + ret = runner_start (&runner); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to run lvcreate help"); + runner_end (&runner); + goto out; + } + + /* Looking for setactivationskip in lvcreate --help */ + do { + ptr = fgets(buf, sizeof(buf), + runner_chio (&runner, STDOUT_FILENO)); + if (ptr) { + if (strstr(buf, "setactivationskip")) { + match = _gf_true; + break; + } + } + } while (ptr != NULL); + runner_end (&runner); + + /* Takng the actual snapshot */ + runinit (&runner); + snprintf (msg, sizeof (msg), "taking snapshot of the brick %s:%s", + brickinfo->hostname, brickinfo->path); + if (match == _gf_true) + runner_add_args (&runner, "/sbin/lvcreate", "-s", device, + "--setactivationskip", "n", "--name", + snap_vol->volname, NULL); + else + runner_add_args (&runner, "/sbin/lvcreate", "-s", device, + "--name", snap_vol->volname, NULL); + runner_log (&runner, "", GF_LOG_DEBUG, msg); + ret = runner_start (&runner); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "taking snapshot of the " + "brick (%s:%s) of device %s failed", + brickinfo->hostname, brickinfo->path, device); + runner_end (&runner); + goto out; + } + runner_end (&runner); + + snap_device = glusterd_build_snap_device_path (device, + snap_vol->volname); + if (!snap_device) { + gf_log (this->name, GF_LOG_WARNING, "Cannot copy the snapshot " + "device name for snap %s (volume id: %s)", + snap_vol->snapshot->snapname, snap_vol->volname); + ret = -1; + goto out; + } + +out: + return snap_device; +} + +int32_t +glusterd_snap_brick_create (char *device, glusterd_volinfo_t *snap_volinfo, + glusterd_brickinfo_t *original_brickinfo, + int32_t brick_count, char *snap_brick_dir) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char snap_brick_mount_path[PATH_MAX] = ""; + char snap_brick_path[PATH_MAX] = ""; + char msg[1024] = ""; + struct stat statbuf = {0, }; + runner_t runner = {0, }; + + this = THIS; + priv = this->private; + + GF_ASSERT (device); + GF_ASSERT (snap_volinfo); + GF_ASSERT (original_brickinfo); + GF_ASSERT (snap_brick_dir); + + snprintf (snap_brick_mount_path, sizeof (snap_brick_mount_path), + "%s/%s/brick%d", snap_mount_folder, snap_volinfo->volname, + brick_count+1); + + snprintf (snap_brick_path, sizeof (snap_brick_path), "%s%s", + snap_brick_mount_path, snap_brick_dir); + + ret = mkdir_p (snap_brick_mount_path, 0777, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "creating the brick directory" + 
" %s for the snapshot %s(device: %s) failed", + snap_brick_mount_path, snap_volinfo->volname, device); + goto out; + } + /* mount the snap logical device on the directory inside + /run/gluster/snaps/<snapname>/@snap_brick_mount_path + Way to mount the snap brick via mount api is this. + ret = mount (device, snap_brick_mount_path, entry->mnt_type, + MS_MGC_VAL, "nouuid"); + But for now, mounting using runner apis. + */ + runinit (&runner); + snprintf (msg, sizeof (msg), "mounting snapshot of the brick %s:%s", + original_brickinfo->hostname, original_brickinfo->path); + runner_add_args (&runner, "mount", "-o", "nouuid", device, + snap_brick_mount_path, NULL); + runner_log (&runner, "", GF_LOG_DEBUG, msg); + + /* let glusterd get blocked till snapshot is over */ + synclock_unlock (&priv->big_lock); + ret = runner_run (&runner); + synclock_lock (&priv->big_lock); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "mounting the snapshot " + "logical device %s failed (error: %s)", device, + strerror (errno)); + goto out; + } else + gf_log (this->name, GF_LOG_DEBUG, "mounting the snapshot " + "logical device %s successful", device); + + ret = stat (snap_brick_path, &statbuf); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "stat of the brick %s" + "(brick mount: %s) failed (%s)", snap_brick_path, + snap_brick_mount_path, strerror (errno)); + goto out; + } + ret = sys_lsetxattr (snap_brick_path, + GF_XATTR_VOL_ID_KEY, + snap_volinfo->volume_id, 16, + XATTR_REPLACE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "extended attribute %s on %s. Reason: " + "%s, snap: %s", GF_XATTR_VOL_ID_KEY, + snap_brick_path, strerror (errno), + snap_volinfo->volname); + goto out; + } + +out: + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "unmounting the snap brick" + " mount %s", snap_brick_mount_path); + umount (snap_brick_mount_path); + } + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Added missed_snap_entry to rsp_dict */ +int32_t +glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, char *snap_uuid, + glusterd_brickinfo_t *brickinfo, + int32_t brick_number, int32_t op) +{ + char *buf = NULL; + char missed_snap_entry[PATH_MAX] = ""; + char name_buf[PATH_MAX] = ""; + int32_t missed_snap_count = -1; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap_uuid); + GF_ASSERT (brickinfo); + + snprintf (missed_snap_entry, sizeof(missed_snap_entry), + "%s:%s=%d:%s:%d:%d", uuid_utoa(brickinfo->uuid), + snap_uuid, brick_number, brickinfo->path, op, + GD_MISSED_SNAP_PENDING); + + buf = gf_strdup (missed_snap_entry); + if (!buf) { + ret = -1; + goto out; + } + + /* Fetch the missed_snap_count from the dict */ + ret = dict_get_int32 (rsp_dict, "missed_snap_count", + &missed_snap_count); + if (ret) { + /* Initialize the missed_snap_count for the first time */ + missed_snap_count = 0; + } + + /* Setting the missed_snap_entry in the rsp_dict */ + snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", + missed_snap_count); + ret = dict_set_dynstr (rsp_dict, name_buf, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set missed_snap_entry (%s) " + "in the rsp_dict.", buf); + GF_FREE (buf); + goto out; + } + missed_snap_count++; + + /* Setting the new missed_snap_count in the dict */ + ret = dict_set_int32 (rsp_dict, "missed_snap_count", + missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set missed_snap_count for %s " + "in the rsp_dict.", 
missed_snap_entry); + goto out; + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int32_t +glusterd_add_bricks_to_snap_volume (dict_t *dict, dict_t *rsp_dict, + glusterd_volinfo_t *snap_vol, + glusterd_brickinfo_t *original_brickinfo, + glusterd_brickinfo_t *snap_brickinfo, + char **snap_brick_dir, int64_t volcount, + int32_t brick_count) +{ + char key[PATH_MAX] = ""; + char snap_brick_path[PATH_MAX] = ""; + char *snap_device = NULL; + gf_boolean_t add_missed_snap = _gf_false; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap_vol); + GF_ASSERT (original_brickinfo); + GF_ASSERT (snap_brickinfo); + GF_ASSERT (snap_brick_dir); + + snprintf (key, sizeof(key) - 1, "vol%ld.brickdir%d", volcount, + brick_count); + ret = dict_get_ptr (dict, key, (void **)snap_brick_dir); + if (ret) { + /* Using original brickinfo here because it will be a + * pending snapshot and storing the original brickinfo + * will help in mapping while recreating the missed snapshot + */ + gf_log (this->name, GF_LOG_WARNING, "Unable to fetch " + "snap mount path (%s). Using original brickinfo", key); + snap_brickinfo->snap_status = -1; + strcpy (snap_brick_path, original_brickinfo->path); + + /* In origiator node add snaps missed + * from different nodes to the dict + */ + if (is_origin_glusterd (dict) == _gf_true) + add_missed_snap = _gf_true; + } else { + /* Create brick-path in the format /var/run/gluster/snaps/ * + * <snap-uuid>/<original-brick#>/snap-brick-dir * + */ + snprintf (snap_brick_path, sizeof(snap_brick_path), + "%s/%s/brick%d%s", snap_mount_folder, + snap_vol->volname, brick_count+1, + *snap_brick_dir); + } + + if ((snap_brickinfo->snap_status != -1) && + (!uuid_compare (original_brickinfo->uuid, MY_UUID)) && + (!glusterd_is_brick_started (original_brickinfo))) { + /* In case if the brick goes down after prevalidate. */ + gf_log (this->name, GF_LOG_WARNING, "brick %s:%s is not" + " started (snap: %s)", + original_brickinfo->hostname, + original_brickinfo->path, + snap_vol->snapshot->snapname); + + snap_brickinfo->snap_status = -1; + strcpy (snap_brick_path, original_brickinfo->path); + add_missed_snap = _gf_true; + } + + if (add_missed_snap) { + ret = glusterd_add_missed_snaps_to_dict (rsp_dict, + snap_vol->volname, + original_brickinfo, + brick_count + 1, + GF_SNAP_OPTION_TYPE_CREATE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to add missed" + " snapshot info for %s:%s in the rsp_dict", + original_brickinfo->hostname, + original_brickinfo->path); + goto out; + } + } + + snprintf (key, sizeof(key), "vol%ld.brick_snapdevice%d", + volcount, brick_count); + ret = dict_get_ptr (dict, key, (void **)&snap_device); + if (ret) { + /* If the device name is empty, so will be the brick path + * Hence the missed snap has already been added above + */ + gf_log (this->name, GF_LOG_ERROR, "Unable to fetch " + "snap device (%s). 
Leaving empty", key); + } else + strcpy (snap_brickinfo->device_path, snap_device); + + ret = gf_canonicalize_path (snap_brick_path); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to canonicalize path"); + goto out; + } + + strcpy (snap_brickinfo->hostname, original_brickinfo->hostname); + strcpy (snap_brickinfo->path, snap_brick_path); + uuid_copy (snap_brickinfo->uuid, original_brickinfo->uuid); + list_add_tail (&snap_brickinfo->brick_list, &snap_vol->bricks); + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int32_t +glusterd_take_brick_snapshot (glusterd_volinfo_t *origin_vol, + glusterd_volinfo_t *snap_vol, dict_t *rsp_dict, + glusterd_brickinfo_t *original_brickinfo, + glusterd_brickinfo_t *snap_brickinfo, + char *snap_brick_dir, int32_t brick_count) +{ + char *device = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (origin_vol); + GF_ASSERT (snap_vol); + GF_ASSERT (rsp_dict); + GF_ASSERT (original_brickinfo); + GF_ASSERT (snap_brickinfo); + GF_ASSERT (snap_brick_dir); + + device = glusterd_take_lvm_snapshot (snap_vol, original_brickinfo); + /* Fail the snapshot even though snapshot on one of + the bricks fails. At the end when we check whether + the snapshot volume meets quorum or not, then the + the snapshot can either be treated as success, or + in case of failure we can undo the changes and return + failure to cli. */ + if (!device) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to take snapshot of %s:%s", + original_brickinfo->hostname, + original_brickinfo->path); + goto out; + } + + /* create the complete brick here */ + ret = glusterd_snap_brick_create (device, snap_vol, + original_brickinfo, + brick_count, snap_brick_dir); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "not able to" + " create the brickinfo for the snap %s" + ", volume %s", snap_vol->snapshot->snapname, + origin_vol->volname); + goto out; + } + +out: + if (device) + GF_FREE (device); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Look for disconnected peers, for missed snap creates or deletes */ +static int32_t +glusterd_find_missed_snap (dict_t *rsp_dict, glusterd_volinfo_t *vol, + char *snap_uuid, struct list_head *peers, + int32_t op) +{ + int32_t brick_count = -1; + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (peers); + GF_ASSERT (vol); + GF_ASSERT (snap_uuid); + + brick_count = 0; + list_for_each_entry (brickinfo, &vol->bricks, brick_list) { + if (!uuid_compare (brickinfo->uuid, MY_UUID)) { + /* If the brick belongs to the same node */ + brick_count++; + continue; + } + + list_for_each_entry (peerinfo, peers, uuid_list) { + if (uuid_compare (peerinfo->uuid, brickinfo->uuid)) { + /* If the brick doesnt belong to this peer */ + continue; + } + + /* Found peer who owns the brick, * + * if peer is not connected or not * + * friend add it to missed snap list */ + if (!(peerinfo->connected) || + (peerinfo->state.state != + GD_FRIEND_STATE_BEFRIENDED)) { + ret = glusterd_add_missed_snaps_to_dict + (rsp_dict, + snap_uuid, + brickinfo, + brick_count + 1, + op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snapshot " + "info for %s:%s in the " + "rsp_dict", brickinfo->hostname, + brickinfo->path); + goto out; + } + } + } + brick_count++; + } + + ret = 0; +out: + gf_log (this->name, 
GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +glusterd_volinfo_t * +glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap, + dict_t *dict, dict_t *rsp_dict, int64_t volcount) +{ + char key[PATH_MAX] = ""; + char *snap_brick_dir = NULL; + char *username = NULL; + char *password = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *snap_vol = NULL; + uuid_t *snap_volid = NULL; + int32_t ret = -1; + int32_t brick_count = 0; + glusterd_brickinfo_t *snap_brickinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (origin_vol); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + /* fetch username, password and vol_id from dict*/ + snprintf (key, sizeof(key), "volume%ld_username", volcount); + ret = dict_get_str (dict, key, &username); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get %s for " + "snap %s", key, snap->snapname); + goto out; + } + + snprintf (key, sizeof(key), "volume%ld_password", volcount); + ret = dict_get_str (dict, key, &password); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get %s for " + "snap %s", key, snap->snapname); + goto out; + } + snprintf (key, sizeof(key) - 1, "vol%ld_volid", volcount); + ret = dict_get_bin (dict, key, (void **)&snap_volid); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch snap_volid"); + goto out; + } + + /* We are not setting the username and password here as + * we need to set the user name and password passed in + * the dictionary + */ + ret = glusterd_volinfo_dup (origin_vol, &snap_vol, _gf_false); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to duplicate volinfo " + "for the snapshot %s", snap->snapname); + goto out; + } + + /* uuid is used as lvm snapshot name. 
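+       (Editor's note: GLUSTERD_GET_UUID_NOHYPHEN below stores the uuid
+       without its hyphens, so the snap volume, and therefore the LV taken
+       for each brick, is named after the 32-character uuid string rather
+       than the user-supplied snap name.)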
+ This will avoid restrictions on snapshot names provided by user */ + GLUSTERD_GET_UUID_NOHYPHEN (snap_vol->volname, *snap_volid); + uuid_copy (snap_vol->volume_id, *snap_volid); + snap_vol->is_snap_volume = _gf_true; + strcpy (snap_vol->parent_volname, origin_vol->volname); + snap_vol->snapshot = snap; + + glusterd_auth_set_username (snap_vol, username); + glusterd_auth_set_password (snap_vol, password); + + /* Adding snap brickinfos to the snap volinfo */ + brick_count = 0; + list_for_each_entry (brickinfo, &origin_vol->bricks, brick_list) { + snap_brickinfo = NULL; + + ret = glusterd_brickinfo_new (&snap_brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "initializing the brick for the snap " + "volume failed (snapname: %s)", snap->snapname); + goto out; + } + + ret = glusterd_add_bricks_to_snap_volume (dict, rsp_dict, + snap_vol, + brickinfo, + snap_brickinfo, + &snap_brick_dir, + volcount, + brick_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add the snap brick for " + "%s:%s to the snap volume", + brickinfo->hostname, brickinfo->path); + GF_FREE (snap_brickinfo); + goto out; + } + + /* Take snapshot of the brick */ + if ((uuid_compare (brickinfo->uuid, MY_UUID)) || + (snap_brickinfo->snap_status == -1)) { + brick_count++; + continue; + } + + ret = glusterd_take_brick_snapshot (origin_vol, snap_vol, + rsp_dict, brickinfo, + snap_brickinfo, + snap_brick_dir, + brick_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to take snapshot for %s:%s", + brickinfo->hostname, brickinfo->path); + goto out; + } + + brick_count++; + } + + /*TODO: the quorum check of the snap volume here */ + + ret = glusterd_store_volinfo (snap_vol, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to store snapshot " + "volinfo (%s) for snap %s", snap_vol->volname, + snap->snapname); + goto out; + } + + ret = generate_brick_volfiles (snap_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "generating the brick " + "volfiles for the snap %s (volume: %s) failed", + snap->snapname, origin_vol->volname); + goto out; + } + + ret = generate_client_volfiles (snap_vol, GF_CLIENT_TRUSTED); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "generating the trusted " + "client volfiles for the snap %s (volume: %s) failed", + snap->snapname, origin_vol->volname); + goto out; + } + ret = generate_client_volfiles (snap_vol, GF_CLIENT_OTHER); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "generating the client " + "volfiles for the snap %s (volume: %s) failed", + snap->snapname, origin_vol->volname); + goto out; + } + + ret = glusterd_list_add_snapvol (origin_vol, snap_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "could not add the snap " + "volume %s to the list", snap_vol->volname); + goto out; + } + + list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + if (brickinfo->snap_status == -1) { + gf_log (this->name, GF_LOG_INFO, + "not starting snap brick %s:%s for " + "for the snap %s (volume: %s)", + brickinfo->hostname, brickinfo->path, + snap->snapname, origin_vol->volname); + continue; + } + + ret = glusterd_brick_start (snap_vol, brickinfo, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "starting the " + "brick %s:%s for the snap %s (volume: %s) " + "failed", brickinfo->hostname, brickinfo->path, + snap->snapname, origin_vol->volname); + goto out; + } + } + + snap_vol->status = GLUSTERD_STATUS_STARTED; + ret = 
glusterd_store_volinfo (snap_vol, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to store snap volinfo"); + goto out; + } + +out: + if (ret) { + if (snap_vol) + glusterd_snap_volume_remove (rsp_dict, snap_vol, + _gf_true, _gf_true); + snap_vol = NULL; + } + + return snap_vol; +} + +/* This is a snapshot remove handler function. This function will be + * executed in the originator node. This function is responsible for + * calling mgmt v3 framework to do the actual remove on all the bricks + * + * @param req RPC request object + * @param op gluster operation + * @param dict dictionary containing snapshot remove request + * @param err_str In case of an err this string should be populated + * @param len length of err_str buffer + * + * @return Negative value on Failure and 0 in success + */ +int +glusterd_handle_snapshot_remove (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + int64_t volcount = 0; + char *snapname = NULL; + char *volname = NULL; + char key[PATH_MAX] = ""; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp = NULL; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (req); + GF_ASSERT (dict); + GF_ASSERT (err_str); + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get snapname"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + snprintf (err_str, len, "Snap (%s) does not exist", snapname); + gf_log (this->name, GF_LOG_ERROR, + "%s", err_str); + ret = -1; + goto out; + } + + /* Set volnames in the dict to get mgmt_v3 lock */ + list_for_each_entry_safe (snap_vol, tmp, &snap->volumes, vol_list) { + volcount++; + volname = gf_strdup (snap_vol->parent_volname); + if (!volname) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "strdup failed"); + goto out; + } + + snprintf (key, sizeof (key), "volname%ld", volcount); + ret = dict_set_dynstr (dict, key, volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "volume name in dictionary"); + GF_FREE (volname); + goto out; + } + volname = NULL; + } + ret = dict_set_int64 (dict, "volcount", volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set volcount"); + goto out; + } + + ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to initiate snap " + "phases"); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_snapshot_remove_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int32_t ret = -1; + char *snapname = NULL; + xlator_t *this = NULL; + glusterd_snap_t *snap = NULL; + + this = THIS; + + if (!dict || !op_errstr) { + gf_log (this->name, GF_LOG_ERROR, "input parameters NULL"); + goto out; + } + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Getting the snap name " + "failed"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + gf_log (this->name, GF_LOG_ERROR, "Snap %s does not exist", + snapname); + ret = -1; + goto out; + } + ret = 0; +out: + return ret; +} + +int +glusterd_snapshot_status_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int ret = -1; + char *snapname = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + int32_t cmd = -1; + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + + this = THIS; + 
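+        /* Editor's note: this prevalidation only verifies that the
+         * requested snap or volume exists; the status data itself is
+         * collected in the later mgmt_v3 phases. */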
GF_ASSERT (this); + conf = this->private; + + GF_ASSERT (conf); + GF_ASSERT (op_errstr); + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, "Input dict is NULL"); + goto out; + } + + ret = dict_get_int32 (dict, "cmd", &cmd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not fetch status cmd"); + goto out; + } + + switch (cmd) { + case GF_SNAP_STATUS_TYPE_ALL: + { + break; + } + case GF_SNAP_STATUS_TYPE_SNAP: + { + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not fetch snapname"); + goto out; + } + + if (!glusterd_find_snap_by_name (snapname)) { + ret = gf_asprintf (op_errstr, "Snap (%s) " + "not found", snapname); + if (ret < 0) { + goto out; + } + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Snap (%s) " + "not found", snapname); + goto out; + } + break; + } + case GF_SNAP_STATUS_TYPE_VOL: + { + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not fetch volname"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + ret = gf_asprintf (op_errstr, "Volume (%s)" + "not found", volname); + if (ret < 0) { + goto out; + } + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Volume " + "%s not present", volname); + goto out; + } + break; + + } + default: + { + gf_log (this->name, GF_LOG_ERROR, "Invalid command"); + break; + } + } + ret = 0; + +out: + return ret; +} + +int32_t +glusterd_snapshot_remove_commit (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int32_t ret = -1; + char *snapname = NULL; + char *dup_snapname = NULL; + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + GF_ASSERT (op_errstr); + + priv = this->private; + GF_ASSERT (priv); + + if (!dict || !op_errstr) { + gf_log (this->name, GF_LOG_ERROR, "input parameters NULL"); + goto out; + } + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Getting the snap name " + "failed"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + gf_log (this->name, GF_LOG_ERROR, "Snap %s does not exist", + snapname); + ret = -1; + goto out; + } + + if (is_origin_glusterd (dict) == _gf_true) { + /* TODO : As of now there is only volume in snapshot. 
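+                 * (Editor's note: i.e. a snapshot currently carries exactly
+                 * one volume, which is why volumes.next is taken directly
+                 * below.)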
+ * Change this when multiple volume snapshot is introduced + */ + snap_volinfo = list_entry (snap->volumes.next, + glusterd_volinfo_t, + vol_list); + if (!snap_volinfo) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch snap_volinfo"); + ret = -1; + goto out; + } + + /* From origin glusterd check if * + * any peers with snap bricks is down */ + ret = glusterd_find_missed_snap (rsp_dict, snap_volinfo, + snap_volinfo->volname, + &priv->peers, + GF_SNAP_OPTION_TYPE_DELETE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to find missed snap deletes"); + goto out; + } + } + + ret = glusterd_snap_remove (rsp_dict, snap, _gf_true, _gf_false); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to remove snap %s", + snapname); + goto out; + } + + dup_snapname = gf_strdup (snapname); + if (!dup_snapname) { + gf_log (this->name, GF_LOG_ERROR, "Strdup failed"); + ret = -1; + goto out; + } + + ret = dict_set_dynstr (rsp_dict, "snapname", dup_snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set the snapname"); + GF_FREE (dup_snapname); + goto out; + } + + ret = 0; +out: + return ret; +} + +int32_t +glusterd_do_snap_cleanup (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + char *name = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_snap_t *snap = NULL; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + + if (!dict || !op_errstr) { + gf_log (this->name, GF_LOG_ERROR, "input parameters NULL"); + goto out; + } + + ret = dict_get_str (dict, "snapname", &name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "getting the snap " + "name failed (volume: %s)", volinfo->volname); + goto out; + } + + /* + If the snapname is not found that means the failure happened at + staging, or in commit, before the snap object is created, in which + case there is nothing to cleanup. So set ret to 0. 
+ */ + snap = glusterd_find_snap_by_name (name); + if (!snap) { + gf_log (this->name, GF_LOG_INFO, "snap %s is not found", name); + ret = 0; + goto out; + } + + ret = glusterd_snap_remove (rsp_dict, snap, _gf_true, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "removing the snap %s failed", + name); + goto out; + } + + name = NULL; + + ret = 0; + +out: + + return ret; +} + +/* In case of a successful, delete or create operation, during post_validate * + * look for missed snap operations and update the missed snap lists */ +int32_t +glusterd_snapshot_update_snaps_post_validate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int32_t ret = -1; + int32_t missed_snap_count = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + GF_ASSERT (op_errstr); + + ret = dict_get_int32 (dict, "missed_snap_count", + &missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "No missed snaps"); + ret = 0; + goto out; + } + + ret = glusterd_store_update_missed_snaps (dict, missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to update missed_snaps_list"); + goto out; + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int ret = -1; + int64_t i = 0; + int64_t volcount = 0; + char *snapname = NULL; + char *volname = NULL; + char *tmp_name = NULL; + char key[PATH_MAX] = ""; + xlator_t *this = NULL; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *origin_vol = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + GF_ASSERT(rsp_dict); + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_int64 (dict, "volcount", &volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "get the volume count"); + goto out; + } + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to fetch snapname"); + goto out; + } + tmp_name = gf_strdup (snapname); + if (!tmp_name) { + gf_log (this->name, GF_LOG_ERROR, "Out of memory"); + ret = -1; + goto out; + } + + ret = dict_set_dynstr (rsp_dict, "snapname", tmp_name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set snapname in rsp_dict"); + GF_FREE (tmp_name); + goto out; + } + tmp_name = NULL; + + snap = glusterd_create_snap_object (dict, rsp_dict); + if (!snap) { + gf_log (this->name, GF_LOG_ERROR, "creating the" + "snap object %s failed", snapname); + ret = -1; + goto out; + } + + for (i = 1; i <= volcount; i++) { + snprintf (key, sizeof (key), "volname%ld", i); + ret = dict_get_str (dict, key, &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &origin_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get the volinfo for " + "the volume %s", volname); + goto out; + } + + /* TODO: Create a stub where the bricks are + added parallely by worker threads so that + the snap creating happens parallely. 
*/ + snap_vol = glusterd_do_snap_vol (origin_vol, snap, dict, + rsp_dict, i); + if (!snap_vol) { + ret = -1; + gf_log (this->name, GF_LOG_WARNING, "taking the " + "snapshot of the volume %s failed", volname); + goto out; + } + } + + snap->snap_status = GD_SNAP_STATUS_IN_USE; + ret = glusterd_store_snap (snap); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Could not store snap" + "object %s", snap->snapname); + goto out; + } + + ret = 0; + +out: + if (ret) { + if (snap) + glusterd_snap_remove (rsp_dict, snap, + _gf_true, _gf_true); + snap = NULL; + } + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +snap_max_hard_limit_set_commit (dict_t *dict, uint64_t value, + char *volname, char **op_errstr) +{ + char err_str[PATH_MAX] = ""; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (volname); + GF_ASSERT (op_errstr); + + conf = this->private; + + GF_ASSERT (conf); + + /* TODO: Initiate auto deletion when there is a limit change */ + if (!volname) { + /* For system limit */ + conf->snap_max_hard_limit = value; + + ret = glusterd_store_global_info (this); + if (ret) { + snprintf (err_str, PATH_MAX, "Failed to store " + "snap-max-hard-limit for system"); + goto out; + } + } else { + /* For one volume */ + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, PATH_MAX, "Failed to get the" + " volinfo for volume %s", volname); + goto out; + } + + volinfo->snap_max_hard_limit = value; + + ret = glusterd_store_volinfo (volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + snprintf (err_str, PATH_MAX, "Failed to store " + "snap-max-hard-limit for volume %s", volname); + goto out; + } + } + + ret = 0; +out: + if (ret) { + *op_errstr = gf_strdup (err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + } + return ret; +} + +int +snap_max_limits_display_commit (dict_t *rsp_dict, char *volname, + char **op_errstr) +{ + char err_str[PATH_MAX] = ""; + char buf[PATH_MAX] = ""; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = -1; + uint64_t active_hard_limit = 0; + uint64_t snap_max_limit = 0; + uint64_t soft_limit_value = -1; + uint64_t count = 0; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (volname); + GF_ASSERT (op_errstr); + + conf = this->private; + + GF_ASSERT (conf); + + if (!volname) { + /* For system limit */ + list_for_each_entry (volinfo, &conf->volumes, vol_list) { + if (volinfo->is_snap_volume == _gf_true) + continue; + snap_max_limit = volinfo->snap_max_hard_limit; + if (snap_max_limit > conf->snap_max_hard_limit) + active_hard_limit = conf->snap_max_hard_limit; + else + active_hard_limit = snap_max_limit; + soft_limit_value = (active_hard_limit * + conf->snap_max_soft_limit) / 100; + + snprintf (buf, sizeof(buf), "volume%ld-volname", count); + ret = dict_set_str (rsp_dict, buf, volinfo->volname); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-snap-max-hard-limit", count); + ret = dict_set_uint64 (rsp_dict, buf, snap_max_limit); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-active-hard-limit", count); + ret = dict_set_uint64 (rsp_dict, buf, + active_hard_limit); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", 
buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-snap-max-soft-limit", count); + ret = dict_set_uint64 (rsp_dict, buf, soft_limit_value); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + count++; + } + + ret = dict_set_uint64 (rsp_dict, "voldisplaycount", count); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set voldisplaycount"); + goto out; + } + } else { + /* For one volume */ + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, PATH_MAX, "Failed to get the" + " volinfo for volume %s", volname); + goto out; + } + + snap_max_limit = volinfo->snap_max_hard_limit; + if (snap_max_limit > conf->snap_max_hard_limit) + active_hard_limit = conf->snap_max_hard_limit; + else + active_hard_limit = snap_max_limit; + + soft_limit_value = (active_hard_limit * + conf->snap_max_soft_limit) / 100; + + snprintf (buf, sizeof(buf), "volume%ld-volname", count); + ret = dict_set_str (rsp_dict, buf, volinfo->volname); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-snap-max-hard-limit", count); + ret = dict_set_uint64 (rsp_dict, buf, snap_max_limit); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-active-hard-limit", count); + ret = dict_set_uint64 (rsp_dict, buf, active_hard_limit); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-snap-max-soft-limit", count); + ret = dict_set_uint64 (rsp_dict, buf, soft_limit_value); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + + count++; + + ret = dict_set_uint64 (rsp_dict, "voldisplaycount", count); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set voldisplaycount"); + goto out; + } + + } + + ret = dict_set_uint64 (rsp_dict, "snap-max-hard-limit", + conf->snap_max_hard_limit); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set sys-snap-max-hard-limit "); + goto out; + } + + ret = dict_set_uint64 (rsp_dict, "snap-max-soft-limit", + conf->snap_max_soft_limit); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set sys-snap-max-hard-limit "); + goto out; + } + + ret = 0; +out: + if (ret) { + *op_errstr = gf_strdup (err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + } + return ret; +} + +int +glusterd_snapshot_config_commit (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + char *volname = NULL; + xlator_t *this = NULL; + int ret = -1; + char err_str[PATH_MAX] = {0,}; + glusterd_conf_t *conf = NULL; + int config_command = 0; + uint64_t hard_limit = 0; + uint64_t soft_limit = 0; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + conf = this->private; + + GF_ASSERT (conf); + + ret = dict_get_int32 (dict, "config-command", &config_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get config-command type"); + goto out; + } + + /* Ignore the return value of the following dict_get, + * as they are optional + */ + ret = dict_get_str (dict, "volname", &volname); + + ret = dict_get_uint64 (dict, "snap-max-hard-limit", &hard_limit); + + ret = dict_get_uint64 (dict, "snap-max-soft-limit", &soft_limit); + + switch (config_command) { + case GF_SNAP_CONFIG_TYPE_SET: + if (hard_limit) { + /* Commit ops for snap-max-hard-limit */ + ret = snap_max_hard_limit_set_commit (dict, hard_limit, + 
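/* --- Editor's note (illustrative sketch, not part of the patch) ----------
 * The arithmetic that snap_max_limits_display_commit() performs twice
 * above, condensed: the active hard limit is the smaller of the volume's
 * own snap-max-hard-limit and the cluster-wide one, and the soft limit is
 * a percentage of the active value.  The helper name is hypothetical. */
static void
demo_effective_snap_limits (uint64_t vol_hard_limit, uint64_t sys_hard_limit,
                            uint64_t soft_limit_percent,
                            uint64_t *active_hard_limit,
                            uint64_t *soft_limit_value)
{
        *active_hard_limit = (vol_hard_limit > sys_hard_limit)
                              ? sys_hard_limit : vol_hard_limit;
        /* e.g. active = 256 and soft limit 90%  =>  soft = 230 */
        *soft_limit_value  = (*active_hard_limit * soft_limit_percent) / 100;
}
/* ------------------------------------------------------------------------ */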
volname, + op_errstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "snap-max-hard-limit set " + "commit failed."); + goto out; + } + } + + if (soft_limit) { + /* For system limit */ + conf->snap_max_soft_limit = soft_limit; + + ret = glusterd_store_global_info (this); + if (ret) { + snprintf (err_str, PATH_MAX, "Failed to store " + "snap-max-soft-limit for system"); + *op_errstr = gf_strdup (err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", + err_str); + goto out; + } + } + break; + + case GF_SNAP_CONFIG_DISPLAY: + /* Reading data from local node only */ + if (!is_origin_glusterd (dict)) { + ret = 0; + break; + } + + ret = snap_max_limits_display_commit (rsp_dict, volname, + op_errstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "snap-max-limit " + "display commit failed."); + goto out; + } + break; + default: + break; + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_get_brick_lvm_details (dict_t *rsp_dict, + glusterd_brickinfo_t *brickinfo, char *volname, + char *device, char *key_prefix) +{ + + int ret = -1; + glusterd_conf_t *priv = NULL; + runner_t runner = {0,}; + xlator_t *this = NULL; + char msg[PATH_MAX] = ""; + char buf[PATH_MAX] = ""; + char *ptr = NULL; + char *token = NULL; + char key[PATH_MAX] = ""; + char *value = NULL; + + GF_ASSERT (rsp_dict); + GF_ASSERT (brickinfo); + GF_ASSERT (volname); + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + device = glusterd_get_brick_mount_details (brickinfo); + if (!device) { + gf_log (this->name, GF_LOG_ERROR, "Getting device name for " + "the brick %s:%s failed", brickinfo->hostname, + brickinfo->path); + goto out; + } + runinit (&runner); + snprintf (msg, sizeof (msg), "running lvs command, " + "for getting snap status"); + /* Using lvs command fetch the Volume Group name, + * Percentage of data filled and Logical Volume size + * + * "-o" argument is used to get the desired information, + * example : "lvs /dev/VolGroup/thin_vol -o vgname,lv_size", + * will get us Volume Group name and Logical Volume size. 
+ * + * Here separator used is ":", + * for the above given command with separator ":", + * The output will be "vgname:lvsize" + */ + runner_add_args (&runner, "lvs", device, "--noheading", "-o", + "vg_name,data_percent,lv_size", + "--separator", ":", NULL); + runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); + runner_log (&runner, "", GF_LOG_DEBUG, msg); + ret = runner_start (&runner); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not perform lvs action"); + goto end; + } + do { + ptr = fgets (buf, sizeof (buf), + runner_chio (&runner, STDOUT_FILENO)); + + if (ptr == NULL) + break; + token = strtok (buf, ":"); + if (token != NULL) { + while (token && token[0] == ' ') + token++; + if (!token) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Invalid vg entry"); + goto end; + } + value = gf_strdup (token); + if (!value) { + ret = -1; + goto end; + } + ret = snprintf (key, sizeof (key), "%s.vgname", + key_prefix); + if (ret < 0) { + goto end; + } + + ret = dict_set_dynstr (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save vgname "); + goto end; + } + } + + token = strtok (NULL, ":"); + if (token != NULL) { + value = gf_strdup (token); + if (!value) { + ret = -1; + goto end; + } + ret = snprintf (key, sizeof (key), "%s.data", + key_prefix); + if (ret < 0) { + goto end; + } + + ret = dict_set_dynstr (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save data percent "); + goto end; + } + } + token = strtok (NULL, ":"); + if (token != NULL) { + value = gf_strdup (token); + if (!value) { + ret = -1; + goto end; + } + ret = snprintf (key, sizeof (key), "%s.lvsize", + key_prefix); + if (ret < 0) { + goto end; + } + + ret = dict_set_dynstr (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save meta data percent "); + goto end; + } + } + + } while (ptr != NULL); + + ret = 0; + +end: + runner_end (&runner); + +out: + if (ret && value) { + GF_FREE (value); + } + + return ret; +} + +int +glusterd_get_single_brick_status (char **op_errstr, dict_t *rsp_dict, + char *keyprefix, int index, + glusterd_volinfo_t *snap_volinfo, + glusterd_brickinfo_t *brickinfo) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char key[PATH_MAX] = ""; + char *device = NULL; + char *value = NULL; + char brick_path[PATH_MAX] = ""; + char pidfile[PATH_MAX] = ""; + pid_t pid = -1; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + GF_ASSERT (keyprefix); + GF_ASSERT (snap_volinfo); + GF_ASSERT (brickinfo); + + ret = snprintf (key, sizeof (key), "%s.brick%d.path", keyprefix, + index); + if (ret < 0) { + goto out; + } + + ret = snprintf (brick_path, sizeof (brick_path), + "%s:%s", brickinfo->hostname, brickinfo->path); + if (ret < 0) { + goto out; + } + + value = gf_strdup (brick_path); + if (!value) { + ret = -1; + goto out; + } + + ret = dict_set_dynstr (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to store " + "brick_path %s", brickinfo->path); + goto out; + } + + if (brickinfo->snap_status == -1) { + /* Setting vgname as "Pending Snapshot" */ + value = gf_strdup ("Pending Snapshot"); + if (!value) { + ret = -1; + goto out; + } + + snprintf (key, sizeof (key), "%s.brick%d.vgname", + keyprefix, index); + ret = dict_set_dynstr (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save vgname "); + goto out; + } + + ret = 0; + goto 
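/* --- Editor's note (illustrative sketch, not part of the patch) ----------
 * A self-contained re-parse of one lvs output line in the format requested
 * above ("--separator :", columns vg_name, data_percent, lv_size).  Sample
 * values are invented; strtok_r(3) is used instead of the strtok(3) seen
 * above only because it is re-entrant.  Error handling abbreviated. */
#include <stdio.h>
#include <string.h>

static void
demo_parse_lvs_line (char *line)
{
        char *save = NULL;
        char *vg   = strtok_r (line, ":", &save);
        char *data = strtok_r (NULL, ":", &save);
        char *size = strtok_r (NULL, ":\n", &save);

        /* lvs left-pads the first column; skip leading blanks. */
        while (vg && *vg == ' ')
                vg++;

        if (vg && data && size)
                printf ("vgname=%s data%%=%s lvsize=%s\n", vg, data, size);
}

int
main (void)
{
        char sample[] = "  vg_snap:0.42:2.00g";

        demo_parse_lvs_line (sample);   /* -> vgname=vg_snap data%=0.42 ... */
        return 0;
}
/* ------------------------------------------------------------------------ */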
out; + } + value = NULL; + + ret = snprintf (key, sizeof (key), "%s.brick%d.status", + keyprefix, index); + if (ret < 0) { + goto out; + } + + if (brickinfo->status == GF_BRICK_STOPPED) { + value = gf_strdup ("No"); + if (!value) { + ret = -1; + goto out; + } + ret = dict_set_str (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save brick status"); + goto out; + } + value = NULL; + } else { + value = gf_strdup ("Yes"); + if (!value) { + ret = -1; + goto out; + } + ret = dict_set_str (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save brick status"); + goto out; + } + value = NULL; + + GLUSTERD_GET_BRICK_PIDFILE (pidfile, snap_volinfo, + brickinfo, priv); + ret = glusterd_is_service_running (pidfile, &pid); + + ret = snprintf (key, sizeof (key), "%s.brick%d.pid", + keyprefix, index); + if (ret < 0) { + goto out; + } + + ret = dict_set_int32 (rsp_dict, key, pid); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save pid %d", pid); + goto out; + } + } + + ret = snprintf (key, sizeof (key), "%s.brick%d", + keyprefix, index); + if (ret < 0) { + goto out; + } + + ret = glusterd_get_brick_lvm_details (rsp_dict, brickinfo, + snap_volinfo->volname, + device, key); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "brick LVM details"); + goto out; + } +out: + if (ret && value) { + GF_FREE (value); + } + + return ret; +} + +int +glusterd_get_single_snap_status (char **op_errstr, dict_t *rsp_dict, + char *keyprefix, glusterd_snap_t *snap) +{ + int ret = -1; + xlator_t *this = NULL; + char key[PATH_MAX] = ""; + char brickkey[PATH_MAX] = ""; + glusterd_volinfo_t *snap_volinfo = NULL; + glusterd_volinfo_t *tmp_volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + int volcount = 0; + int brickcount = 0; + + this = THIS; + GF_ASSERT (this); + + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + GF_ASSERT (keyprefix); + GF_ASSERT (snap); + + list_for_each_entry_safe (snap_volinfo, tmp_volinfo, &snap->volumes, + vol_list) { + ret = snprintf (key, sizeof (key), "%s.vol%d", keyprefix, + volcount); + if (ret < 0) { + goto out; + } + list_for_each_entry (brickinfo, &snap_volinfo->bricks, + brick_list) { + if (!glusterd_is_local_brick (this, snap_volinfo, + brickinfo)) { + brickcount++; + continue; + } + + ret = glusterd_get_single_brick_status (op_errstr, + rsp_dict, key, brickcount, + snap_volinfo, brickinfo); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Getting " + "single snap status failed"); + goto out; + } + brickcount++; + } + ret = snprintf (brickkey, sizeof (brickkey), "%s.brickcount", + key); + if (ret < 0) { + goto out; + } + + ret = dict_set_int32 (rsp_dict, brickkey, brickcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save brick count"); + goto out; + } + volcount++; + } + + ret = snprintf (key, sizeof (key), "%s.volcount", keyprefix); + if (ret < 0) { + goto out; + } + + ret = dict_set_int32 (rsp_dict, key, volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save volcount"); + goto out; + } + +out: + + return ret; +} + +int +glusterd_get_each_snap_object_status (char **op_errstr, dict_t *rsp_dict, + glusterd_snap_t *snap, char *keyprefix) +{ + int ret = -1; + char key[PATH_MAX] = ""; + char *temp = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap); + GF_ASSERT (keyprefix); + + /* TODO : Get all the snap volume info present in snap object, + * as of now, 
There will be only one snapvolinfo per snap object + */ + ret = snprintf (key, sizeof (key), "%s.snapname", keyprefix); + if (ret < 0) { + goto out; + } + + temp = gf_strdup (snap->snapname); + if (temp == NULL) { + ret = -1; + goto out; + } + ret = dict_set_dynstr (rsp_dict, key, temp); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not save " + "snap name"); + goto out; + } + + temp = NULL; + + ret = snprintf (key, sizeof (key), "%s.uuid", keyprefix); + if (ret < 0) { + goto out; + } + + temp = gf_strdup (uuid_utoa (snap->snap_id)); + if (temp == NULL) { + ret = -1; + goto out; + } + + ret = dict_set_dynstr (rsp_dict, key, temp); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not save " + "snap UUID"); + goto out; + } + + temp = NULL; + + ret = glusterd_get_single_snap_status (op_errstr, rsp_dict, keyprefix, + snap); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not get single snap status"); + goto out; + } + + ret = snprintf (key, sizeof (key), "%s.volcount", keyprefix); + if (ret < 0) { + goto out; + } + + ret = dict_set_int32 (rsp_dict, key, 1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not save volcount"); + goto out; + } +out: + if (ret && temp) + GF_FREE (temp); + + return ret; +} + +int +glusterd_get_snap_status_of_volume (char **op_errstr, dict_t *rsp_dict, + char *volname, char *keyprefix) { + int ret = -1; + glusterd_volinfo_t *snap_volinfo = NULL; + glusterd_volinfo_t *temp_volinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + char key[PATH_MAX] = ""; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int i = 0; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + GF_ASSERT (volname); + GF_ASSERT (keyprefix); + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get volinfo of " + "volume %s", volname); + goto out; + } + + list_for_each_entry_safe (snap_volinfo, temp_volinfo, + &volinfo->snap_volumes, snapvol_list) { + ret = snprintf (key, sizeof (key), "status.snap%d", i); + if (ret < 0) { + goto out; + } + + ret = glusterd_get_each_snap_object_status (op_errstr, + rsp_dict, snap_volinfo->snapshot, key); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Function : " + "glusterd_get_single_snap_status failed"); + goto out; + } + i++; + } + + ret = dict_set_int32 (rsp_dict, "status.snapcount", i); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to save snapcount"); + ret = -1; + goto out; + } +out: + return ret; +} + +int +glusterd_get_all_snapshot_status (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int32_t i = 0; + int ret = -1; + char key[PATH_MAX] = ""; + glusterd_conf_t *priv = NULL; + glusterd_snap_t *snap = NULL; + glusterd_snap_t *tmp_snap = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + list_for_each_entry_safe (snap, tmp_snap, + &priv->snapshots, snap_list) { + ret = snprintf (key, sizeof (key), "status.snap%d", i); + if (ret < 0) { + goto out; + } + + ret = glusterd_get_each_snap_object_status (op_errstr, + rsp_dict, snap, key); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not get " + "the details of a snap object: %s", + snap->snapname); + goto out; + } + i++; + } + + ret = dict_set_int32 (rsp_dict, "status.snapcount", i); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not save snapcount"); + goto out; + } + + ret = 
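/* --- Editor's note (illustrative sketch, not part of the patch) ----------
 * The status helpers above assemble a flat key namespace in rsp_dict by
 * growing the prefix left to right:
 *
 *   status.snapcount
 *   status.snap<N>.snapname / .uuid / .volcount
 *   status.snap<N>.vol<M>.brickcount
 *   status.snap<N>.vol<M>.brick<K>.path / .status / .pid
 *   status.snap<N>.vol<M>.brick<K>.vgname / .data / .lvsize
 *
 * Building one leaf key with the same snprintf() chaining; the helper name
 * is hypothetical: */
static void
demo_status_key (char *key, size_t len, int snap, int vol, int brick)
{
        /* e.g. snap=0, vol=0, brick=2 -> "status.snap0.vol0.brick2.path" */
        snprintf (key, len, "status.snap%d.vol%d.brick%d.path",
                  snap, vol, brick);
}
/* ------------------------------------------------------------------------ */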
0; +out: + return ret; +} + + +int +glusterd_snapshot_status_commit (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + xlator_t *this = NULL; + int ret = -1; + glusterd_conf_t *conf = NULL; + char *get_buffer = NULL; + int32_t cmd = -1; + char *snapname = NULL; + glusterd_snap_t *snap = NULL; + char *volname = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + conf = this->private; + + GF_ASSERT (conf); + ret = dict_get_int32 (dict, "cmd", &cmd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get status cmd type"); + goto out; + } + + ret = dict_set_int32 (rsp_dict, "cmd", cmd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save status cmd in rsp dictionary"); + goto out; + } + switch (cmd) { + case GF_SNAP_STATUS_TYPE_ALL: + { + ret = glusterd_get_all_snapshot_status (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to " + "get snapshot status"); + goto out; + } + break; + } + case GF_SNAP_STATUS_TYPE_SNAP: + { + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to " + "get snap name"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + ret = gf_asprintf (op_errstr, "Snap (%s) " + "not found", snapname); + if (ret < 0) { + goto out; + } + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Unable to " + "get snap volinfo"); + goto out; + } + ret = glusterd_get_each_snap_object_status (op_errstr, + rsp_dict, snap, "status.snap0"); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to " + "get status of snap %s", get_buffer); + goto out; + } + break; + } + case GF_SNAP_STATUS_TYPE_VOL: + { + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to" + " get volume name"); + goto out; + } + + ret = glusterd_get_snap_status_of_volume (op_errstr, + rsp_dict, volname, "status.vol0"); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Function :" + " glusterd_get_snap_status_of_volume " + "failed"); + goto out; + } + } + } + ret = 0; +out: + return ret; +} + +int32_t +glusterd_snapshot_create_postvalidate (dict_t *dict, int32_t op_ret, + char **op_errstr, dict_t *rsp_dict) +{ + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + priv = this->private; + GF_ASSERT (priv); + + if (op_ret) { + ret = glusterd_do_snap_cleanup (dict, op_errstr, rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "cleanup operation " + "failed"); + goto out; + } + } else { + ret = glusterd_snapshot_update_snaps_post_validate (dict, + op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "create snapshot"); + goto out; + } + } + + ret = 0; +out: + return ret; +} + +int32_t +glusterd_snapshot (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int32_t snap_command = 0; + int ret = -1; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_int32 (dict, "type", &snap_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case (GF_SNAP_OPTION_TYPE_CREATE): + ret = glusterd_snapshot_create_commit (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, 
GF_LOG_ERROR, "Failed to " + "create snapshot"); + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_CONFIG: + ret = glusterd_snapshot_config_commit (dict, op_errstr, + rsp_dict); + break; + + case GF_SNAP_OPTION_TYPE_DELETE: + ret = glusterd_snapshot_remove_commit (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "delete snapshot"); + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_RESTORE: + ret = glusterd_snapshot_restore (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Failed to " + "restore snapshot"); + goto out; + } + + break; + + case GF_SNAP_OPTION_TYPE_STATUS: + ret = glusterd_snapshot_status_commit (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "show snapshot status"); + goto out; + } + break; + + + default: + gf_log (this->name, GF_LOG_WARNING, "invalid snap command"); + goto out; + break; + } + + ret = 0; + +out: + return ret; +} + +int +glusterd_snapshot_brickop (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int ret = -1; + int64_t vol_count = 0; + int64_t count = 1; + char key[1024] = {0,}; + char *volname = NULL; + int32_t snap_command = 0; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + ret = dict_get_int32 (dict, "type", &snap_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + ret = dict_get_int64 (dict, "volcount", &vol_count); + if (ret) + goto out; + while (count <= vol_count) { + snprintf (key, 1024, "volname%"PRId64, count); + ret = dict_get_str (dict, key, &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get volname"); + goto out; + } + ret = dict_set_str (dict, "volname", volname); + if (ret) + goto out; + + ret = gd_brick_op_phase (GD_OP_SNAP, NULL, dict, + op_errstr); + if (ret) + goto out; + volname = NULL; + count++; + } + + dict_del (dict, "volname"); + ret = 0; + break; + case GF_SNAP_OPTION_TYPE_DELETE: + break; + default: + break; + } + +out: + return ret; +} + +int +glusterd_snapshot_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int snap_command = 0; + xlator_t *this = NULL; + int ret = -1; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + ret = dict_get_int32 (dict, "type", &snap_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case (GF_SNAP_OPTION_TYPE_CREATE): + ret = glusterd_snapshot_create_prevalidate (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot create " + "pre-validation failed"); + goto out; + } + break; + + case (GF_SNAP_OPTION_TYPE_CONFIG): + ret = glusterd_snapshot_config_prevalidate (dict, op_errstr); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot config " + "pre-validation failed"); + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_RESTORE: + ret = glusterd_snapshot_restore_prevalidate (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot restore " + "validation failed"); + goto out; + } + break; + case GF_SNAP_OPTION_TYPE_DELETE: + ret = glusterd_snapshot_remove_prevalidate (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot remove " + "validation failed"); + goto 
out; + } + break; + + case GF_SNAP_OPTION_TYPE_STATUS: + ret = glusterd_snapshot_status_prevalidate (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot status " + "validation failed"); + goto out; + } + break; + + default: + gf_log (this->name, GF_LOG_WARNING, "invalid snap command"); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_snapshot_postvalidate (dict_t *dict, int32_t op_ret, char **op_errstr, + dict_t *rsp_dict) +{ + int snap_command = 0; + xlator_t *this = NULL; + int ret = -1; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + ret = dict_get_int32 (dict, "type", &snap_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + ret = glusterd_snapshot_create_postvalidate (dict, op_ret, + op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot create " + "post-validation failed"); + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_DELETE: + case GF_SNAP_OPTION_TYPE_RESTORE: + ret = glusterd_snapshot_update_snaps_post_validate (dict, + op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "update missed snaps list"); + goto out; + } + break; + + default: + gf_log (this->name, GF_LOG_WARNING, "invalid snap command"); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_handle_snapshot_fn (rpcsvc_request_t *req) +{ + int32_t ret = 0; + dict_t *dict = NULL; + gf_cli_req cli_req = {{0},}; + glusterd_op_t cli_op = GD_OP_SNAP; + int type = 0; + glusterd_conf_t *conf = NULL; + char *host_uuid = NULL; + char err_str[2048] = {0,}; + xlator_t *this = NULL; + char *volname = NULL; + + GF_ASSERT (req); + + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + + ret = xdr_to_generic (req->msg[0], &cli_req, + (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len > 0) { + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "unserialize req-buffer to dictionary"); + snprintf (err_str, sizeof (err_str), "Unable to decode " + "the command"); + goto out; + } + + dict->extra_stdfree = cli_req.dict.dict_val; + + host_uuid = gf_strdup (uuid_utoa(MY_UUID)); + if (host_uuid == NULL) { + snprintf (err_str, sizeof (err_str), "Failed to get " + "the uuid of local glusterd"); + ret = -1; + goto out; + } + ret = dict_set_dynstr (dict, "host-uuid", host_uuid); + if (ret) { + GF_FREE (host_uuid); + goto out; + } + + + } else { + gf_log (this->name, GF_LOG_ERROR, "request dict length is %d", + cli_req.dict.dict_len); + goto out; + } + + ret = dict_get_int32 (dict, "type", &type); + if (ret < 0) { + snprintf (err_str, sizeof (err_str), "Command type not found"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + switch (type) { + case GF_SNAP_OPTION_TYPE_CREATE: + ret = glusterd_handle_snapshot_create (req, cli_op, dict, + err_str, sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot create " + "failed: %s", err_str); + } + break; + case GF_SNAP_OPTION_TYPE_RESTORE: + ret = glusterd_handle_snapshot_restore (req, cli_op, dict, + err_str, sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot restore 
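/* --- Editor's note (illustrative sketch, not part of the patch) ----------
 * Each mgmt_v3 phase of the snapshot op fans out on the "type" key just
 * like the switch blocks above.  The per-phase entry points visible in
 * this patch are glusterd_snapshot_prevalidate(),
 * glusterd_snapshot_brickop(), glusterd_snapshot() for commit, and
 * glusterd_snapshot_postvalidate(); phase ordering is driven by
 * glusterd_mgmt_v3_initiate_snap_phases(), which is not part of this
 * hunk.  Three of the four share one signature, so a table-driven
 * equivalent of the dispatch is possible: */
typedef int (*demo_snap_phase_fn) (dict_t *dict, char **op_errstr,
                                   dict_t *rsp_dict);

static demo_snap_phase_fn demo_snap_phases[] = {
        glusterd_snapshot_prevalidate,  /* pre-validate */
        glusterd_snapshot_brickop,      /* brick op     */
        glusterd_snapshot,              /* commit       */
        /* glusterd_snapshot_postvalidate() takes an extra int32_t op_ret,
         * so it cannot share this table's signature. */
};
/* ------------------------------------------------------------------------ */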
" + "failed: %s", err_str); + } + + break; + case GF_SNAP_OPTION_TYPE_INFO: + ret = glusterd_handle_snapshot_info (req, cli_op, dict, + err_str, sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot info " + "failed"); + } + break; + case GF_SNAP_OPTION_TYPE_LIST: + ret = glusterd_handle_snapshot_list (req, cli_op, dict, + err_str, sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot list " + "failed"); + } + break; + case GF_SNAP_OPTION_TYPE_CONFIG: + /* TODO : Type of lock to be taken when we are setting + * limits system wide + */ + ret = dict_get_str (dict, "volname", &volname); + if (!volname) { + ret = dict_set_int32 (dict, "hold_vol_locks", + _gf_false); + if (ret) { + gf_log ("cli", GF_LOG_ERROR, + "Unable to set hold_vol_locks value " + "as _gf_false"); + goto out; + } + + } + ret = glusterd_mgmt_v3_initiate_all_phases (req, cli_op, dict); + break; + case GF_SNAP_OPTION_TYPE_DELETE: + ret = glusterd_handle_snapshot_remove (req, cli_op, dict, + err_str, + sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot delete " + "failed: %s", err_str); + } + break; + case GF_SNAP_OPTION_TYPE_START: + case GF_SNAP_OPTION_TYPE_STOP: + case GF_SNAP_OPTION_TYPE_STATUS: + ret = glusterd_handle_snapshot_status (req, cli_op, dict, + err_str, + sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot status " + "failed: %s", err_str); + } + break; + default: + gf_log (this->name, GF_LOG_ERROR, "Unkown snapshot request " + "type (%d)", type); + ret = -1; /* Failure */ + } + +out: + if (ret) { + if (err_str[0] == '\0') + snprintf (err_str, sizeof (err_str), + "Operation failed"); + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + dict, err_str); + } + + return ret; +} + +int +glusterd_handle_snapshot (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, glusterd_handle_snapshot_fn); +} + +static inline void +glusterd_free_snap_op (glusterd_snap_op_t *snap_op) +{ + if (snap_op) { + if (snap_op->brick_path) + GF_FREE (snap_op->brick_path); + + GF_FREE (snap_op); + } +} + +/* Look for duplicates and accordingly update the list */ +int32_t +glusterd_update_missed_snap_entry (glusterd_missed_snap_info *missed_snapinfo, + glusterd_snap_op_t *missed_snap_op) +{ + int32_t ret = -1; + glusterd_snap_op_t *snap_opinfo = NULL; + gf_boolean_t match = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(missed_snapinfo); + GF_ASSERT(missed_snap_op); + + list_for_each_entry (snap_opinfo, &missed_snapinfo->snap_ops, + snap_ops_list) { + if ((!strcmp (snap_opinfo->brick_path, + missed_snap_op->brick_path)) && + (snap_opinfo->op == missed_snap_op->op)) { + /* If two entries have conflicting status + * GD_MISSED_SNAP_DONE takes precedence + */ + if ((snap_opinfo->status == GD_MISSED_SNAP_PENDING) && + (missed_snap_op->status == GD_MISSED_SNAP_DONE)) { + snap_opinfo->status = GD_MISSED_SNAP_DONE; + gf_log (this->name, GF_LOG_INFO, + "Updating missed snap status " + "for %s:%d:%s:%d as DONE", + missed_snapinfo->node_snap_info, + snap_opinfo->brick_num, + snap_opinfo->brick_path, + snap_opinfo->op); + ret = 0; + glusterd_free_snap_op (missed_snap_op); + goto out; + } + match = _gf_true; + break; + } else if ((snap_opinfo->brick_num == + missed_snap_op->brick_num) && + (snap_opinfo->op == GF_SNAP_OPTION_TYPE_CREATE) && + (missed_snap_op->op == + GF_SNAP_OPTION_TYPE_DELETE)) { + /* Optimizing create and delete entries for the same + * brick and same node + */ + gf_log 
(this->name, GF_LOG_INFO, + "Updating missed snap status " + "for %s:%d:%s:%d as DONE", + missed_snapinfo->node_snap_info, + snap_opinfo->brick_num, + snap_opinfo->brick_path, + snap_opinfo->op); + snap_opinfo->status = GD_MISSED_SNAP_DONE; + ret = 0; + glusterd_free_snap_op (missed_snap_op); + goto out; + } + } + + if (match == _gf_true) { + gf_log (this->name, GF_LOG_INFO, + "Duplicate entry. Not updating"); + glusterd_free_snap_op (missed_snap_op); + } else { + list_add_tail (&missed_snap_op->snap_ops_list, + &missed_snapinfo->snap_ops); + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Add new missed snap entry to the missed_snaps list. */ +int32_t +glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num, + char *brick_path, int32_t snap_op, + int32_t snap_status) +{ + int32_t ret = -1; + glusterd_missed_snap_info *missed_snapinfo = NULL; + glusterd_snap_op_t *missed_snap_op = NULL; + glusterd_conf_t *priv = NULL; + gf_boolean_t match = _gf_false; + gf_boolean_t free_missed_snap_info = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(missed_info); + GF_ASSERT(brick_path); + + priv = this->private; + GF_ASSERT (priv); + + /* Create the snap_op object consisting of the * + * snap id and the op */ + ret = glusterd_missed_snap_op_new (&missed_snap_op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create new missed snap object."); + ret = -1; + goto out; + } + + missed_snap_op->brick_path = gf_strdup(brick_path); + if (!missed_snap_op->brick_path) { + ret = -1; + goto out; + } + missed_snap_op->brick_num = brick_num; + missed_snap_op->op = snap_op; + missed_snap_op->status = snap_status; + + /* Look for other entries for the same node and same snap */ + list_for_each_entry (missed_snapinfo, &priv->missed_snaps_list, + missed_snaps) { + if (!strcmp (missed_snapinfo->node_snap_info, + missed_info)) { + /* Found missed snapshot info for * + * the same node and same snap */ + match = _gf_true; + break; + } + } + + if (match == _gf_false) { + /* First snap op missed for the brick */ + ret = glusterd_missed_snapinfo_new (&missed_snapinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create missed snapinfo"); + goto out; + } + free_missed_snap_info = _gf_true; + missed_snapinfo->node_snap_info = gf_strdup(missed_info); + if (!missed_snapinfo->node_snap_info) { + ret = -1; + goto out; + } + + list_add_tail (&missed_snap_op->snap_ops_list, + &missed_snapinfo->snap_ops); + list_add_tail (&missed_snapinfo->missed_snaps, + &priv->missed_snaps_list); + + ret = 0; + goto out; + } else { + ret = glusterd_update_missed_snap_entry (missed_snapinfo, + missed_snap_op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to update existing missed snap entry."); + goto out; + } + } + +out: + if (ret) { + glusterd_free_snap_op (missed_snap_op); + + if (missed_snapinfo && + (free_missed_snap_info == _gf_true)) { + if (missed_snapinfo->node_snap_info) + GF_FREE (missed_snapinfo->node_snap_info); + + GF_FREE (missed_snapinfo); + } + } + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Add missing snap entries to the in-memory conf->missed_snap_list */ +int32_t +glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count) +{ + char *buf = NULL; + char *tmp = NULL; + char *save_ptr = NULL; + char *nodeid = NULL; + char *snap_uuid = NULL; + char *brick_path = NULL; + char missed_info[PATH_MAX] = ""; + char name_buf[PATH_MAX] = 
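/* --- Editor's note (illustrative sketch, not part of the patch) ----------
 * The loop just below pulls each "missed_snaps_<i>" value apart with
 * strtok_r(); reading the delimiter sequence back, every record has the
 * shape
 *
 *     <node-uuid>:<snap-uuid>=<brick-num>:<brick-path>:<op>:<status>
 *
 * and the first two fields are re-joined as "node:snap" into missed_info.
 * A self-contained re-parse of one such record (sample values invented,
 * error checks omitted): */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main (void)
{
        char  rec[]  = "aaaa-1111:bbbb-2222=1:/bricks/b1/s1:3:1";
        char *save   = NULL;
        char *node   = strtok_r (rec,  ":", &save);
        char *snap   = strtok_r (NULL, "=", &save);
        int   brick  = atoi (strtok_r (NULL, ":", &save));
        char *path   = strtok_r (NULL, ":", &save);
        int   op     = atoi (strtok_r (NULL, ":", &save));
        int   status = atoi (strtok_r (NULL, ":", &save));

        printf ("%s:%s brick=%d path=%s op=%d status=%d\n",
                node, snap, brick, path, op, status);
        return 0;
}
/* ------------------------------------------------------------------------ */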
""; + int32_t i = -1; + int32_t ret = -1; + int32_t brick_num = -1; + int32_t snap_op = -1; + int32_t snap_status = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + + priv = this->private; + GF_ASSERT (priv); + + /* We can update the missed_snaps_list without acquiring * + * any additional locks as big lock will be held. */ + for (i = 0; i < missed_snap_count; i++) { + snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", + i); + ret = dict_get_str (dict, name_buf, &buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch %s", name_buf); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "missed_snap_entry = %s", + buf); + + /* Need to make a duplicate string coz the same dictionary * + * is resent to the non-originator nodes */ + tmp = gf_strdup (buf); + if (!tmp) { + ret = -1; + goto out; + } + + /* Fetch the node-id, snap-id, brick_num, + * brick_path, snap_op and snap status + */ + nodeid = strtok_r (tmp, ":", &save_ptr); + snap_uuid = strtok_r (NULL, "=", &save_ptr); + brick_num = atoi(strtok_r (NULL, ":", &save_ptr)); + brick_path = strtok_r (NULL, ":", &save_ptr); + snap_op = atoi(strtok_r (NULL, ":", &save_ptr)); + snap_status = atoi(strtok_r (NULL, ":", &save_ptr)); + + if (!nodeid || !snap_uuid || !brick_path || + brick_num < 1 || snap_op < 1 || + snap_status < 1) { + gf_log (this->name, GF_LOG_ERROR, + "Invalid missed_snap_entry"); + ret = -1; + goto out; + } + + snprintf (missed_info, sizeof(missed_info), "%s:%s", + nodeid, snap_uuid); + + ret = glusterd_store_missed_snaps_list (missed_info, + brick_num, + brick_path, + snap_op, + snap_status); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to store missed snaps_list"); + goto out; + } + + GF_FREE (tmp); + tmp = NULL; + } + +out: + if (tmp) + GF_FREE (tmp); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 085e3e85d..1c2ec58e8 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -1,5 +1,5 @@ /* - Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com> + Copyright (c) 2007-2013 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. 
This file is licensed to you under your choice of the GNU Lesser @@ -35,6 +35,7 @@ #include "glusterd-op-sm.h" #include "glusterd-utils.h" #include "glusterd-hooks.h" +#include "store.h" #include "glusterd-store.h" #include "rpc-clnt.h" @@ -44,151 +45,8 @@ #include <inttypes.h> #include <dirent.h> -static int32_t -glusterd_store_mkdir (char *path) -{ - int32_t ret = -1; - - ret = mkdir (path, 0777); - - if ((-1 == ret) && (EEXIST != errno)) { - gf_log (THIS->name, GF_LOG_ERROR, "mkdir() failed on path %s," - "errno: %s", path, strerror (errno)); - } else { - ret = 0; - } - - return ret; -} - -int32_t -glusterd_store_handle_create_on_absence (glusterd_store_handle_t **shandle, - char *path) -{ - GF_ASSERT (shandle); - int32_t ret = 0; - - if (*shandle == NULL) { - ret = glusterd_store_handle_new (path, shandle); - - if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, "Unable to create " - "store handle for path: %s", path); - } - } - return ret; -} - -int32_t -glusterd_store_mkstemp (glusterd_store_handle_t *shandle) -{ - int fd = -1; - char tmppath[PATH_MAX] = {0,}; - - GF_ASSERT (shandle); - GF_ASSERT (shandle->path); - - snprintf (tmppath, sizeof (tmppath), "%s.tmp", shandle->path); - fd = open (tmppath, O_RDWR | O_CREAT | O_TRUNC | O_SYNC, 0600); - if (fd <= 0) { - gf_log (THIS->name, GF_LOG_ERROR, "Failed to open %s, " - "error: %s", tmppath, strerror (errno)); - } - - return fd; -} - -int -glusterd_store_sync_direntry (char *path) -{ - int ret = -1; - int dirfd = -1; - char *dir = NULL; - char *pdir = NULL; - xlator_t *this = NULL; - - this = THIS; - - dir = gf_strdup (path); - if (!dir) - goto out; - - pdir = dirname (dir); - dirfd = open (pdir, O_RDONLY); - if (dirfd == -1) { - gf_log (this->name, GF_LOG_ERROR, "Failed to open directory " - "%s, due to %s", pdir, strerror (errno)); - goto out; - } - - ret = fsync (dirfd); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to fsync %s, due to " - "%s", pdir, strerror (errno)); - goto out; - } - - ret = 0; -out: - if (dirfd >= 0) { - ret = close (dirfd); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to close " - "%s, due to %s", pdir, strerror (errno)); - } - } - - if (dir) - GF_FREE (dir); - - return ret; -} - -int32_t -glusterd_store_rename_tmppath (glusterd_store_handle_t *shandle) -{ - int32_t ret = -1; - char tmppath[PATH_MAX] = {0,}; - - GF_ASSERT (shandle); - GF_ASSERT (shandle->path); - - snprintf (tmppath, sizeof (tmppath), "%s.tmp", shandle->path); - ret = rename (tmppath, shandle->path); - if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, "Failed to rename %s to %s, " - "error: %s", tmppath, shandle->path, strerror (errno)); - goto out; - } - - ret = glusterd_store_sync_direntry (tmppath); -out: - return ret; -} - -int32_t -glusterd_store_unlink_tmppath (glusterd_store_handle_t *shandle) -{ - int32_t ret = -1; - char tmppath[PATH_MAX] = {0,}; - - GF_ASSERT (shandle); - GF_ASSERT (shandle->path); - - snprintf (tmppath, sizeof (tmppath), "%s.tmp", shandle->path); - ret = unlink (tmppath); - if (ret && (errno != ENOENT)) { - gf_log (THIS->name, GF_LOG_ERROR, "Failed to mv %s to %s, " - "error: %s", tmppath, shandle->path, strerror (errno)); - } else { - ret = 0; - } - - return ret; -} - -static void -glusterd_replace_slash_with_hipen (char *str) +void +glusterd_replace_slash_with_hyphen (char *str) { char *ptr = NULL; @@ -213,7 +71,7 @@ glusterd_store_create_brick_dir (glusterd_volinfo_t *volinfo) GF_ASSERT (priv); GLUSTERD_GET_BRICK_DIR (brickdirpath, volinfo, priv); - ret = glusterd_store_mkdir (brickdirpath); + ret 
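/* --- Editor's note (illustrative sketch, not part of the patch) ----------
 * The glusterd_store_* helpers removed above now live behind the new
 * #include "store.h" as the gf_store_* family, as the renamed call sites
 * throughout this file show.  The durability pattern those helpers
 * implement is write-to-temp-then-rename: write "<path>.tmp" with O_SYNC,
 * rename() it over the live file, then fsync() the parent directory so the
 * rename itself reaches disk.  A plain-libc sketch of the sequence (error
 * handling abbreviated): */
#include <fcntl.h>
#include <libgen.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int
demo_atomic_store (const char *path, const char *payload)
{
        char tmp[4096], dir[4096];
        int  fd, dfd;

        snprintf (tmp, sizeof (tmp), "%s.tmp", path);

        /* 1. write the new contents to <path>.tmp, synchronously */
        fd = open (tmp, O_RDWR | O_CREAT | O_TRUNC | O_SYNC, 0600);
        if (fd < 0)
                return -1;
        if (write (fd, payload, strlen (payload)) < 0) {
                close (fd);
                return -1;
        }
        close (fd);

        /* 2. atomically replace the live file */
        if (rename (tmp, path) < 0)
                return -1;

        /* 3. make the new directory entry durable too */
        snprintf (dir, sizeof (dir), "%s", path);
        dfd = open (dirname (dir), O_RDONLY);
        if (dfd < 0)
                return -1;
        fsync (dfd);
        close (dfd);
        return 0;
}
/* ------------------------------------------------------------------------ */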
= gf_store_mkdir (brickdirpath); return ret; } @@ -227,7 +85,7 @@ glusterd_store_key_vol_brick_set (glusterd_brickinfo_t *brickinfo, GF_ASSERT (len >= PATH_MAX); snprintf (key_vol_brick, len, "%s", brickinfo->path); - glusterd_replace_slash_with_hipen (key_vol_brick); + glusterd_replace_slash_with_hyphen (key_vol_brick); } static void @@ -326,7 +184,7 @@ glusterd_store_volinfo_brick_fname_write (int vol_fd, brick_count); glusterd_store_brickinfofname_set (brickinfo, brickfname, sizeof (brickfname)); - ret = glusterd_store_save_value (vol_fd, key, brickfname); + ret = gf_store_save_value (vol_fd, key, brickfname); if (ret) goto out; @@ -345,9 +203,9 @@ glusterd_store_create_brick_shandle_on_absence (glusterd_volinfo_t *volinfo, GF_ASSERT (brickinfo); glusterd_store_brickinfopath_set (volinfo, brickinfo, brickpath, - sizeof (brickpath)); - ret = glusterd_store_handle_create_on_absence (&brickinfo->shandle, - brickpath); + sizeof (brickpath)); + ret = gf_store_handle_create_on_absence (&brickinfo->shandle, + brickpath); return ret; } @@ -360,30 +218,48 @@ glusterd_store_brickinfo_write (int fd, glusterd_brickinfo_t *brickinfo) GF_ASSERT (brickinfo); GF_ASSERT (fd > 0); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_HOSTNAME, - brickinfo->hostname); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_HOSTNAME, + brickinfo->hostname); if (ret) goto out; - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_PATH, - brickinfo->path); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_PATH, + brickinfo->path); if (ret) goto out; snprintf (value, sizeof(value), "%d", brickinfo->port); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_PORT, - value); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_PORT, value); snprintf (value, sizeof(value), "%d", brickinfo->rdma_port); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_RDMA_PORT, - value); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_RDMA_PORT, + value); snprintf (value, sizeof(value), "%d", brickinfo->decommissioned); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED, - value); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED, + value); if (ret) goto out; + if (strlen(brickinfo->device_path) > 0) { + snprintf (value, sizeof(value), "%s", brickinfo->device_path); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH, + value); + if (ret) + goto out; + } + + snprintf (value, sizeof(value), "%d", brickinfo->snap_status); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS, + value); + if (ret) + goto out; + + if (!brickinfo->vg[0]) + goto out; + + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_VGNAME, + brickinfo->vg); out: gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; @@ -396,7 +272,7 @@ glusterd_store_perform_brick_store (glusterd_brickinfo_t *brickinfo) int32_t ret = -1; GF_ASSERT (brickinfo); - fd = glusterd_store_mkstemp (brickinfo->shandle); + fd = gf_store_mkstemp (brickinfo->shandle); if (fd <= 0) { ret = -1; goto out; @@ -408,7 +284,7 @@ glusterd_store_perform_brick_store (glusterd_brickinfo_t *brickinfo) out: if (ret && (fd > 0)) - glusterd_store_unlink_tmppath (brickinfo->shandle); + gf_store_unlink_tmppath (brickinfo->shandle); if (fd > 0) close (fd); gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -440,18 +316,17 @@ glusterd_store_brickinfo (glusterd_volinfo_t *volinfo, goto out; ret = glusterd_store_perform_brick_store 
(brickinfo); + out: gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret); return ret; } int32_t -glusterd_store_delete_brick (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) +glusterd_store_delete_brick (glusterd_brickinfo_t *brickinfo, char *delete_path) { int32_t ret = -1; glusterd_conf_t *priv = NULL; - char path[PATH_MAX] = {0,}; char brickpath[PATH_MAX] = {0,}; char *ptr = NULL; char *tmppath = NULL; @@ -459,15 +334,11 @@ glusterd_store_delete_brick (glusterd_volinfo_t *volinfo, this = THIS; GF_ASSERT (this); - GF_ASSERT (volinfo); GF_ASSERT (brickinfo); priv = this->private; - GF_ASSERT (priv); - GLUSTERD_GET_BRICK_DIR (path, volinfo, priv); - tmppath = gf_strdup (brickinfo->path); ptr = strchr (tmppath, '/'); @@ -477,15 +348,16 @@ glusterd_store_delete_brick (glusterd_volinfo_t *volinfo, ptr = strchr (tmppath, '/'); } - snprintf (brickpath, sizeof (brickpath), "%s/%s:%s", - path, brickinfo->hostname, tmppath); + snprintf (brickpath, sizeof (brickpath), + "%s/"GLUSTERD_BRICK_INFO_DIR"/%s:%s", delete_path, + brickinfo->hostname, tmppath); GF_FREE (tmppath); ret = unlink (brickpath); if ((ret < 0) && (errno != ENOENT)) { - gf_log (this->name, GF_LOG_ERROR, "Unlink failed on %s, " + gf_log (this->name, GF_LOG_DEBUG, "Unlink failed on %s, " "reason: %s", brickpath, strerror(errno)); ret = -1; goto out; @@ -495,7 +367,7 @@ glusterd_store_delete_brick (glusterd_volinfo_t *volinfo, out: if (brickinfo->shandle) { - glusterd_store_handle_destroy (brickinfo->shandle); + gf_store_handle_destroy (brickinfo->shandle); brickinfo->shandle = NULL; } gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); @@ -503,7 +375,7 @@ out: } int32_t -glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo) +glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo, char *delete_path) { int32_t ret = 0; glusterd_brickinfo_t *tmp = NULL; @@ -520,7 +392,7 @@ glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo) GF_ASSERT (volinfo); list_for_each_entry (tmp, &volinfo->bricks, brick_list) { - ret = glusterd_store_delete_brick (volinfo, tmp); + ret = glusterd_store_delete_brick (tmp, delete_path); if (ret) goto out; } @@ -528,7 +400,8 @@ glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo) priv = this->private; GF_ASSERT (priv); - GLUSTERD_GET_BRICK_DIR (brickdir, volinfo, priv); + snprintf (brickdir, sizeof (brickdir), "%s/%s", delete_path, + GLUSTERD_BRICK_INFO_DIR); dir = opendir (brickdir); @@ -539,7 +412,7 @@ glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo) brickdir, entry->d_name); ret = unlink (path); if (ret && errno != ENOENT) { - gf_log (this->name, GF_LOG_ERROR, "Unable to unlink %s, " + gf_log (this->name, GF_LOG_DEBUG, "Unable to unlink %s, " "reason: %s", path, strerror(errno)); } glusterd_for_each_entry (entry, dir); @@ -558,13 +431,13 @@ static int _storeslaves (dict_t *this, char *key, data_t *value, void *data) { int32_t ret = 0; - glusterd_store_handle_t *shandle = NULL; - xlator_t *xl = NULL; + gf_store_handle_t *shandle = NULL; + xlator_t *xl = NULL; xl = THIS; GF_ASSERT (xl); - shandle = (glusterd_store_handle_t*)data; + shandle = (gf_store_handle_t*)data; GF_ASSERT (shandle); GF_ASSERT (shandle->fd > 0); @@ -583,7 +456,7 @@ _storeslaves (dict_t *this, char *key, data_t *value, void *data) gf_log (xl->name, GF_LOG_DEBUG, "Storing in volinfo:key= %s, val=%s", key, value->data); - ret = glusterd_store_save_value (shandle->fd, key, (char*)value->data); + ret = gf_store_save_value (shandle->fd, key, (char*)value->data); if (ret) { gf_log (xl->name, 
GF_LOG_ERROR, "Unable to write into store" " handle for path: %s", shandle->path); @@ -597,13 +470,13 @@ int _storeopts (dict_t *this, char *key, data_t *value, void *data) { int32_t ret = 0; int32_t exists = 0; - glusterd_store_handle_t *shandle = NULL; - xlator_t *xl = NULL; + gf_store_handle_t *shandle = NULL; + xlator_t *xl = NULL; xl = THIS; GF_ASSERT (xl); - shandle = (glusterd_store_handle_t*)data; + shandle = (gf_store_handle_t*)data; GF_ASSERT (shandle); GF_ASSERT (shandle->fd > 0); @@ -636,7 +509,7 @@ int _storeopts (dict_t *this, char *key, data_t *value, void *data) return 0; } - ret = glusterd_store_save_value (shandle->fd, key, (char*)value->data); + ret = gf_store_save_value (shandle->fd, key, (char*)value->data); if (ret) { gf_log (xl->name, GF_LOG_ERROR, "Unable to write into store" " handle for path: %s", shandle->path); @@ -648,90 +521,118 @@ int _storeopts (dict_t *this, char *key, data_t *value, void *data) int32_t glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo) { - char *str = NULL; + char *str = NULL; + char buf[PATH_MAX] = {0,}; + int32_t ret = -1; GF_ASSERT (fd > 0); GF_ASSERT (volinfo); - char buf[PATH_MAX] = {0,}; - int32_t ret = -1; - snprintf (buf, sizeof (buf), "%d", volinfo->type); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_TYPE, buf); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_TYPE, buf); if (ret) goto out; snprintf (buf, sizeof (buf), "%d", volinfo->brick_count); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_COUNT, buf); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_COUNT, buf); if (ret) goto out; snprintf (buf, sizeof (buf), "%d", volinfo->status); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_STATUS, buf); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_STATUS, buf); if (ret) goto out; snprintf (buf, sizeof (buf), "%d", volinfo->sub_count); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_SUB_COUNT, - buf); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_SUB_COUNT, buf); if (ret) goto out; snprintf (buf, sizeof (buf), "%d", volinfo->stripe_count); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_STRIPE_CNT, - buf); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_STRIPE_CNT, buf); if (ret) goto out; snprintf (buf, sizeof (buf), "%d", volinfo->replica_count); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_REPLICA_CNT, - buf); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_REPLICA_CNT, + buf); if (ret) goto out; snprintf (buf, sizeof (buf), "%d", volinfo->version); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_VERSION, - buf); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_VERSION, buf); if (ret) goto out; snprintf (buf, sizeof (buf), "%d", volinfo->transport_type); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_TRANSPORT, - buf); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_TRANSPORT, buf); if (ret) goto out; - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_ID, - uuid_utoa (volinfo->volume_id)); + snprintf (buf, sizeof (buf), "%s", volinfo->parent_volname); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_PARENT_VOLNAME, buf); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to store " + GLUSTERD_STORE_KEY_PARENT_VOLNAME); + goto out; + } + + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_ID, + uuid_utoa (volinfo->volume_id)); if (ret) goto out; str = glusterd_auth_get_username (volinfo); if (str) { - 
ret = glusterd_store_save_value (fd, - GLUSTERD_STORE_KEY_USERNAME, - str); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_USERNAME, + str); if (ret) goto out; } str = glusterd_auth_get_password (volinfo); if (str) { - ret = glusterd_store_save_value (fd, - GLUSTERD_STORE_KEY_PASSWORD, - str); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_PASSWORD, + str); if (ret) goto out; } - if (volinfo->backend == GD_VOL_BK_BD) { - snprintf (buf, sizeof (buf), "%d", volinfo->backend); - ret = glusterd_store_save_value (fd, - GLUSTERD_STORE_KEY_VOL_BACKEND, buf); + snprintf (buf, sizeof (buf), "%d", volinfo->op_version); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_OP_VERSION, buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%d", volinfo->client_op_version); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION, + buf); + if (ret) + goto out; + if (volinfo->caps) { + snprintf (buf, sizeof (buf), "%d", volinfo->caps); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_CAPS, + buf); if (ret) goto out; } + snprintf (buf, sizeof (buf), "%d", volinfo->is_volume_restored); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_IS_RESTORED, buf); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to write is_volume_restored"); + goto out; + } + + snprintf (buf, sizeof (buf), "%"PRIu64, volinfo->snap_max_hard_limit); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, + buf); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to write snap-max-hard-limit"); + goto out; + } + out: if (ret) gf_log (THIS->name, GF_LOG_ERROR, "Unable to write volume " @@ -749,8 +650,7 @@ glusterd_store_voldirpath_set (glusterd_volinfo_t *volinfo, char *voldirpath, priv = THIS->private; GF_ASSERT (priv); - snprintf (voldirpath, len, "%s/%s/%s", priv->workdir, - GLUSTERD_VOLUME_DIR_PREFIX, volinfo->volname); + GLUSTERD_GET_VOLUME_DIR (voldirpath, volinfo, priv); } static int32_t @@ -763,16 +663,38 @@ glusterd_store_create_volume_dir (glusterd_volinfo_t *volinfo) glusterd_store_voldirpath_set (volinfo, voldirpath, sizeof (voldirpath)); - ret = glusterd_store_mkdir (voldirpath); + ret = gf_store_mkdir (voldirpath); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret); return ret; } +static int32_t +glusterd_store_create_snap_dir (glusterd_snap_t *snap) +{ + int32_t ret = -1; + char snapdirpath[PATH_MAX] = {0,}; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (snap); + + GLUSTERD_GET_SNAP_DIR (snapdirpath, snap, priv); + + ret = mkdir_p (snapdirpath, 0755, _gf_true); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to create snaps dir " + "%s", snapdirpath); + } + return ret; +} + int32_t glusterd_store_volinfo_write (int fd, glusterd_volinfo_t *volinfo) { int32_t ret = -1; - glusterd_store_handle_t *shandle = NULL; + gf_store_handle_t *shandle = NULL; GF_ASSERT (fd > 0); GF_ASSERT (volinfo); GF_ASSERT (volinfo->shandle); @@ -792,6 +714,49 @@ out: return ret; } +int32_t +glusterd_store_snapinfo_write (glusterd_snap_t *snap) +{ + int32_t ret = -1; + int fd = 0; + char buf[PATH_MAX] = ""; + + GF_ASSERT (snap); + + fd = gf_store_mkstemp (snap->shandle); + if (fd <= 0) + goto out; + + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_ID, + uuid_utoa (snap->snap_id)); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%d", snap->snap_status); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_STATUS, buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%d", 
snap->snap_restored); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_RESTORED, buf); + if (ret) + goto out; + + if (snap->description) { + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_DESC, + snap->description); + if (ret) + goto out; + } + + snprintf (buf, sizeof (buf), "%ld", snap->time_stamp); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_TIMESTAMP, buf); + +out: + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + static void glusterd_store_rbstatepath_set (glusterd_volinfo_t *volinfo, char *rbstatepath, size_t len) @@ -836,6 +801,37 @@ glusterd_store_node_state_path_set (glusterd_volinfo_t *volinfo, GLUSTERD_NODE_STATE_FILE); } +static void +glusterd_store_missed_snaps_list_path_set (char *missed_snaps_list, + size_t len) +{ + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (missed_snaps_list); + GF_ASSERT (len <= PATH_MAX); + + snprintf (missed_snaps_list, len, "%s/snaps/" + GLUSTERD_MISSED_SNAPS_LIST_FILE, priv->workdir); +} + +static void +glusterd_store_snapfpath_set (glusterd_snap_t *snap, char *snap_fpath, + size_t len) +{ + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (snap); + GF_ASSERT (snap_fpath); + GF_ASSERT (len <= PATH_MAX); + + snprintf (snap_fpath, len, "%s/snaps/%s/%s", priv->workdir, + snap->snapname, GLUSTERD_SNAP_INFO_FILE); +} + int32_t glusterd_store_create_rbstate_shandle_on_absence (glusterd_volinfo_t *volinfo) { @@ -845,8 +841,8 @@ glusterd_store_create_rbstate_shandle_on_absence (glusterd_volinfo_t *volinfo) GF_ASSERT (volinfo); glusterd_store_rbstatepath_set (volinfo, rbstatepath, sizeof (rbstatepath)); - ret = glusterd_store_handle_create_on_absence (&volinfo->rb_shandle, - rbstatepath); + ret = gf_store_handle_create_on_absence (&volinfo->rb_shandle, + rbstatepath); return ret; } @@ -859,8 +855,7 @@ glusterd_store_create_vol_shandle_on_absence (glusterd_volinfo_t *volinfo) GF_ASSERT (volinfo); glusterd_store_volfpath_set (volinfo, volfpath, sizeof (volfpath)); - ret = glusterd_store_handle_create_on_absence (&volinfo->shandle, - volfpath); + ret = gf_store_handle_create_on_absence (&volinfo->shandle, volfpath); return ret; } @@ -875,9 +870,45 @@ glusterd_store_create_nodestate_sh_on_absence (glusterd_volinfo_t *volinfo) glusterd_store_node_state_path_set (volinfo, node_state_path, sizeof (node_state_path)); ret = - glusterd_store_handle_create_on_absence (&volinfo->node_state_shandle, - node_state_path); + gf_store_handle_create_on_absence (&volinfo->node_state_shandle, + node_state_path); + + return ret; +} + +static int32_t +glusterd_store_create_missed_snaps_list_shandle_on_absence () +{ + char missed_snaps_list[PATH_MAX] = ""; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + priv = this->private; + GF_ASSERT (priv); + + glusterd_store_missed_snaps_list_path_set (missed_snaps_list, + sizeof(missed_snaps_list)); + ret = gf_store_handle_create_on_absence + (&priv->missed_snaps_list_shandle, + missed_snaps_list); + return ret; +} + +int32_t +glusterd_store_create_snap_shandle_on_absence (glusterd_snap_t *snap) +{ + char snapfpath[PATH_MAX] = {0}; + int32_t ret = 0; + + GF_ASSERT (snap); + + glusterd_store_snapfpath_set (snap, snapfpath, sizeof (snapfpath)); + ret = gf_store_handle_create_on_absence (&snap->shandle, snapfpath); return ret; } @@ -913,8 +944,7 @@ glusterd_store_rbstate_write (int fd, glusterd_volinfo_t *volinfo) GF_ASSERT (volinfo); 
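/* The *_path_set helpers above only compose fixed locations under the
 * glusterd working directory. A condensed sketch of the layout they
 * encode; the literal file names stand in for the GLUSTERD_*_FILE
 * macros and are assumptions here, not confirmed values: */
#include <stdio.h>

static void
snap_info_path (char *buf, size_t len, const char *workdir,
                const char *snapname)
{
        /* mirrors glusterd_store_snapfpath_set */
        snprintf (buf, len, "%s/snaps/%s/info", workdir, snapname);
}

static void
missed_snaps_list_path (char *buf, size_t len, const char *workdir)
{
        /* mirrors glusterd_store_missed_snaps_list_path_set */
        snprintf (buf, len, "%s/snaps/missed_snaps_list", workdir);
}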
snprintf (buf, sizeof (buf), "%d", volinfo->rep_brick.rb_status); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_RB_STATUS, - buf); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_RB_STATUS, buf); if (ret) goto out; @@ -923,16 +953,16 @@ glusterd_store_rbstate_write (int fd, glusterd_volinfo_t *volinfo) snprintf (buf, sizeof (buf), "%s:%s", volinfo->rep_brick.src_brick->hostname, volinfo->rep_brick.src_brick->path); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_RB_SRC_BRICK, - buf); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_RB_SRC_BRICK, + buf); if (ret) goto out; snprintf (buf, sizeof (buf), "%s:%s", volinfo->rep_brick.dst_brick->hostname, volinfo->rep_brick.dst_brick->path); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_RB_DST_BRICK, - buf); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_RB_DST_BRICK, + buf); if (ret) goto out; @@ -948,13 +978,12 @@ glusterd_store_rbstate_write (int fd, glusterd_volinfo_t *volinfo) } snprintf (buf, sizeof (buf), "%d", port); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_RB_DST_PORT, - buf); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_RB_DST_PORT, + buf); if (ret) goto out; uuid_unparse (volinfo->rep_brick.rb_id, buf); - ret = glusterd_store_save_value (fd, GF_REPLACE_BRICK_TID_KEY, - buf); + ret = gf_store_save_value (fd, GF_REPLACE_BRICK_TID_KEY, buf); } ret = 0; @@ -970,7 +999,7 @@ glusterd_store_perform_rbstate_store (glusterd_volinfo_t *volinfo) int32_t ret = -1; GF_ASSERT (volinfo); - fd = glusterd_store_mkstemp (volinfo->rb_shandle); + fd = gf_store_mkstemp (volinfo->rb_shandle); if (fd <= 0) { ret = -1; goto out; @@ -980,13 +1009,13 @@ glusterd_store_perform_rbstate_store (glusterd_volinfo_t *volinfo) if (ret) goto out; - ret = glusterd_store_rename_tmppath (volinfo->rb_shandle); + ret = gf_store_rename_tmppath (volinfo->rb_shandle); if (ret) goto out; out: if (ret && (fd > 0)) - glusterd_store_unlink_tmppath (volinfo->rb_shandle); + gf_store_unlink_tmppath (volinfo->rb_shandle); if (fd > 0) close (fd); gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -1008,22 +1037,18 @@ glusterd_store_node_state_write (int fd, glusterd_volinfo_t *volinfo) } snprintf (buf, sizeof (buf), "%d", volinfo->rebal.defrag_cmd); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DEFRAG, - buf); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DEFRAG, buf); if (ret) goto out; snprintf (buf, sizeof (buf), "%d", volinfo->rebal.op); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_DEFRAG_OP, - buf); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_DEFRAG_OP, buf); if (ret) goto out; if (volinfo->rebal.defrag_cmd) { uuid_unparse (volinfo->rebal.rebalance_id, buf); - ret = glusterd_store_save_value (fd, - GF_REBALANCE_TID_KEY, - buf); + ret = gf_store_save_value (fd, GF_REBALANCE_TID_KEY, buf); } out: gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -1037,7 +1062,7 @@ glusterd_store_perform_node_state_store (glusterd_volinfo_t *volinfo) int32_t ret = -1; GF_ASSERT (volinfo); - fd = glusterd_store_mkstemp (volinfo->node_state_shandle); + fd = gf_store_mkstemp (volinfo->node_state_shandle); if (fd <= 0) { ret = -1; goto out; @@ -1047,13 +1072,13 @@ glusterd_store_perform_node_state_store (glusterd_volinfo_t *volinfo) if (ret) goto out; - ret = glusterd_store_rename_tmppath (volinfo->node_state_shandle); + ret = gf_store_rename_tmppath (volinfo->node_state_shandle); if (ret) goto out; out: if (ret && (fd > 0)) - glusterd_store_unlink_tmppath 
(volinfo->node_state_shandle); + gf_store_unlink_tmppath (volinfo->node_state_shandle); if (fd > 0) close (fd); gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -1067,7 +1092,7 @@ glusterd_store_perform_volume_store (glusterd_volinfo_t *volinfo) int32_t ret = -1; GF_ASSERT (volinfo); - fd = glusterd_store_mkstemp (volinfo->shandle); + fd = gf_store_mkstemp (volinfo->shandle); if (fd <= 0) { ret = -1; goto out; @@ -1083,7 +1108,7 @@ out: if (ret && (fd > 0)) - glusterd_store_unlink_tmppath (volinfo->shandle); + gf_store_unlink_tmppath (volinfo->shandle); if (fd > 0) close (fd); gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -1116,7 +1141,7 @@ glusterd_store_bricks_cleanup_tmp (glusterd_volinfo_t *volinfo) GF_ASSERT (volinfo); list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - glusterd_store_unlink_tmppath (brickinfo->shandle); + gf_store_unlink_tmppath (brickinfo->shandle); } } @@ -1127,11 +1152,19 @@ glusterd_store_volume_cleanup_tmp (glusterd_volinfo_t *volinfo) glusterd_store_bricks_cleanup_tmp (volinfo); - glusterd_store_unlink_tmppath (volinfo->shandle); + gf_store_unlink_tmppath (volinfo->shandle); - glusterd_store_unlink_tmppath (volinfo->rb_shandle); + gf_store_unlink_tmppath (volinfo->rb_shandle); - glusterd_store_unlink_tmppath (volinfo->node_state_shandle); + gf_store_unlink_tmppath (volinfo->node_state_shandle); +} + +void +glusterd_store_snap_cleanup_tmp (glusterd_snap_t *snap) +{ + GF_ASSERT (snap); + + gf_store_unlink_tmppath (snap->shandle); } int32_t @@ -1143,7 +1176,7 @@ glusterd_store_brickinfos_atomic_update (glusterd_volinfo_t *volinfo) GF_ASSERT (volinfo); list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - ret = glusterd_store_rename_tmppath (brickinfo->shandle); + ret = gf_store_rename_tmppath (brickinfo->shandle); if (ret) goto out; } @@ -1157,7 +1190,7 @@ glusterd_store_volinfo_atomic_update (glusterd_volinfo_t *volinfo) int ret = -1; GF_ASSERT (volinfo); - ret = glusterd_store_rename_tmppath (volinfo->shandle); + ret = gf_store_rename_tmppath (volinfo->shandle); if (ret) goto out; @@ -1185,6 +1218,60 @@ out: } int32_t +glusterd_store_snap_atomic_update (glusterd_snap_t *snap) +{ + int ret = -1; + GF_ASSERT (snap); + + ret = gf_store_rename_tmppath (snap->shandle); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, "Couldn't rename " + "temporary file(s): Reason %s", strerror (errno)); + + return ret; +} + +int32_t +glusterd_store_snap (glusterd_snap_t *snap) +{ + int32_t ret = -1; + + GF_ASSERT (snap); + + ret = glusterd_store_create_snap_dir (snap); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to create snap dir"); + goto out; + } + + ret = glusterd_store_create_snap_shandle_on_absence (snap); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to create snap info " + "file"); + goto out; + } + + ret = glusterd_store_snapinfo_write (snap); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to write snap info"); + goto out; + } + + ret = glusterd_store_snap_atomic_update (snap); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to do atomic update"); + goto out; + } + +out: + if (ret) + glusterd_store_snap_cleanup_tmp (snap); + + gf_log (THIS->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t glusterd_store_volinfo (glusterd_volinfo_t *volinfo, glusterd_volinfo_ver_ac_t ac) { int32_t ret = -1; @@ -1241,18 +1328,20 @@ out: return ret; } - int32_t glusterd_store_delete_volume (glusterd_volinfo_t *volinfo)
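/* Every *_perform_*_store and *_atomic_update function above follows the
 * same pattern: write the whole file to a temporary sibling, then
 * rename(2) it over the real path so a reader (or a crash) never sees a
 * half-written store file. A self-contained sketch of that pattern in
 * plain POSIX; the gf_store_mkstemp/rename_tmppath/unlink_tmppath
 * helpers wrap the equivalent steps, and the names here are invented: */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static int
atomic_store (const char *path, const char *payload)
{
        char tmppath[4096] = {0,};
        int  fd  = -1;
        int  ret = -1;

        snprintf (tmppath, sizeof (tmppath), "%s.XXXXXX", path);
        fd = mkstemp (tmppath);
        if (fd < 0)
                return -1;

        /* short writes are ignored for brevity */
        if (write (fd, payload, strlen (payload)) < 0)
                goto out;
        if (fsync (fd))                 /* flush before publishing */
                goto out;
        if (rename (tmppath, path))     /* the atomic switch-over */
                goto out;
        ret = 0;
out:
        if (ret)
                unlink (tmppath);       /* the cleanup_tmp analogue */
        close (fd);
        return ret;
}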
{ - char pathname[PATH_MAX] = {0,}; - int32_t ret = 0; - glusterd_conf_t *priv = NULL; - DIR *dir = NULL; - struct dirent *entry = NULL; - char path[PATH_MAX] = {0,}; - struct stat st = {0, }; - xlator_t *this = NULL; + char pathname[PATH_MAX] = {0,}; + int32_t ret = 0; + glusterd_conf_t *priv = NULL; + DIR *dir = NULL; + struct dirent *entry = NULL; + char path[PATH_MAX] = {0,}; + char delete_path[PATH_MAX] = {0,}; + char trashdir[PATH_MAX] = {0,}; + struct stat st = {0, }; + xlator_t *this = NULL; + gf_boolean_t rename_fail = _gf_false; this = THIS; GF_ASSERT (this); @@ -1261,29 +1350,53 @@ glusterd_store_delete_volume (glusterd_volinfo_t *volinfo) priv = this->private; GF_ASSERT (priv); - snprintf (pathname, sizeof (pathname), "%s/vols/%s", priv->workdir, - volinfo->volname); - dir = opendir (pathname); + GLUSTERD_GET_VOLUME_DIR (pathname, volinfo, priv); + + snprintf (delete_path, sizeof (delete_path), + "%s/"GLUSTERD_TRASH"/%s.deleted", priv->workdir, + uuid_utoa (volinfo->volume_id)); + + snprintf (trashdir, sizeof (trashdir), "%s/"GLUSTERD_TRASH, + priv->workdir); + + ret = mkdir (trashdir, 0777); + if (ret && errno != EEXIST) { + gf_log (this->name, GF_LOG_ERROR, "Failed to create trash " + "directory, reason : %s", strerror (errno)); + ret = -1; + goto out; + } + + ret = rename (pathname, delete_path); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to rename volume " + "directory for volume %s", volinfo->volname); + rename_fail = _gf_true; + goto out; + } + + dir = opendir (delete_path); if (!dir) { - gf_log (this->name, GF_LOG_ERROR, "Failed to open directory %s." - " Reason : %s", pathname, strerror (errno)); + gf_log (this->name, GF_LOG_DEBUG, "Failed to open directory %s." + " Reason : %s", delete_path, strerror (errno)); + ret = 0; goto out; } - ret = glusterd_store_remove_bricks (volinfo); + ret = glusterd_store_remove_bricks (volinfo, delete_path); if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Remove bricks failed for %s", + gf_log (this->name, GF_LOG_DEBUG, "Remove bricks failed for %s", volinfo->volname); } glusterd_for_each_entry (entry, dir); while (entry) { - snprintf (path, PATH_MAX, "%s/%s", pathname, entry->d_name); + snprintf (path, PATH_MAX, "%s/%s", delete_path, entry->d_name); ret = stat (path, &st); if (ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "Failed to stat " + gf_log (this->name, GF_LOG_DEBUG, "Failed to stat " "entry %s : %s", path, strerror (errno)); goto stat_failed; } @@ -1293,11 +1406,12 @@ glusterd_store_delete_volume (glusterd_volinfo_t *volinfo) else ret = unlink (path); - if (ret) - gf_log (this->name, GF_LOG_ERROR, " Failed to remove " + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, " Failed to remove " "%s. Reason : %s", path, strerror (errno)); + } - gf_log (this->name, ret ? GF_LOG_ERROR : GF_LOG_DEBUG, "%s %s", + gf_log (this->name, GF_LOG_DEBUG, "%s %s", ret ? "Failed to remove":"Removed", entry->d_name); stat_failed: @@ -1307,290 +1421,153 @@ stat_failed: ret = closedir (dir); if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to close dir %s. " - "Reason : %s",pathname, strerror (errno)); + gf_log (this->name, GF_LOG_DEBUG, "Failed to close dir %s. 
" + "Reason : %s",delete_path, strerror (errno)); } - ret = rmdir (pathname); + ret = rmdir (delete_path); if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to rmdir: %s, err: %s", - pathname, strerror (errno)); + gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s,err: %s", + delete_path, strerror (errno)); + } + ret = rmdir (trashdir); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s, Reason:" + " %s", trashdir, strerror (errno)); } - out: if (volinfo->shandle) { - glusterd_store_handle_destroy (volinfo->shandle); + gf_store_handle_destroy (volinfo->shandle); volinfo->shandle = NULL; } - gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + ret = (rename_fail == _gf_true) ? -1: 0; + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } +/*TODO: cleanup the duplicate code and implement a generic function for + * deleting snap/volume depending on the parameter flag */ +int32_t +glusterd_store_delete_snap (glusterd_snap_t *snap) +{ + char pathname[PATH_MAX] = {0,}; + int32_t ret = 0; + glusterd_conf_t *priv = NULL; + DIR *dir = NULL; + struct dirent *entry = NULL; + char path[PATH_MAX] = {0,}; + char delete_path[PATH_MAX] = {0,}; + char trashdir[PATH_MAX] = {0,}; + struct stat st = {0, }; + xlator_t *this = NULL; + gf_boolean_t rename_fail = _gf_false; -int -glusterd_store_read_and_tokenize (FILE *file, char *str, - char **iter_key, char **iter_val, - glusterd_store_op_errno_t *store_errno) -{ - int32_t ret = -1; - char *savetok = NULL; + this = THIS; + priv = this->private; + GF_ASSERT (priv); - GF_ASSERT (file); - GF_ASSERT (str); - GF_ASSERT (iter_key); - GF_ASSERT (iter_val); - GF_ASSERT (store_errno); + GF_ASSERT (snap); + GLUSTERD_GET_SNAP_DIR (pathname, snap, priv); - ret = fscanf (file, "%s", str); - if (ret <= 0 || feof (file)) { - ret = -1; - *store_errno = GD_STORE_EOF; - goto out; - } + snprintf (delete_path, sizeof (delete_path), + "%s/"GLUSTERD_TRASH"/snap-%s.deleted", priv->workdir, + uuid_utoa (snap->snap_id)); - *iter_key = strtok_r (str, "=", &savetok); - if (*iter_key == NULL) { - ret = -1; - *store_errno = GD_STORE_KEY_NULL; - goto out; - } + snprintf (trashdir, sizeof (trashdir), "%s/"GLUSTERD_TRASH, + priv->workdir); - *iter_val = strtok_r (NULL, "=", &savetok); - if (*iter_key == NULL) { + ret = mkdir (trashdir, 0777); + if (ret && errno != EEXIST) { + gf_log (this->name, GF_LOG_ERROR, "Failed to create trash " + "directory, reason : %s", strerror (errno)); ret = -1; - *store_errno = GD_STORE_VALUE_NULL; - goto out; - } - - *store_errno = GD_STORE_SUCCESS; - ret = 0; -out: - return ret; -} - -int32_t -glusterd_store_retrieve_value (glusterd_store_handle_t *handle, - char *key, char **value) -{ - int32_t ret = -1; - char *scan_str = NULL; - char *iter_key = NULL; - char *iter_val = NULL; - char *free_str = NULL; - struct stat st = {0,}; - glusterd_store_op_errno_t store_errno = GD_STORE_SUCCESS; - - GF_ASSERT (handle); - - handle->fd = open (handle->path, O_RDWR); - - if (handle->fd == -1) { - gf_log ("", GF_LOG_ERROR, "Unable to open file %s errno: %s", - handle->path, strerror (errno)); - goto out; - } - if (!handle->read) - handle->read = fdopen (handle->fd, "r"); - - if (!handle->read) { - gf_log ("", GF_LOG_ERROR, "Unable to open file %s errno: %s", - handle->path, strerror (errno)); goto out; } - ret = fstat (handle->fd, &st); - if (ret < 0) { - gf_log ("glusterd", GF_LOG_WARNING, - "stat on file failed"); - ret = -1; - store_errno = GD_STORE_STAT_FAILED; + ret = rename (pathname, delete_path); + if (ret) { + 
gf_log (this->name, GF_LOG_ERROR, "Failed to rename snap " + "directory %s to %s", snap->snapname, delete_path); + rename_fail = _gf_true; goto out; } - scan_str = GF_CALLOC (1, st.st_size, - gf_gld_mt_char); - if (scan_str == NULL) { - ret = -1; - store_errno = GD_STORE_ENOMEM; + dir = opendir (delete_path); + if (!dir) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to open directory %s." + " Reason : %s", delete_path, strerror (errno)); + ret = 0; goto out; } - free_str = scan_str; - - do { - ret = glusterd_store_read_and_tokenize (handle->read, scan_str, - &iter_key, &iter_val, - &store_errno); - if (ret < 0) { - goto out; + glusterd_for_each_entry (entry, dir); + while (entry) { + snprintf (path, PATH_MAX, "%s/%s", delete_path, entry->d_name); + ret = stat (path, &st); + if (ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to stat " + "entry %s : %s", path, strerror (errno)); + goto stat_failed; } - gf_log ("", GF_LOG_DEBUG, "key %s read", iter_key); + if (S_ISDIR (st.st_mode)) + ret = rmdir (path); + else + ret = unlink (path); - if (!strcmp (key, iter_key)) { - gf_log ("", GF_LOG_DEBUG, "key %s found", key); - ret = 0; - if (iter_val) - *value = gf_strdup (iter_val); - goto out; + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, " Failed to remove " + "%s. Reason : %s", path, strerror (errno)); } - } while (1); -out: - if (handle->fd > 0) { - close (handle->fd); - handle->read = NULL; - } - - GF_FREE (free_str); - - return ret; -} -int32_t -glusterd_store_save_value (int fd, char *key, char *value) -{ - int32_t ret = -1; - FILE *fp = NULL; - xlator_t *this = NULL; - - this = THIS; - GF_ASSERT (this); - GF_ASSERT (fd > 0); - GF_ASSERT (key); - GF_ASSERT (value); - - fp = fdopen (fd, "a+"); - if (fp == NULL) { - gf_log (this->name, GF_LOG_WARNING, "fdopen failed."); - ret = -1; - goto out; + gf_log (this->name, GF_LOG_DEBUG, "%s %s", + ret ? "Failed to remove":"Removed", + entry->d_name); +stat_failed: + memset (path, 0, sizeof(path)); + glusterd_for_each_entry (entry, dir); } - ret = fprintf (fp, "%s=%s\n", key, value); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, "Unable to store key: %s," - "value: %s, error: %s", key, value, - strerror (errno)); - ret = -1; - goto out; + ret = closedir (dir); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to close dir %s. 
" + "Reason : %s",delete_path, strerror (errno)); } - ret = fflush (fp); - if (feof (fp)) { - gf_log (this->name, GF_LOG_WARNING, - "fflush failed, error: %s", - strerror (errno)); - ret = -1; - goto out; + ret = rmdir (delete_path); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s,err: %s", + delete_path, strerror (errno)); } - - ret = 0; -out: - - gf_log (this->name, GF_LOG_DEBUG, "returning: %d", ret); - return ret; -} - -int32_t -glusterd_store_handle_new (char *path, glusterd_store_handle_t **handle) -{ - int32_t ret = -1; - glusterd_store_handle_t *shandle = NULL; - int fd = -1; - char *spath = NULL; - xlator_t *this = NULL; - - this = THIS; - GF_ASSERT (this); - - shandle = GF_CALLOC (1, sizeof (*shandle), gf_gld_mt_store_handle_t); - if (!shandle) - goto out; - - spath = gf_strdup (path); - - if (!spath) - goto out; - - fd = open (path, O_RDWR | O_CREAT | O_APPEND, 0600); - if (fd <= 0) { - gf_log (this->name, GF_LOG_ERROR, "Failed to open file: %s, " - "error: %s", path, strerror (errno)); - goto out; + ret = rmdir (trashdir); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s, Reason:" + " %s", trashdir, strerror (errno)); } - ret = glusterd_store_sync_direntry (spath); - if (ret) - goto out; - - shandle->path = spath; - *handle = shandle; - - ret = 0; out: - if (fd > 0) - close (fd); - - if (ret == -1) { - GF_FREE (spath); - GF_FREE (shandle); + if (snap->shandle) { + gf_store_handle_destroy (snap->shandle); + snap->shandle = NULL; } + ret = (rename_fail == _gf_true) ? -1: 0; gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } int -glusterd_store_handle_retrieve (char *path, glusterd_store_handle_t **handle) -{ - int32_t ret = -1; - struct stat statbuf = {0}; - - ret = stat (path, &statbuf); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "Unable to retrieve store " - "handle for %s, error: %s", path, strerror (errno)); - goto out; - } - ret = glusterd_store_handle_new (path, handle); -out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} - -int32_t -glusterd_store_handle_destroy (glusterd_store_handle_t *handle) -{ - int32_t ret = -1; - - if (!handle) { - ret = 0; - goto out; - } - - GF_FREE (handle->path); - - GF_FREE (handle); - - ret = 0; - -out: - gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); - - return ret; -} - -int glusterd_store_global_info (xlator_t *this) { int ret = -1; glusterd_conf_t *conf = NULL; char op_version_str[15] = {0,}; char path[PATH_MAX] = {0,}; - glusterd_store_handle_t *handle = NULL; + gf_store_handle_t *handle = NULL; char *uuid_str = NULL; + char buf[256] = {0, }; conf = this->private; @@ -1601,7 +1578,7 @@ glusterd_store_global_info (xlator_t *this) if (!conf->handle) { snprintf (path, PATH_MAX, "%s/%s", conf->workdir, GLUSTERD_INFO_FILE); - ret = glusterd_store_handle_new (path, &handle); + ret = gf_store_handle_new (path, &handle); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Unable to get store handle"); @@ -1620,14 +1597,14 @@ glusterd_store_global_info (xlator_t *this) goto out; } - handle->fd = glusterd_store_mkstemp (handle); + handle->fd = gf_store_mkstemp (handle); if (handle->fd <= 0) { ret = -1; goto out; } - ret = glusterd_store_save_value (handle->fd, GLUSTERD_STORE_UUID_KEY, - uuid_str); + ret = gf_store_save_value (handle->fd, GLUSTERD_STORE_UUID_KEY, + uuid_str); if (ret) { gf_log (this->name, GF_LOG_CRITICAL, "Storing uuid failed ret = %d", ret); @@ -1635,18 +1612,36 @@ glusterd_store_global_info (xlator_t *this) } snprintf (op_version_str, 15, 
"%d", conf->op_version); - ret = glusterd_store_save_value (handle->fd, GD_OP_VERSION_KEY, - op_version_str); + ret = gf_store_save_value (handle->fd, GD_OP_VERSION_KEY, + op_version_str); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Storing op-version failed ret = %d", ret); goto out; } - ret = glusterd_store_rename_tmppath (handle); + snprintf (buf, sizeof (buf), "%"PRIu64, conf->snap_max_hard_limit); + ret = gf_store_save_value (handle->fd, + GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Storing snap-max-hard-limit failed ret = %d", ret); + goto out; + } + + snprintf (buf, sizeof (buf), "%"PRIu64, conf->snap_max_soft_limit); + ret = gf_store_save_value (handle->fd, + GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Storing snap-max-soft-limit failed ret = %d", ret); + goto out; + } + + ret = gf_store_rename_tmppath (handle); out: if (ret && (handle->fd > 0)) - glusterd_store_unlink_tmppath (handle); + gf_store_unlink_tmppath (handle); if (handle->fd > 0) { close (handle->fd); @@ -1672,17 +1667,17 @@ glusterd_retrieve_op_version (xlator_t *this, int *op_version) int tmp_version = 0; char *tmp = NULL; char path[PATH_MAX] = {0,}; - glusterd_store_handle_t *handle = NULL; + gf_store_handle_t *handle = NULL; priv = this->private; if (!priv->handle) { snprintf (path, PATH_MAX, "%s/%s", priv->workdir, GLUSTERD_INFO_FILE); - ret = glusterd_store_handle_retrieve (path, &handle); + ret = gf_store_handle_retrieve (path, &handle); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get store " + gf_log ("", GF_LOG_DEBUG, "Unable to get store " "handle!"); goto out; } @@ -1690,9 +1685,8 @@ glusterd_retrieve_op_version (xlator_t *this, int *op_version) priv->handle = handle; } - ret = glusterd_store_retrieve_value (priv->handle, - GD_OP_VERSION_KEY, - &op_version_str); + ret = gf_store_retrieve_value (priv->handle, GD_OP_VERSION_KEY, + &op_version_str); if (ret) { gf_log (this->name, GF_LOG_DEBUG, "No previous op_version present"); @@ -1715,21 +1709,100 @@ out: return ret; } +int +glusterd_retrieve_sys_snap_max_limit (xlator_t *this, uint64_t *limit, + char *key) +{ + char *limit_str = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + uint64_t tmp_limit = 0; + char *tmp = NULL; + char path[PATH_MAX] = {0,}; + gf_store_handle_t *handle = NULL; + + GF_ASSERT (this); + priv = this->private; + + GF_ASSERT (priv); + GF_ASSERT (limit); + GF_ASSERT (key); + + if (!priv->handle) { + snprintf (path, PATH_MAX, "%s/%s", priv->workdir, + GLUSTERD_INFO_FILE); + ret = gf_store_handle_retrieve (path, &handle); + + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Unable to get store " + "handle!"); + goto out; + } + + priv->handle = handle; + } + + ret = gf_store_retrieve_value (priv->handle, + key, + &limit_str); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "No previous %s present", key); + goto out; + } + + tmp_limit = strtoul (limit_str, &tmp, 10); + if ((tmp_limit <= 0) || (tmp && strlen (tmp) > 1)) { + gf_log (this->name, GF_LOG_WARNING, "invalid version number"); + goto out; + } + + *limit = tmp_limit; + + ret = 0; +out: + if (limit_str) + GF_FREE (limit_str); + + return ret; +} static int glusterd_restore_op_version (xlator_t *this) { - glusterd_conf_t *conf = NULL; - int ret = 0; - int op_version = 0; + glusterd_conf_t *conf = NULL; + int ret = 0; + int op_version = 0; conf = this->private; + ret = glusterd_retrieve_sys_snap_max_limit (this, + &conf->snap_max_hard_limit, + 
GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Unable to retrieve system snap-max-hard-limit, " + "setting it to default value(%d)", + GLUSTERD_SNAPS_MAX_HARD_LIMIT); + conf->snap_max_hard_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + } + + ret = glusterd_retrieve_sys_snap_max_limit (this, + &conf->snap_max_soft_limit, + GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Unable to retrieve system snap-max-soft-limit, " + "setting it to default value(%d)", + GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT); + conf->snap_max_soft_limit = GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT; + } + ret = glusterd_retrieve_op_version (this, &op_version); if (!ret) { if ((op_version < GD_OP_VERSION_MIN) || (op_version > GD_OP_VERSION_MAX)) { gf_log (this->name, GF_LOG_ERROR, - "wrong op-version (%d) retreived", op_version); + "wrong op-version (%d) retrieved", op_version); ret = -1; goto out; } @@ -1739,11 +1812,29 @@ glusterd_restore_op_version (xlator_t *this) goto out; } - gf_log (this->name, GF_LOG_INFO, "op-version not found in store, " - "setting it to minimum op-version : %d", GD_OP_VERSION_MIN); - - /* If op-version is missing, set it to GD_OP_VERSION_MIN */ - conf->op_version = GD_OP_VERSION_MIN; + /* op-version can be missing from the store file in 2 cases, + * 1. This is a new install of glusterfs + * 2. This is an upgrade of glusterfs from a version without op-version + * to a version with op-version (e.g. 3.3 -> 3.4) + * + * Detection of a new install or an upgrade from an older install can be + * done by checking for the presence of its peer-id in the store + * file. If the peer-id is present, the installation is an upgrade; + * else, it is a new install. + * + * For case 1, set op-version to GD_OP_VERSION_MAX. + * For case 2, set op-version to GD_OP_VERSION_MIN. + */ + ret = glusterd_retrieve_uuid(); + if (ret) { + gf_log (this->name, GF_LOG_INFO, "Detected new install. Setting" + " op-version to maximum : %d", GD_OP_VERSION_MAX); + conf->op_version = GD_OP_VERSION_MAX; + } else { + gf_log (this->name, GF_LOG_INFO, "Upgrade detected. 
Setting" + " op-version to minimum : %d", GD_OP_VERSION_MIN); + conf->op_version = GD_OP_VERSION_MIN; + } ret = 0; out: return ret; @@ -1754,7 +1845,7 @@ glusterd_retrieve_uuid () { char *uuid_str = NULL; int32_t ret = -1; - glusterd_store_handle_t *handle = NULL; + gf_store_handle_t *handle = NULL; glusterd_conf_t *priv = NULL; char path[PATH_MAX] = {0,}; @@ -1763,10 +1854,10 @@ glusterd_retrieve_uuid () if (!priv->handle) { snprintf (path, PATH_MAX, "%s/%s", priv->workdir, GLUSTERD_INFO_FILE); - ret = glusterd_store_handle_retrieve (path, &handle); + ret = gf_store_handle_retrieve (path, &handle); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get store " + gf_log ("", GF_LOG_DEBUG, "Unable to get store" "handle!"); goto out; } @@ -1774,12 +1865,11 @@ glusterd_retrieve_uuid () priv->handle = handle; } - ret = glusterd_store_retrieve_value (priv->handle, - GLUSTERD_STORE_UUID_KEY, - &uuid_str); + ret = gf_store_retrieve_value (priv->handle, GLUSTERD_STORE_UUID_KEY, + &uuid_str); if (ret) { - gf_log ("", GF_LOG_INFO, "No previous uuid is present"); + gf_log ("", GF_LOG_DEBUG, "No previous uuid is present"); goto out; } @@ -1791,247 +1881,13 @@ out: return ret; } -int32_t -glusterd_store_iter_new (glusterd_store_handle_t *shandle, - glusterd_store_iter_t **iter) -{ - int32_t ret = -1; - glusterd_store_iter_t *tmp_iter = NULL; - int fd = -1; - - GF_ASSERT (shandle); - GF_ASSERT (iter); - - tmp_iter = GF_CALLOC (1, sizeof (*tmp_iter), - gf_gld_mt_store_iter_t); - - if (!tmp_iter) { - gf_log ("", GF_LOG_ERROR, "Out of Memory"); - goto out; - } - - fd = open (shandle->path, O_RDWR); - - if (fd < 0) { - gf_log ("", GF_LOG_ERROR, "Unable to open %s, errno: %d", - shandle->path, errno); - goto out; - } - - tmp_iter->fd = fd; - - tmp_iter->file = fdopen (tmp_iter->fd, "r"); - - if (!tmp_iter->file) { - gf_log ("", GF_LOG_ERROR, "Unable to open file %s errno: %d", - shandle->path, errno); - goto out; - } - - strncpy (tmp_iter->filepath, shandle->path, sizeof (tmp_iter->filepath)); - tmp_iter->filepath[sizeof (tmp_iter->filepath) - 1] = 0; - *iter = tmp_iter; - ret = 0; - -out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - return ret; -} - -int32_t -glusterd_store_validate_key_value (char *storepath, char *key, char*val, - glusterd_store_op_errno_t *op_errno) -{ - int ret = 0; - - GF_ASSERT (op_errno); - GF_ASSERT (storepath); - - if ((key == NULL) && (val == NULL)) { - ret = -1; - gf_log ("glusterd", GF_LOG_ERROR, "Glusterd store may be " - "corrupted, Invalid key and value (null) in %s", - storepath); - *op_errno = GD_STORE_KEY_VALUE_NULL; - } else if (key == NULL) { - ret = -1; - gf_log ("glusterd", GF_LOG_ERROR, "Glusterd store may be " - "corrupted, Invalid key (null) in %s", storepath); - *op_errno = GD_STORE_KEY_NULL; - } else if (val == NULL) { - ret = -1; - gf_log ("glusterd", GF_LOG_ERROR, "Glusterd store may be " - "corrupted, Invalid value (null) for key %s in %s", - key, storepath); - *op_errno = GD_STORE_VALUE_NULL; - } else { - ret = 0; - *op_errno = GD_STORE_SUCCESS; - } - - return ret; -} - -int32_t -glusterd_store_iter_get_next (glusterd_store_iter_t *iter, - char **key, char **value, - glusterd_store_op_errno_t *op_errno) -{ - int32_t ret = -1; - char *scan_str = NULL; - char *free_str = NULL; - char *iter_key = NULL; - char *iter_val = NULL; - struct stat st = {0,}; - glusterd_store_op_errno_t store_errno = GD_STORE_SUCCESS; - - GF_ASSERT (iter); - GF_ASSERT (iter->file); - GF_ASSERT (key); - GF_ASSERT (value); - - ret = fstat (iter->fd, &st); - if (ret < 0) { - 
gf_log ("glusterd", GF_LOG_WARNING, - "stat on file failed"); - ret = -1; - store_errno = GD_STORE_STAT_FAILED; - goto out; - } - - scan_str = GF_CALLOC (1, st.st_size, - gf_gld_mt_char); - if (scan_str == NULL) { - ret = -1; - store_errno = GD_STORE_ENOMEM; - goto out; - } - - *key = NULL; - *value = NULL; - - free_str = scan_str; - - ret = glusterd_store_read_and_tokenize (iter->file, scan_str, - &iter_key, &iter_val, - &store_errno); - if (ret < 0) { - goto out; - } - - - ret = glusterd_store_validate_key_value (iter->filepath, iter_key, - iter_val, &store_errno); - if (ret) - goto out; - - *value = gf_strdup (iter_val); - - *key = gf_strdup (iter_key); - if (!iter_key || !iter_val) { - ret = -1; - store_errno = GD_STORE_ENOMEM; - goto out; - } - - ret = 0; - -out: - if (ret) { - if (*key) { - GF_FREE (*key); - *key = NULL; - } - if (*value) { - GF_FREE (*value); - *value = NULL; - } - } - GF_FREE (free_str); - if (op_errno) - *op_errno = store_errno; - - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - return ret; -} - -int32_t -glusterd_store_iter_get_matching (glusterd_store_iter_t *iter, - char *key, char **value) -{ - int32_t ret = -1; - char *tmp_key = NULL; - char *tmp_value = NULL; - - ret = glusterd_store_iter_get_next (iter, &tmp_key, &tmp_value, - NULL); - while (!ret) { - if (!strncmp (key, tmp_key, strlen (key))){ - *value = tmp_value; - GF_FREE (tmp_key); - goto out; - } - GF_FREE (tmp_key); - GF_FREE (tmp_value); - ret = glusterd_store_iter_get_next (iter, &tmp_key, - &tmp_value, NULL); - } -out: - return ret; -} - -int32_t -glusterd_store_iter_destroy (glusterd_store_iter_t *iter) -{ - int32_t ret = -1; - - if (!iter) - return 0; - - if (iter->file) - ret = fclose (iter->file); - else - ret = 0; - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to close fd: %d, ret: %d, " - "errno: %d" ,iter->fd, ret, errno); - } - - GF_FREE (iter); - - return ret; -} - -char* -glusterd_store_strerror (glusterd_store_op_errno_t op_errno) -{ - switch (op_errno) { - case GD_STORE_SUCCESS: - return "Success"; - case GD_STORE_KEY_NULL: - return "Invalid Key"; - case GD_STORE_VALUE_NULL: - return "Invalid Value"; - case GD_STORE_KEY_VALUE_NULL: - return "Invalid Key and Value"; - case GD_STORE_EOF: - return "No data"; - case GD_STORE_ENOMEM: - return "No memory"; - default: - return "Invalid errno"; - } - return "Invalid errno"; -} int32_t glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo) { - int32_t ret = 0; glusterd_brickinfo_t *brickinfo = NULL; - glusterd_store_iter_t *iter = NULL; + gf_store_iter_t *iter = NULL; char *key = NULL; char *value = NULL; char brickdir[PATH_MAX] = {0,}; @@ -2039,20 +1895,19 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo) glusterd_conf_t *priv = NULL; int32_t brick_count = 0; char tmpkey[4096] = {0,}; - glusterd_store_iter_t *tmpiter = NULL; + gf_store_iter_t *tmpiter = NULL; char *tmpvalue = NULL; - struct pmap_registry *pmap = NULL; - glusterd_store_op_errno_t op_errno = GD_STORE_SUCCESS; + struct pmap_registry *pmap = NULL; + gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; GF_ASSERT (volinfo); GF_ASSERT (volinfo->volname); priv = THIS->private; - GLUSTERD_GET_BRICK_DIR (brickdir, volinfo, priv) - - ret = glusterd_store_iter_new (volinfo->shandle, &tmpiter); + GLUSTERD_GET_BRICK_DIR (brickdir, volinfo, priv); + ret = gf_store_iter_new (volinfo->shandle, &tmpiter); if (ret) goto out; @@ -2063,30 +1918,28 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo) goto out; snprintf (tmpkey, sizeof (tmpkey), "%s-%d", 
GLUSTERD_STORE_KEY_VOL_BRICK,brick_count); - ret = glusterd_store_iter_get_matching (tmpiter, tmpkey, - &tmpvalue); + ret = gf_store_iter_get_matching (tmpiter, tmpkey, &tmpvalue); snprintf (path, sizeof (path), "%s/%s", brickdir, tmpvalue); GF_FREE (tmpvalue); tmpvalue = NULL; - ret = glusterd_store_handle_retrieve (path, &brickinfo->shandle); + ret = gf_store_handle_retrieve (path, &brickinfo->shandle); if (ret) goto out; - ret = glusterd_store_iter_new (brickinfo->shandle, &iter); + ret = gf_store_iter_new (brickinfo->shandle, &iter); if (ret) goto out; - ret = glusterd_store_iter_get_next (iter, &key, &value, - &op_errno); + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); if (ret) { gf_log ("glusterd", GF_LOG_ERROR, "Unable to iterate " "the store for brick: %s, reason: %s", path, - glusterd_store_strerror (op_errno)); + gf_store_strerror (op_errno)); goto out; } while (!ret) { @@ -2101,7 +1954,7 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo) strlen (GLUSTERD_STORE_KEY_BRICK_PORT))) { gf_string2int (value, &brickinfo->port); - if (brickinfo->port < GF_IANA_PRIV_PORTS_START){ + if (brickinfo->port < priv->base_port) { /* This is required to adhere to the IANA standards */ brickinfo->port = 0; @@ -2117,8 +1970,7 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo) strlen (GLUSTERD_STORE_KEY_BRICK_RDMA_PORT))) { gf_string2int (value, &brickinfo->rdma_port); - if (brickinfo->rdma_port < - GF_IANA_PRIV_PORTS_START){ + if (brickinfo->rdma_port < priv->base_port) { /* This is required to adhere to the IANA standards */ brickinfo->rdma_port = 0; @@ -2135,6 +1987,18 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo) } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED, strlen (GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED))) { gf_string2int (value, &brickinfo->decommissioned); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH, + strlen (GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH))) { + strncpy (brickinfo->device_path, value, + sizeof (brickinfo->device_path)); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS, + strlen (GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS))) { + gf_string2int (value, &brickinfo->snap_status); + } else if (!strncmp (key, + GLUSTERD_STORE_KEY_BRICK_VGNAME, + strlen (GLUSTERD_STORE_KEY_BRICK_VGNAME))) { + strncpy (brickinfo->vg, value, + sizeof (brickinfo->vg)); } else { gf_log ("", GF_LOG_ERROR, "Unknown key: %s", key); @@ -2145,13 +2009,13 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo) key = NULL; value = NULL; - ret = glusterd_store_iter_get_next (iter, &key, &value, - &op_errno); + ret = gf_store_iter_get_next (iter, &key, &value, + &op_errno); } if (op_errno != GD_STORE_EOF) goto out; - ret = glusterd_store_iter_destroy (iter); + ret = gf_store_iter_destroy (iter); if (ret) goto out; @@ -2160,7 +2024,7 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo) brick_count++; } - ret = glusterd_store_iter_destroy (tmpiter); + ret = gf_store_iter_destroy (tmpiter); if (ret) goto out; out: @@ -2171,42 +2035,39 @@ out: int32_t -glusterd_store_retrieve_rbstate (char *volname) +glusterd_store_retrieve_rbstate (glusterd_volinfo_t *volinfo) { int32_t ret = -1; - glusterd_volinfo_t *volinfo = NULL; - glusterd_store_iter_t *iter = NULL; + gf_store_iter_t *iter = NULL; char *key = NULL; char *value = NULL; char volpath[PATH_MAX] = {0,}; glusterd_conf_t *priv = NULL; char path[PATH_MAX] = {0,}; - glusterd_store_op_errno_t op_errno = GD_STORE_SUCCESS; - - priv = THIS->private; + 
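/* Restoring a brick (above) walks key/value pairs and dispatches on the
 * key prefix with strncmp. A reduced standalone version of that shape,
 * with an invented struct and keys; note it uses snprintf for the
 * string field, which, unlike the strncpy calls in the hunk, always
 * NUL-terminates: */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct brick {
        int  port;
        int  decommissioned;
        char path[4096];
};

static void
brick_apply_kv (struct brick *b, const char *key, const char *value)
{
        if (!strncmp (key, "port", strlen ("port")))
                b->port = atoi (value);
        else if (!strncmp (key, "decommissioned",
                           strlen ("decommissioned")))
                b->decommissioned = atoi (value);
        else if (!strncmp (key, "path", strlen ("path")))
                snprintf (b->path, sizeof (b->path), "%s", value);
        else
                fprintf (stderr, "Unknown key: %s\n", key);
}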
gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; + xlator_t *this = NULL; - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, "Couldn't get" - "volinfo for %s.", volname); - goto out; - } + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (volinfo); GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, priv); snprintf (path, sizeof (path), "%s/%s", volpath, GLUSTERD_VOLUME_RBSTATE_FILE); - ret = glusterd_store_handle_retrieve (path, &volinfo->rb_shandle); + ret = gf_store_handle_retrieve (path, &volinfo->rb_shandle); if (ret) goto out; - ret = glusterd_store_iter_new (volinfo->rb_shandle, &iter); + ret = gf_store_iter_new (volinfo->rb_shandle, &iter); if (ret) goto out; - ret = glusterd_store_iter_get_next (iter, &key, &value, &op_errno); + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); if (ret) goto out; @@ -2255,61 +2116,56 @@ glusterd_store_retrieve_rbstate (char *volname) key = NULL; value = NULL; - ret = glusterd_store_iter_get_next (iter, &key, &value, - &op_errno); + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); } if (op_errno != GD_STORE_EOF) goto out; - ret = glusterd_store_iter_destroy (iter); + ret = gf_store_iter_destroy (iter); if (ret) goto out; out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); return ret; } int32_t -glusterd_store_retrieve_node_state (char *volname) +glusterd_store_retrieve_node_state (glusterd_volinfo_t *volinfo) { int32_t ret = -1; - glusterd_volinfo_t *volinfo = NULL; - glusterd_store_iter_t *iter = NULL; + gf_store_iter_t *iter = NULL; char *key = NULL; char *value = NULL; char volpath[PATH_MAX] = {0,}; glusterd_conf_t *priv = NULL; char path[PATH_MAX] = {0,}; - glusterd_store_op_errno_t op_errno = GD_STORE_SUCCESS; - - priv = THIS->private; + gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; + xlator_t *this = NULL; - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, "Couldn't get" - "volinfo for %s.", volname); - goto out; - } + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (volinfo); GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, priv); snprintf (path, sizeof (path), "%s/%s", volpath, GLUSTERD_NODE_STATE_FILE); - ret = glusterd_store_handle_retrieve (path, - &volinfo->node_state_shandle); + ret = gf_store_handle_retrieve (path, &volinfo->node_state_shandle); if (ret) goto out; - ret = glusterd_store_iter_new (volinfo->node_state_shandle, &iter); + ret = gf_store_iter_new (volinfo->node_state_shandle, &iter); if (ret) goto out; - ret = glusterd_store_iter_get_next (iter, &key, &value, &op_errno); + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); if (ret) goto out; @@ -2334,66 +2190,70 @@ glusterd_store_retrieve_node_state (char *volname) key = NULL; value = NULL; - ret = glusterd_store_iter_get_next (iter, &key, &value, - &op_errno); + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); } if (op_errno != GD_STORE_EOF) goto out; - ret = glusterd_store_iter_destroy (iter); + ret = gf_store_iter_destroy (iter); if (ret) goto out; out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); return ret; } -int32_t -glusterd_store_retrieve_volume (char *volname) -{ - int32_t ret = -1; - glusterd_volinfo_t *volinfo = NULL; - glusterd_store_iter_t *iter = NULL; - char *key = NULL; - char *value = NULL; - char 
volpath[PATH_MAX] = {0,}; - glusterd_conf_t *priv = NULL; - char path[PATH_MAX] = {0,}; - int exists = 0; - glusterd_store_op_errno_t op_errno = GD_STORE_SUCCESS; - ret = glusterd_volinfo_new (&volinfo); - - if (ret) - goto out; +int +glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo) +{ + int ret = -1; + int exists = 0; + char *key = NULL; + char *value = NULL; + char volpath[PATH_MAX] = {0,}; + char path[PATH_MAX] = {0,}; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + gf_store_iter_t *iter = NULL; + gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; - strncpy (volinfo->volname, volname, GLUSTERD_MAX_VOLUME_NAME); + this = THIS; + GF_ASSERT (this); + conf = THIS->private; + GF_ASSERT (volinfo); - priv = THIS->private; + GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, conf); - GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, priv); snprintf (path, sizeof (path), "%s/%s", volpath, GLUSTERD_VOLUME_INFO_FILE); - ret = glusterd_store_handle_retrieve (path, &volinfo->shandle); - - if (ret) + ret = gf_store_handle_retrieve (path, &volinfo->shandle); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "volinfo handle is NULL"); goto out; + } - ret = glusterd_store_iter_new (volinfo->shandle, &iter); - - if (ret) + ret = gf_store_iter_new (volinfo->shandle, &iter); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get new store " + "iter"); goto out; + } - ret = glusterd_store_iter_get_next (iter, &key, &value, &op_errno); - if (ret) + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get next store " + "iter"); goto out; + } while (!ret) { + gf_log ("", GF_LOG_DEBUG, "key = %s value = %s", key, value); if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_TYPE, strlen (GLUSTERD_STORE_KEY_VOL_TYPE))) { volinfo->type = atoi (value); @@ -2453,9 +2313,24 @@ glusterd_store_retrieve_volume (char *volname) gf_log ("", GF_LOG_DEBUG, "Parsed as "GEOREP" " " slave:key=%s,value:%s", key, value); - } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_BACKEND, - strlen (GLUSTERD_STORE_KEY_VOL_BACKEND))) { - volinfo->backend = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_OP_VERSION, + strlen (GLUSTERD_STORE_KEY_VOL_OP_VERSION))) { + volinfo->op_version = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION, + strlen (GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION))) { + volinfo->client_op_version = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_CAPS, + strlen (GLUSTERD_STORE_KEY_VOL_CAPS))) { + volinfo->caps = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, + strlen (GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT))) { + volinfo->snap_max_hard_limit = (uint64_t) atoll (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_IS_RESTORED, + strlen (GLUSTERD_STORE_KEY_VOL_IS_RESTORED))) { + volinfo->is_volume_restored = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_PARENT_VOLNAME, + strlen (GLUSTERD_STORE_KEY_PARENT_VOLNAME))) { + strncpy (volinfo->parent_volname, value, sizeof(volinfo->parent_volname) - 1); } else { if (is_key_glusterd_hooks_friendly (key)) { @@ -2495,8 +2370,7 @@ glusterd_store_retrieve_volume (char *volname) key = NULL; value = NULL; - ret = glusterd_store_iter_get_next (iter, &key, &value, - &op_errno); + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); } /* backward compatibility */ @@ -2535,16 +2409,59 @@ glusterd_store_retrieve_volume (char *volname) volinfo->subvol_count = (volinfo->brick_count / 
volinfo->dist_leaf_count); + /* Only calculate volume op-versions if they are not found */ + if (!volinfo->op_version && !volinfo->client_op_version) + gd_update_volume_op_versions (volinfo); } if (op_errno != GD_STORE_EOF) goto out; - ret = glusterd_store_iter_destroy (iter); + ret = gf_store_iter_destroy (iter); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to destroy store " + "iter"); + goto out; + } + + ret = 0; +out: + return ret; +} + +glusterd_volinfo_t* +glusterd_store_retrieve_volume (char *volname, glusterd_snap_t *snap) +{ + int32_t ret = -1; + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *origin_volinfo = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (volname); + + ret = glusterd_volinfo_new (&volinfo); if (ret) goto out; + priv = THIS->private; + + strncpy (volinfo->volname, volname, GLUSTERD_MAX_VOLUME_NAME); + volinfo->snapshot = snap; + if (snap) + volinfo->is_snap_volume = _gf_true; + + ret = glusterd_store_update_volinfo (volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to update volinfo " + "for %s volume", volname); + goto out; + } + ret = glusterd_store_retrieve_bricks (volinfo); if (ret) goto out; @@ -2553,12 +2470,28 @@ glusterd_store_retrieve_volume (char *volname) if (ret) goto out; - list_add_tail (&volinfo->vol_list, &priv->volumes); - + if (!snap) { + list_add_tail (&volinfo->vol_list, &priv->volumes); + } else { + ret = glusterd_volinfo_find (volinfo->parent_volname, + &origin_volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Parent volinfo " + "not found for %s volume", volname); + goto out; + } + glusterd_list_add_snapvol (origin_volinfo, volinfo); + } out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + if (ret) { + if (volinfo) + glusterd_volinfo_delete (volinfo); + volinfo = NULL; + } - return ret; + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + + return volinfo; } inline void @@ -2570,16 +2503,16 @@ glusterd_store_set_options_path (glusterd_conf_t *conf, char *path, size_t len) int _store_global_opts (dict_t *this, char *key, data_t *value, void *data) { - glusterd_store_handle_t *shandle = data; + gf_store_handle_t *shandle = data; - glusterd_store_save_value (shandle->fd, key, (char*)value->data); + gf_store_save_value (shandle->fd, key, (char*)value->data); return 0; } int32_t glusterd_store_options (xlator_t *this, dict_t *opts) { - glusterd_store_handle_t *shandle = NULL; + gf_store_handle_t *shandle = NULL; glusterd_conf_t *conf = NULL; char path[PATH_MAX] = {0}; int fd = -1; @@ -2588,11 +2521,11 @@ glusterd_store_options (xlator_t *this, dict_t *opts) conf = this->private; glusterd_store_set_options_path (conf, path, sizeof (path)); - ret = glusterd_store_handle_new (path, &shandle); + ret = gf_store_handle_new (path, &shandle); if (ret) goto out; - fd = glusterd_store_mkstemp (shandle); + fd = gf_store_mkstemp (shandle); if (fd <= 0) { ret = -1; goto out; @@ -2601,11 +2534,11 @@ glusterd_store_options (xlator_t *this, dict_t *opts) shandle->fd = fd; dict_foreach (opts, _store_global_opts, shandle); shandle->fd = 0; - ret = glusterd_store_rename_tmppath (shandle); + ret = gf_store_rename_tmppath (shandle); if (ret) goto out; out: - glusterd_store_handle_destroy (shandle); + gf_store_handle_destroy (shandle); if (fd >=0 ) close (fd); return ret; @@ -2616,25 +2549,25 @@ glusterd_store_retrieve_options (xlator_t *this) { char path[PATH_MAX] = {0}; glusterd_conf_t *conf 
= NULL; - glusterd_store_handle_t *shandle = NULL; - glusterd_store_iter_t *iter = NULL; + gf_store_handle_t *shandle = NULL; + gf_store_iter_t *iter = NULL; char *key = NULL; char *value = NULL; - glusterd_store_op_errno_t op_errno = 0; + gf_store_op_errno_t op_errno = 0; int ret = -1; conf = this->private; glusterd_store_set_options_path (conf, path, sizeof (path)); - ret = glusterd_store_handle_retrieve (path, &shandle); + ret = gf_store_handle_retrieve (path, &shandle); if (ret) goto out; - ret = glusterd_store_iter_new (shandle, &iter); + ret = gf_store_iter_new (shandle, &iter); if (ret) goto out; - ret = glusterd_store_iter_get_next (iter, &key, &value, &op_errno); + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); while (!ret) { ret = dict_set_dynstr (conf->opts, key, value); if (ret) { @@ -2646,22 +2579,21 @@ glusterd_store_retrieve_options (xlator_t *this) key = NULL; value = NULL; - ret = glusterd_store_iter_get_next (iter, &key, &value, - &op_errno); + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); } if (op_errno != GD_STORE_EOF) goto out; ret = 0; out: - glusterd_store_iter_destroy (iter); - glusterd_store_handle_destroy (shandle); + gf_store_iter_destroy (iter); + gf_store_handle_destroy (shandle); return ret; } int32_t -glusterd_store_retrieve_volumes (xlator_t *this) +glusterd_store_retrieve_volumes (xlator_t *this, glusterd_snap_t *snap) { - int32_t ret = 0; + int32_t ret = -1; char path[PATH_MAX] = {0,}; glusterd_conf_t *priv = NULL; DIR *dir = NULL; @@ -2673,51 +2605,58 @@ glusterd_store_retrieve_volumes (xlator_t *this) GF_ASSERT (priv); - snprintf (path, PATH_MAX, "%s/%s", priv->workdir, - GLUSTERD_VOLUME_DIR_PREFIX); + if (snap) + snprintf (path, PATH_MAX, "%s/snaps/%s", priv->workdir, + snap->snapname); + else + snprintf (path, PATH_MAX, "%s/%s", priv->workdir, + GLUSTERD_VOLUME_DIR_PREFIX); dir = opendir (path); if (!dir) { gf_log ("", GF_LOG_ERROR, "Unable to open dir %s", path); - ret = -1; goto out; } glusterd_for_each_entry (entry, dir); while (entry) { - ret = glusterd_store_retrieve_volume (entry->d_name); - if (ret) { + if ( entry->d_type != DT_DIR ) + goto next; + + volinfo = glusterd_store_retrieve_volume (entry->d_name, snap); + if (!volinfo) { gf_log ("", GF_LOG_ERROR, "Unable to restore " "volume: %s", entry->d_name); + ret = -1; goto out; } - ret = glusterd_store_retrieve_rbstate (entry->d_name); + ret = glusterd_store_retrieve_rbstate (volinfo); if (ret) { /* Backward compatibility */ gf_log ("", GF_LOG_INFO, "Creating a new rbstate " "for volume: %s.", entry->d_name); - ret = glusterd_volinfo_find (entry->d_name, &volinfo); ret = glusterd_store_create_rbstate_shandle_on_absence (volinfo); ret = glusterd_store_perform_rbstate_store (volinfo); } - ret = glusterd_store_retrieve_node_state (entry->d_name); + ret = glusterd_store_retrieve_node_state (volinfo); if (ret) { /* Backward compatibility */ gf_log ("", GF_LOG_INFO, "Creating a new node_state " "for volume: %s.", entry->d_name); - ret = glusterd_volinfo_find (entry->d_name, &volinfo); - ret = glusterd_store_create_nodestate_sh_on_absence (volinfo); ret = glusterd_store_perform_node_state_store (volinfo); } + +next: glusterd_for_each_entry (entry, dir); } + ret = 0; out: if (dir) closedir (dir); @@ -2727,6 +2666,482 @@ out: } int32_t +glusterd_resolve_snap_bricks (xlator_t *this, glusterd_snap_t *snap) +{ + int32_t ret = -1; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + GF_ASSERT (this); + GF_VALIDATE_OR_GOTO (this->name, snap, out); 
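/* The restore scan above now filters directory entries on d_type before
 * treating them as volumes or snaps. The same loop in miniature; note
 * that d_type is not populated on every filesystem, so production code
 * may need a stat() fallback: */
#define _DEFAULT_SOURCE         /* for DT_DIR on glibc */
#include <dirent.h>
#include <stdio.h>
#include <string.h>

static void
for_each_subdir (const char *path)
{
        DIR           *dir   = opendir (path);
        struct dirent *entry = NULL;

        if (!dir)
                return;
        while ((entry = readdir (dir)) != NULL) {
                if (entry->d_type != DT_DIR)
                        continue;       /* same filter as above */
                if (!strcmp (entry->d_name, ".") ||
                    !strcmp (entry->d_name, ".."))
                        continue;
                printf ("restore candidate: %s\n", entry->d_name);
        }
        closedir (dir);
}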
+ + list_for_each_entry (volinfo, &snap->volumes, vol_list) { + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + ret = glusterd_resolve_brick (brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "resolve brick failed in restore"); + goto out; + } + } + } + + ret = 0; + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + + return ret; +} + +int +glusterd_store_update_snap (glusterd_snap_t *snap) +{ + int ret = -1; + char *key = NULL; + char *value = NULL; + char snappath[PATH_MAX] = {0,}; + char path[PATH_MAX] = {0,}; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + gf_store_iter_t *iter = NULL; + gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; + + this = THIS; + conf = this->private; + GF_ASSERT (snap); + + GLUSTERD_GET_SNAP_DIR (snappath, snap, conf); + + snprintf (path, sizeof (path), "%s/%s", snappath, + GLUSTERD_SNAP_INFO_FILE); + + ret = gf_store_handle_retrieve (path, &snap->shandle); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "snap handle is NULL"); + goto out; + } + + ret = gf_store_iter_new (snap->shandle, &iter); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get new store " + "iter"); + goto out; + } + + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get next store " + "iter"); + goto out; + } + + while (!ret) { + gf_log (this->name, GF_LOG_DEBUG, "key = %s value = %s", + key, value); + + if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_ID, + strlen (GLUSTERD_STORE_KEY_SNAP_ID))) { + ret = uuid_parse (value, snap->snap_id); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "Failed to parse uuid"); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_RESTORED, + strlen (GLUSTERD_STORE_KEY_SNAP_RESTORED))) { + snap->snap_restored = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_STATUS, + strlen (GLUSTERD_STORE_KEY_SNAP_STATUS))) { + snap->snap_status = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_DESC, + strlen (GLUSTERD_STORE_KEY_SNAP_DESC))) { + snap->description = gf_strdup (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_TIMESTAMP, + strlen (GLUSTERD_STORE_KEY_SNAP_TIMESTAMP))) { + snap->time_stamp = atoi (value); + } + + GF_FREE (key); + GF_FREE (value); + key = NULL; + value = NULL; + + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); + } + + if (op_errno != GD_STORE_EOF) + goto out; + + ret = gf_store_iter_destroy (iter); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to destroy store " + "iter"); + } + +out: + return ret; +} + +int32_t +glusterd_store_retrieve_snap (char *snapname) +{ + int32_t ret = -1; + dict_t *dict = NULL; + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (snapname); + + dict = dict_new(); + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create dict"); + ret = -1; + goto out; + } + + snap = glusterd_new_snap_object (); + if (!snap) { + gf_log (this->name, GF_LOG_ERROR, "Failed to create " + " snap object"); + goto out; + } + + strncpy (snap->snapname, snapname, strlen(snapname)); + ret = glusterd_store_update_snap (snap); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to update snapshot " + "for %s snap", snapname); + goto out; + } + + ret = glusterd_store_retrieve_volumes (this, snap); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to retrieve " + "snap volumes for snap %s", snapname); + 
goto out; + } + + /* Unlike bricks of normal volumes which are resolved at the end of + the glusterd restore, the bricks belonging to the snap volumes of + each snap should be resolved as part of snapshot restore itself. + This is because, if the snapshot has to be removed, the resolved + bricks let glusterd identify which of them carry its own uuid, and + kill those bricks. + */ + ret = glusterd_resolve_snap_bricks (this, snap); + if (ret) + gf_log (this->name, GF_LOG_WARNING, "resolving the snap bricks" + " failed (snap: %s)", snap?snap->snapname:""); + + /* When the snapshot command from the cli is received, the on disk and + in memory structures for the snapshot are created, with the status + being marked as GD_SNAP_STATUS_INIT. Once the backend snapshot is + taken, the status is changed to GD_SNAP_STATUS_IN_USE. If glusterd + dies after taking the backend snapshot, but before updating the + status, then when glusterd comes up, it should treat that snapshot + as a failed snapshot and clean it up. + */ + if (snap->snap_status != GD_SNAP_STATUS_IN_USE) { + ret = glusterd_snap_remove (dict, snap, _gf_true, _gf_true); + if (ret) + gf_log (this->name, GF_LOG_WARNING, "failed to remove" + " the snapshot %s", snap->snapname); + goto out; + } + + /* TODO: list_add_order can do 'N-square' comparisons and + is not efficient. Find a better solution to store the snap + in order */ + list_add_order (&snap->snap_list, &priv->snapshots, + glusterd_compare_snap_time); + +out: + if (dict) + dict_unref (dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + return ret; +} + +/* Read the missed_snap_list and update the in-memory structs */ +int32_t +glusterd_store_retrieve_missed_snaps_list (xlator_t *this) +{ + char buf[PATH_MAX] = ""; + char path[PATH_MAX] = ""; + char *missed_node_info = NULL; + char *brick_path = NULL; + char *value = NULL; + char *save_ptr = NULL; + FILE *fp = NULL; + int32_t brick_num = -1; + int32_t snap_op = -1; + int32_t snap_status = -1; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + gf_store_op_errno_t store_errno = GD_STORE_SUCCESS; + + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + /* Get the path of the missed_snap_list */ + glusterd_store_missed_snaps_list_path_set (path, sizeof(path)); + + fp = fopen (path, "r"); + if (!fp) { + /* If errno is ENOENT then there are no missed snaps yet */ + if (errno != ENOENT) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Failed to open %s. " + "Error: %s", path, strerror(errno)); + } else { + gf_log (this->name, GF_LOG_INFO, + "No missed snaps list."); + ret = 0; + } + goto out; + } + + do { + ret = gf_store_read_and_tokenize (fp, buf, + &missed_node_info, &value, + &store_errno); + if (ret) { + if (store_errno == GD_STORE_EOF) { + gf_log (this->name, + GF_LOG_DEBUG, + "EOF for missed_snap_list"); + ret = 0; + break; + } + gf_log (this->name, GF_LOG_ERROR, + "Failed to fetch data from " + "missed_snaps_list. 
Error: %s", + gf_store_strerror (store_errno)); + goto out; + } + + /* Fetch the brick_num, brick_path, snap_op and snap status */ + brick_num = atoi(strtok_r (value, ":", &save_ptr)); + brick_path = strtok_r (NULL, ":", &save_ptr); + snap_op = atoi(strtok_r (NULL, ":", &save_ptr)); + snap_status = atoi(strtok_r (NULL, ":", &save_ptr)); + + if (!missed_node_info || !brick_path || + brick_num < 1 || snap_op < 1 || + snap_status < 1) { + gf_log (this->name, GF_LOG_ERROR, + "Invalid missed_snap_entry"); + ret = -1; + goto out; + } + + ret = glusterd_store_missed_snaps_list (missed_node_info, + brick_num, + brick_path, + snap_op, + snap_status); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to store missed snaps_list"); + goto out; + } + + } while (store_errno == GD_STORE_SUCCESS); + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + return ret; +} + +int32_t +glusterd_store_retrieve_snaps (xlator_t *this) +{ + int32_t ret = 0; + char path[PATH_MAX] = {0,}; + glusterd_conf_t *priv = NULL; + DIR *dir = NULL; + struct dirent *entry = NULL; + + GF_ASSERT (this); + priv = this->private; + + GF_ASSERT (priv); + + snprintf (path, PATH_MAX, "%s/snaps", priv->workdir); + + dir = opendir (path); + + if (!dir) { + /* If snaps dir doesn't exists ignore the error for + backward compatibility */ + if (errno != ENOENT) { + ret = -1; + gf_log ("", GF_LOG_ERROR, "Unable to open dir %s", path); + } + goto out; + } + + glusterd_for_each_entry (entry, dir); + + while (entry) { + if (entry->d_type == DT_DIR) { + ret = glusterd_store_retrieve_snap (entry->d_name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to restore snapshot: %s", + entry->d_name); + goto out; + } + } + + glusterd_for_each_entry (entry, dir); + } + + /* Retrieve missed_snaps_list */ + ret = glusterd_store_retrieve_missed_snaps_list (this); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Failed to retrieve missed_snaps_list"); + goto out; + } + +out: + if (dir) + closedir (dir); + gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + + return ret; +} + +/* Writes all the contents of conf->missed_snap_list */ +int32_t +glusterd_store_write_missed_snapinfo (int32_t fd) +{ + char value[PATH_MAX] = ""; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + glusterd_missed_snap_info *missed_snapinfo = NULL; + glusterd_snap_op_t *snap_opinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT (priv); + + /* Write the missed_snap_entry */ + list_for_each_entry (missed_snapinfo, &priv->missed_snaps_list, + missed_snaps) { + list_for_each_entry (snap_opinfo, + &missed_snapinfo->snap_ops, + snap_ops_list) { + snprintf (value, sizeof(value), "%d:%s:%d:%d", + snap_opinfo->brick_num, + snap_opinfo->brick_path, + snap_opinfo->op, snap_opinfo->status); + ret = gf_store_save_value + (fd, + missed_snapinfo->node_snap_info, + value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to write missed snapinfo"); + goto out; + } + } + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Adds the missed snap entries to the in-memory conf->missed_snap_list * + * and writes them to disk */ +int32_t +glusterd_store_update_missed_snaps (dict_t *dict, int32_t missed_snap_count) +{ + int32_t fd = -1; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + + priv = this->private; + GF_ASSERT (priv); + + if (missed_snap_count 
< 1) { + gf_log (this->name, GF_LOG_DEBUG, "No missed snaps"); + ret = 0; + goto out; + } + + ret = glusterd_store_create_missed_snaps_list_shandle_on_absence (); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to obtain " + "missed_snaps_list store handle."); + goto out; + } + + fd = gf_store_mkstemp (priv->missed_snaps_list_shandle); + if (fd <= 0) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create tmp file"); + ret = -1; + goto out; + } + + ret = glusterd_add_missed_snaps_to_list (dict, missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snaps to list"); + goto out; + } + + ret = glusterd_store_write_missed_snapinfo (fd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to write missed snaps to disk"); + goto out; + } + + ret = gf_store_rename_tmppath (priv->missed_snaps_list_shandle); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to rename the tmp file"); + goto out; + } +out: + if (ret && (fd > 0)) { + ret = gf_store_unlink_tmppath (priv->missed_snaps_list_shandle); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to unlink the tmp file"); + } + ret = -1; + } + + if (fd > 0) + close (fd); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo) { int32_t ret = -1; @@ -2774,7 +3189,7 @@ glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo) out: if (peerinfo->shandle) { - glusterd_store_handle_destroy (peerinfo->shandle); + gf_store_handle_destroy (peerinfo->shandle); peerinfo->shandle = NULL; } gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); @@ -2800,7 +3215,7 @@ glusterd_store_create_peer_dir () char path[PATH_MAX]; glusterd_store_peerinfo_dirpath_set (path, sizeof (path)); - ret = glusterd_store_mkdir (path); + ret = gf_store_mkdir (path); gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); return ret; @@ -2844,8 +3259,8 @@ glusterd_store_peerinfo_hostname_shandle_create (glusterd_peerinfo_t *peerinfo) glusterd_store_hostname_peerpath_set (peerinfo, peerfpath, sizeof (peerfpath)); - ret = glusterd_store_handle_create_on_absence (&peerinfo->shandle, - peerfpath); + ret = gf_store_handle_create_on_absence (&peerinfo->shandle, + peerfpath); return ret; } @@ -2857,8 +3272,8 @@ glusterd_store_peerinfo_uuid_shandle_create (glusterd_peerinfo_t *peerinfo) glusterd_store_uuid_peerpath_set (peerinfo, peerfpath, sizeof (peerfpath)); - ret = glusterd_store_handle_create_on_absence (&peerinfo->shandle, - peerfpath); + ret = gf_store_handle_create_on_absence (&peerinfo->shandle, + peerfpath); return ret; } @@ -2874,7 +3289,7 @@ glusterd_peerinfo_hostname_shandle_check_destroy (glusterd_peerinfo_t *peerinfo) ret = stat (peerfpath, &stbuf); if (!ret) { if (peerinfo->shandle) - glusterd_store_handle_destroy (peerinfo->shandle); + gf_store_handle_destroy (peerinfo->shandle); peerinfo->shandle = NULL; ret = unlink (peerfpath); } @@ -2903,18 +3318,18 @@ glusterd_store_peer_write (int fd, glusterd_peerinfo_t *peerinfo) char buf[50] = {0}; int32_t ret = 0; - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_PEER_UUID, - uuid_utoa (peerinfo->uuid)); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_PEER_UUID, + uuid_utoa (peerinfo->uuid)); if (ret) goto out; snprintf (buf, sizeof (buf), "%d", peerinfo->state.state); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_PEER_STATE, buf); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_PEER_STATE, buf); if (ret) goto out; - ret = 
glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_PEER_HOSTNAME "1", - peerinfo->hostname); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_PEER_HOSTNAME "1", + peerinfo->hostname); if (ret) goto out; @@ -2931,7 +3346,7 @@ glusterd_store_perform_peer_store (glusterd_peerinfo_t *peerinfo) GF_ASSERT (peerinfo); - fd = glusterd_store_mkstemp (peerinfo->shandle); + fd = gf_store_mkstemp (peerinfo->shandle); if (fd <= 0) { ret = -1; goto out; @@ -2941,10 +3356,10 @@ glusterd_store_perform_peer_store (glusterd_peerinfo_t *peerinfo) if (ret) goto out; - ret = glusterd_store_rename_tmppath (peerinfo->shandle); + ret = gf_store_rename_tmppath (peerinfo->shandle); out: if (ret && (fd > 0)) - glusterd_store_unlink_tmppath (peerinfo->shandle); + gf_store_unlink_tmppath (peerinfo->shandle); if (fd > 0) close (fd); gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); @@ -2984,13 +3399,13 @@ glusterd_store_retrieve_peers (xlator_t *this) uuid_t uuid = {0,}; char *hostname = NULL; int32_t state = 0; - glusterd_store_handle_t *shandle = NULL; + gf_store_handle_t *shandle = NULL; char filepath[PATH_MAX] = {0,}; - glusterd_store_iter_t *iter = NULL; + gf_store_iter_t *iter = NULL; char *key = NULL; char *value = NULL; glusterd_peerctx_args_t args = {0}; - glusterd_store_op_errno_t op_errno = GD_STORE_SUCCESS; + gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; GF_ASSERT (this); priv = this->private; @@ -3012,16 +3427,15 @@ glusterd_store_retrieve_peers (xlator_t *this) while (entry) { snprintf (filepath, PATH_MAX, "%s/%s", path, entry->d_name); - ret = glusterd_store_handle_retrieve (filepath, &shandle); + ret = gf_store_handle_retrieve (filepath, &shandle); if (ret) goto out; - ret = glusterd_store_iter_new (shandle, &iter); + ret = gf_store_iter_new (shandle, &iter); if (ret) goto out; - ret = glusterd_store_iter_get_next (iter, &key, &value, - &op_errno); + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); if (ret) goto out; @@ -3049,13 +3463,13 @@ glusterd_store_retrieve_peers (xlator_t *this) key = NULL; value = NULL; - ret = glusterd_store_iter_get_next (iter, &key, &value, - &op_errno); + ret = gf_store_iter_get_next (iter, &key, &value, + &op_errno); } if (op_errno != GD_STORE_EOF) goto out; - (void) glusterd_store_iter_destroy (iter); + (void) gf_store_iter_destroy (iter); ret = glusterd_friend_add (hostname, 0, state, &uuid, &peerinfo, 1, NULL); @@ -3128,7 +3542,11 @@ glusterd_restore () goto out; } - ret = glusterd_store_retrieve_volumes (this); + ret = glusterd_store_retrieve_volumes (this, NULL); + if (ret) + goto out; + + ret = glusterd_store_retrieve_snaps (this); if (ret) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 68977dd9c..1b5cebc0c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -35,38 +35,55 @@ typedef enum glusterd_store_ver_ac_{ } glusterd_volinfo_ver_ac_t; -#define GLUSTERD_STORE_UUID_KEY "UUID" - -#define GLUSTERD_STORE_KEY_VOL_TYPE "type" -#define GLUSTERD_STORE_KEY_VOL_COUNT "count" -#define GLUSTERD_STORE_KEY_VOL_STATUS "status" -#define GLUSTERD_STORE_KEY_VOL_PORT "port" -#define GLUSTERD_STORE_KEY_VOL_SUB_COUNT "sub_count" -#define GLUSTERD_STORE_KEY_VOL_STRIPE_CNT "stripe_count" -#define GLUSTERD_STORE_KEY_VOL_REPLICA_CNT "replica_count" -#define GLUSTERD_STORE_KEY_VOL_BRICK "brick" -#define GLUSTERD_STORE_KEY_VOL_VERSION "version" -#define GLUSTERD_STORE_KEY_VOL_TRANSPORT "transport-type" -#define GLUSTERD_STORE_KEY_VOL_ID "volume-id" 
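/* The missed-snaps read/write functions earlier in this diff persist
 * one record per line in a plain "key=value" store file: the key
 * identifies the node, and the value packs
 * brick_num:brick_path:snap_op:snap_status -- the "%d:%s:%d:%d"
 * written by glusterd_store_write_missed_snapinfo and re-tokenized by
 * glusterd_store_retrieve_missed_snaps_list. A small self-contained
 * round-trip of that format, in the spirit of those functions; all
 * identifiers and values below are illustrative only: */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main (void)
{
        char  line[512];
        char *save = NULL;

        /* serialize, mirroring the "%d:%s:%d:%d" write path */
        snprintf (line, sizeof (line), "%s=%d:%s:%d:%d",
                  "node-uuid:snap-uuid", 2, "/bricks/b2", 1, 1);

        /* parse, mirroring the strtok_r read path; the real code
         * validates the tokens only after extraction, and a robust
         * parser would check each strtok_r result for NULL before
         * calling atoi */
        char *key = strtok_r (line, "=", &save);
        char *val = strtok_r (NULL, "=", &save);

        int   brick_num   = atoi (strtok_r (val, ":", &save));
        char *brick_path  = strtok_r (NULL, ":", &save);
        int   snap_op     = atoi (strtok_r (NULL, ":", &save));
        int   snap_status = atoi (strtok_r (NULL, ":", &save));

        printf ("%s -> brick #%d at %s, op=%d, status=%d\n",
                key, brick_num, brick_path, snap_op, snap_status);
        return 0;
}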
-#define GLUSTERD_STORE_KEY_RB_STATUS "rb_status" -#define GLUSTERD_STORE_KEY_RB_SRC_BRICK "rb_src" -#define GLUSTERD_STORE_KEY_RB_DST_BRICK "rb_dst" -#define GLUSTERD_STORE_KEY_RB_DST_PORT "rb_port" -#define GLUSTERD_STORE_KEY_VOL_DEFRAG "rebalance_status" -#define GLUSTERD_STORE_KEY_DEFRAG_OP "rebalance_op" -#define GLUSTERD_STORE_KEY_USERNAME "username" -#define GLUSTERD_STORE_KEY_PASSWORD "password" - -#define GLUSTERD_STORE_KEY_BRICK_HOSTNAME "hostname" -#define GLUSTERD_STORE_KEY_BRICK_PATH "path" -#define GLUSTERD_STORE_KEY_BRICK_PORT "listen-port" -#define GLUSTERD_STORE_KEY_BRICK_RDMA_PORT "rdma.listen-port" +#define GLUSTERD_STORE_UUID_KEY "UUID" + +#define GLUSTERD_STORE_KEY_VOL_TYPE "type" +#define GLUSTERD_STORE_KEY_VOL_COUNT "count" +#define GLUSTERD_STORE_KEY_VOL_STATUS "status" +#define GLUSTERD_STORE_KEY_VOL_PORT "port" +#define GLUSTERD_STORE_KEY_VOL_SUB_COUNT "sub_count" +#define GLUSTERD_STORE_KEY_VOL_STRIPE_CNT "stripe_count" +#define GLUSTERD_STORE_KEY_VOL_REPLICA_CNT "replica_count" +#define GLUSTERD_STORE_KEY_VOL_BRICK "brick" +#define GLUSTERD_STORE_KEY_VOL_VERSION "version" +#define GLUSTERD_STORE_KEY_VOL_TRANSPORT "transport-type" +#define GLUSTERD_STORE_KEY_VOL_ID "volume-id" +#define GLUSTERD_STORE_KEY_VOL_IS_RESTORED "is-volume-restored" +#define GLUSTERD_STORE_KEY_RB_STATUS "rb_status" +#define GLUSTERD_STORE_KEY_RB_SRC_BRICK "rb_src" +#define GLUSTERD_STORE_KEY_RB_DST_BRICK "rb_dst" +#define GLUSTERD_STORE_KEY_RB_DST_PORT "rb_port" +#define GLUSTERD_STORE_KEY_VOL_DEFRAG "rebalance_status" +#define GLUSTERD_STORE_KEY_DEFRAG_OP "rebalance_op" +#define GLUSTERD_STORE_KEY_USERNAME "username" +#define GLUSTERD_STORE_KEY_PASSWORD "password" +#define GLUSTERD_STORE_KEY_PARENT_VOLNAME "parent_volname" +#define GLUSTERD_STORE_KEY_VOL_OP_VERSION "op-version" +#define GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION "client-op-version" + +#define GLUSTERD_STORE_KEY_SNAP_NAME "name" +#define GLUSTERD_STORE_KEY_SNAP_ID "snap-id" +#define GLUSTERD_STORE_KEY_SNAP_DESC "desc" +#define GLUSTERD_STORE_KEY_SNAP_TIMESTAMP "time-stamp" +#define GLUSTERD_STORE_KEY_SNAP_STATUS "status" +#define GLUSTERD_STORE_KEY_SNAP_RESTORED "snap-restored" +#define GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT "snap-max-hard-limit" +#define GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT "snap-max-soft-limit" + +#define GLUSTERD_STORE_KEY_BRICK_HOSTNAME "hostname" +#define GLUSTERD_STORE_KEY_BRICK_PATH "path" +#define GLUSTERD_STORE_KEY_BRICK_PORT "listen-port" +#define GLUSTERD_STORE_KEY_BRICK_RDMA_PORT "rdma.listen-port" #define GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED "decommissioned" +#define GLUSTERD_STORE_KEY_BRICK_VGNAME "vg" +#define GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH "device_path" +#define GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS "snap-status" -#define GLUSTERD_STORE_KEY_PEER_UUID "uuid" -#define GLUSTERD_STORE_KEY_PEER_HOSTNAME "hostname" -#define GLUSTERD_STORE_KEY_PEER_STATE "state" -#define GLUSTERD_STORE_KEY_VOL_BACKEND "backend" +#define GLUSTERD_STORE_KEY_PEER_UUID "uuid" +#define GLUSTERD_STORE_KEY_PEER_HOSTNAME "hostname" +#define GLUSTERD_STORE_KEY_PEER_STATE "state" + +#define GLUSTERD_STORE_KEY_VOL_CAPS "caps" #define glusterd_for_each_entry(entry, dir) \ do {\ @@ -74,6 +91,7 @@ typedef enum glusterd_store_ver_ac_{ if (dir) {\ entry = readdir (dir);\ while (entry && (!strcmp (entry->d_name, ".") ||\ + !fnmatch ("*.tmp", entry->d_name, 0) ||\ !strcmp (entry->d_name, ".."))) {\ entry = readdir (dir);\ }\ @@ -81,16 +99,6 @@ typedef enum glusterd_store_ver_ac_{ } while (0); \ -typedef enum { - 
GD_STORE_SUCCESS, - GD_STORE_KEY_NULL, - GD_STORE_VALUE_NULL, - GD_STORE_KEY_VALUE_NULL, - GD_STORE_EOF, - GD_STORE_ENOMEM, - GD_STORE_STAT_FAILED -} glusterd_store_op_errno_t; - int32_t glusterd_store_volinfo (glusterd_volinfo_t *volinfo, glusterd_volinfo_ver_ac_t ac); @@ -98,14 +106,7 @@ int32_t glusterd_store_delete_volume (glusterd_volinfo_t *volinfo); int32_t -glusterd_store_handle_new (char *path, glusterd_store_handle_t **handle); - -int32_t -glusterd_store_save_value (int fd, char *key, char *value); - -int32_t -glusterd_store_retrieve_value (glusterd_store_handle_t *handle, - char *key, char **value); +glusterd_store_delete_snap (glusterd_snap_t *snap); int32_t glusterd_retrieve_uuid (); @@ -117,11 +118,8 @@ int32_t glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo); int32_t -glusterd_store_delete_brick (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo); - -int32_t -glusterd_store_handle_destroy (glusterd_store_handle_t *handle); +glusterd_store_delete_brick (glusterd_brickinfo_t *brickinfo, + char *delete_path); int32_t glusterd_restore (); @@ -145,5 +143,22 @@ int32_t glusterd_store_retrieve_options (xlator_t *this); int32_t +glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo); + +int32_t glusterd_store_options (xlator_t *this, dict_t *opts); + +void +glusterd_replace_slash_with_hyphen (char *str); + +int32_t +glusterd_store_perform_volume_store (glusterd_volinfo_t *volinfo); + +int32_t +glusterd_store_snap (glusterd_snap_t *snap); + +int32_t +glusterd_store_update_missed_snaps (dict_t *dict, + int32_t missed_snap_count); + #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c index b943d8e94..438df8266 100644 --- a/xlators/mgmt/glusterd/src/glusterd-syncop.c +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c @@ -17,20 +17,152 @@ #include "glusterd.h" #include "glusterd-op-sm.h" #include "glusterd-utils.h" +#include "glusterd-locks.h" + +extern glusterd_op_info_t opinfo; + +void +gd_synctask_barrier_wait (struct syncargs *args, int count) +{ + glusterd_conf_t *conf = THIS->private; + + synclock_unlock (&conf->big_lock); + synctask_barrier_wait (args, count); + synclock_lock (&conf->big_lock); + + syncbarrier_destroy (&args->barrier); +} static void -gd_collate_errors (struct syncargs *args, int op_ret, int op_errno, - char *op_errstr) +gd_mgmt_v3_collate_errors (struct syncargs *args, int op_ret, int op_errno, + char *op_errstr, int op_code, + glusterd_peerinfo_t *peerinfo, u_char *uuid) { - if (args->op_ret) - return; - args->op_ret = op_ret; - args->op_errno = op_errno; - if (op_ret && op_errstr && strcmp (op_errstr, "")) - args->errstr = gf_strdup (op_errstr); + char err_str[PATH_MAX] = "Please check log file for details."; + char op_err[PATH_MAX] = ""; + int len = -1; + char *peer_str = NULL; + + if (op_ret) { + args->op_ret = op_ret; + args->op_errno = op_errno; + + if (peerinfo) + peer_str = peerinfo->hostname; + else + peer_str = uuid_utoa (uuid); + + if (op_errstr && strcmp (op_errstr, "")) { + len = snprintf (err_str, sizeof(err_str) - 1, + "Error: %s", op_errstr); + err_str[len] = '\0'; + } + + switch (op_code){ + case GLUSTERD_MGMT_V3_LOCK: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Locking failed " + "on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_UNLOCK: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Unlocking failed " + "on %s. 
%s", peer_str, err_str); + break; + } + } + op_err[len] = '\0'; + + if (args->errstr) { + len = snprintf (err_str, sizeof(err_str) - 1, + "%s\n%s", args->errstr, + op_err); + GF_FREE (args->errstr); + args->errstr = NULL; + } else + len = snprintf (err_str, sizeof(err_str) - 1, + "%s", op_err); + err_str[len] = '\0'; + + gf_log ("", GF_LOG_ERROR, "%s", op_err); + args->errstr = gf_strdup (err_str); + } + + return; } static void +gd_collate_errors (struct syncargs *args, int op_ret, int op_errno, + char *op_errstr, int op_code, + glusterd_peerinfo_t *peerinfo, u_char *uuid) +{ + char err_str[PATH_MAX] = "Please check log file for details."; + char op_err[PATH_MAX] = ""; + int len = -1; + char *peer_str = NULL; + + if (op_ret) { + args->op_ret = op_ret; + args->op_errno = op_errno; + + if (peerinfo) + peer_str = peerinfo->hostname; + else + peer_str = uuid_utoa (uuid); + + if (op_errstr && strcmp (op_errstr, "")) { + len = snprintf (err_str, sizeof(err_str) - 1, + "Error: %s", op_errstr); + err_str[len] = '\0'; + } + + switch (op_code){ + case GLUSTERD_MGMT_CLUSTER_UNLOCK : + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Unlocking failed on %s. %s", + peer_str, err_str); + break; + } + case GLUSTERD_MGMT_STAGE_OP : + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Staging failed on %s. %s", + peer_str, err_str); + break; + } + case GLUSTERD_MGMT_COMMIT_OP : + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Commit failed on %s. %s", + peer_str, err_str); + break; + } + } + op_err[len] = '\0'; + + if (args->errstr) { + len = snprintf (err_str, sizeof(err_str) - 1, + "%s\n%s", args->errstr, + op_err); + GF_FREE (args->errstr); + args->errstr = NULL; + } else + len = snprintf (err_str, sizeof(err_str) - 1, + "%s", op_err); + err_str[len] = '\0'; + + gf_log ("", GF_LOG_ERROR, "%s", op_err); + args->errstr = gf_strdup (err_str); + } + + return; +} + +void gd_syncargs_init (struct syncargs *args, dict_t *op_ctx) { args->dict = op_ctx; @@ -70,9 +202,9 @@ gd_brick_op_req_free (gd1_mgmt_brick_op_req *req) } int -gd_syncop_submit_request (struct rpc_clnt *rpc, void *req, - void *cookie, rpc_clnt_prog_t *prog, - int procnum, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc) +gd_syncop_submit_request (struct rpc_clnt *rpc, void *req, void *local, + void *cookie, rpc_clnt_prog_t *prog, int procnum, + fop_cbk_fn_t cbkfn, xdrproc_t xdrproc) { int ret = -1; struct iobuf *iobuf = NULL; @@ -112,7 +244,8 @@ gd_syncop_submit_request (struct rpc_clnt *rpc, void *req, iov.iov_len = ret; count = 1; - frame->local = cookie; + frame->local = local; + frame->cookie = cookie; /* Send the msg */ ret = rpc_clnt_submit (rpc, prog, procnum, cbkfn, @@ -131,8 +264,9 @@ out: /* Defined in glusterd-rpc-ops.c */ extern struct rpc_clnt_program gd_mgmt_prog; extern struct rpc_clnt_program gd_brick_prog; +extern struct rpc_clnt_program gd_mgmt_v3_prog; -static int +int glusterd_syncop_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp) { int ret = 0; @@ -156,6 +290,9 @@ glusterd_syncop_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp) goto out; break; + case GD_OP_GSYNC_CREATE: + break; + case GD_OP_GSYNC_SET: ret = glusterd_gsync_use_rsp_dict (aggr, rsp, NULL); if (ret) @@ -190,6 +327,18 @@ glusterd_syncop_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp) break; + case GD_OP_SYS_EXEC: + ret = glusterd_sys_exec_output_rsp_dict (aggr, rsp); + if (ret) + goto out; + break; + + case GD_OP_SNAP: + ret = glusterd_snap_use_rsp_dict (aggr, rsp); + if (ret) + goto out; + break; + default: break; } @@ -198,20 
+347,25 @@ out: } int32_t -gd_syncop_mgmt_lock_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +_gd_syncop_mgmt_lock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) { int ret = -1; struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; gd1_mgmt_cluster_lock_rsp rsp = {{0},}; call_frame_t *frame = NULL; + int op_ret = -1; + int op_errno = -1; frame = myframe; args = frame->local; + peerinfo = frame->cookie; frame->local = NULL; + frame->cookie = NULL; if (-1 == req->rpc_status) { - args->op_errno = ENOTCONN; + op_errno = ENOTCONN; goto out; } @@ -220,48 +374,244 @@ gd_syncop_mgmt_lock_cbk (struct rpc_req *req, struct iovec *iov, if (ret < 0) goto out; - gd_collate_errors (args, rsp.op_ret, rsp.op_errno, NULL); uuid_copy (args->uuid, rsp.uuid); + /* Set peer as locked, so we unlock only the locked peers */ + if (rsp.op_ret == 0) + peerinfo->locked = _gf_true; + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; out: + gd_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_CLUSTER_LOCK, peerinfo, rsp.uuid); STACK_DESTROY (frame->root); synctask_barrier_wake(args); return 0; } +int32_t +gd_syncop_mgmt_lock_cbk (struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + _gd_syncop_mgmt_lock_cbk); +} int -gd_syncop_mgmt_lock (struct rpc_clnt *rpc, struct syncargs *args, +gd_syncop_mgmt_lock (glusterd_peerinfo_t *peerinfo, struct syncargs *args, uuid_t my_uuid, uuid_t recv_uuid) { int ret = -1; gd1_mgmt_cluster_lock_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; uuid_copy (req.uuid, my_uuid); - ret = gd_syncop_submit_request (rpc, &req, args, &gd_mgmt_prog, + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_prog, GLUSTERD_MGMT_CLUSTER_LOCK, gd_syncop_mgmt_lock_cbk, (xdrproc_t) xdr_gd1_mgmt_cluster_lock_req); + synclock_lock (&conf->big_lock); + return ret; +} + +int32_t +gd_syncop_mgmt_v3_lock_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_lock_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int op_ret = -1; + int op_errno = -1; + + GF_ASSERT(req); + GF_ASSERT(iov); + GF_ASSERT(myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_LOCK, + peerinfo, rsp.uuid); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_syncop_mgmt_v3_lock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_syncop_mgmt_v3_lock_cbk_fn); +} + +int +gd_syncop_mgmt_v3_lock (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid, uuid_t txn_id) +{ + int ret = -1; + gd1_mgmt_v3_lock_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + + GF_ASSERT(op_ctx); + GF_ASSERT(peerinfo); + GF_ASSERT(args); + + ret = dict_allocate_and_serialize (op_ctx, + 
&req.dict.dict_val, + &req.dict.dict_len); if (ret) - synctask_barrier_wake(args); + goto out; + + uuid_copy (req.uuid, my_uuid); + uuid_copy (req.txn_id, txn_id); + req.op = op; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_LOCK, + gd_syncop_mgmt_v3_lock_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_lock_req); + synclock_lock (&conf->big_lock); +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } int32_t -gd_syncop_mgmt_unlock_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +gd_syncop_mgmt_v3_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_unlock_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int op_ret = -1; + int op_errno = -1; + + GF_ASSERT(req); + GF_ASSERT(iov); + GF_ASSERT(myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + /* Set peer as locked, so we unlock only the locked peers */ + if (rsp.op_ret == 0) + peerinfo->locked = _gf_true; + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_UNLOCK, + peerinfo, rsp.uuid); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_syncop_mgmt_v3_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_syncop_mgmt_v3_unlock_cbk_fn); +} + +int +gd_syncop_mgmt_v3_unlock (dict_t *op_ctx, glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid, uuid_t txn_id) +{ + int ret = -1; + gd1_mgmt_v3_unlock_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + + GF_ASSERT(op_ctx); + GF_ASSERT(peerinfo); + GF_ASSERT(args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + uuid_copy (req.txn_id, txn_id); + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_UNLOCK, + gd_syncop_mgmt_v3_unlock_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_unlock_req); + synclock_lock (&conf->big_lock); +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +_gd_syncop_mgmt_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) { int ret = -1; struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; gd1_mgmt_cluster_unlock_rsp rsp = {{0},}; call_frame_t *frame = NULL; + int op_ret = -1; + int op_errno = -1; frame = myframe; args = frame->local; + peerinfo = frame->cookie; frame->local = NULL; if (-1 == req->rpc_status) { - args->op_errno = ENOTCONN; + op_errno = ENOTCONN; goto out; } @@ -270,36 +620,50 @@ gd_syncop_mgmt_unlock_cbk (struct rpc_req *req, struct iovec *iov, if (ret < 0) goto out; - gd_collate_errors (args, rsp.op_ret, rsp.op_errno, NULL); uuid_copy (args->uuid, rsp.uuid); + peerinfo->locked = _gf_false; + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; out: + gd_collate_errors (args, op_ret, op_errno, NULL, 
+ GLUSTERD_MGMT_CLUSTER_UNLOCK, peerinfo, rsp.uuid); STACK_DESTROY (frame->root); synctask_barrier_wake(args); return 0; } +int32_t +gd_syncop_mgmt_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + _gd_syncop_mgmt_unlock_cbk); +} + int -gd_syncop_mgmt_unlock (struct rpc_clnt *rpc, struct syncargs *args, +gd_syncop_mgmt_unlock (glusterd_peerinfo_t *peerinfo, struct syncargs *args, uuid_t my_uuid, uuid_t recv_uuid) { int ret = -1; gd1_mgmt_cluster_unlock_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; uuid_copy (req.uuid, my_uuid); - ret = gd_syncop_submit_request (rpc, &req, args, &gd_mgmt_prog, + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_prog, GLUSTERD_MGMT_CLUSTER_UNLOCK, gd_syncop_mgmt_unlock_cbk, (xdrproc_t) xdr_gd1_mgmt_cluster_lock_req); - if (ret) - synctask_barrier_wake(args); + synclock_lock (&conf->big_lock); return ret; } int32_t -gd_syncop_stage_op_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +_gd_syncop_stage_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) { int ret = -1; gd1_mgmt_stage_op_rsp rsp = {{0},}; @@ -307,6 +671,9 @@ gd_syncop_stage_op_cbk (struct rpc_req *req, struct iovec *iov, xlator_t *this = NULL; dict_t *rsp_dict = NULL; call_frame_t *frame = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + int op_ret = -1; + int op_errno = -1; this = THIS; frame = myframe; @@ -314,8 +681,7 @@ gd_syncop_stage_op_cbk (struct rpc_req *req, struct iovec *iov, frame->local = NULL; if (-1 == req->rpc_status) { - args->op_ret = -1; - args->op_errno = ENOTCONN; + op_errno = ENOTCONN; goto out; } @@ -339,7 +705,15 @@ gd_syncop_stage_op_cbk (struct rpc_req *req, struct iovec *iov, } } - gd_collate_errors (args, rsp.op_ret, rsp.op_errno, rsp.op_errstr); + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, "Staging response " + "for 'Volume %s' received from unknown " + "peer: %s", gd_op_list[rsp.op], + uuid_utoa (rsp.uuid)); + goto out; + } + uuid_copy (args->uuid, rsp.uuid); if (rsp.op == GD_OP_REPLACE_BRICK) { pthread_mutex_lock (&args->lock_dict); @@ -354,7 +728,13 @@ gd_syncop_stage_op_cbk (struct rpc_req *req, struct iovec *iov, pthread_mutex_unlock (&args->lock_dict); } + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + out: + gd_collate_errors (args, op_ret, op_errno, rsp.op_errstr, + GLUSTERD_MGMT_STAGE_OP, peerinfo, rsp.uuid); + if (rsp_dict) dict_unref (rsp_dict); @@ -363,6 +743,14 @@ out: return 0; } +int32_t +gd_syncop_stage_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + _gd_syncop_stage_op_cbk); +} + int gd_syncop_mgmt_stage_op (struct rpc_clnt *rpc, struct syncargs *args, @@ -370,7 +758,8 @@ gd_syncop_mgmt_stage_op (struct rpc_clnt *rpc, struct syncargs *args, dict_t *dict_out, dict_t *op_ctx) { gd1_mgmt_stage_op_req *req = NULL; - int ret = -1; + glusterd_conf_t *conf = THIS->private; + int ret = -1; req = GF_CALLOC (1, sizeof (*req), gf_gld_mt_mop_stage_req_t); if (!req) @@ -384,21 +773,20 @@ gd_syncop_mgmt_stage_op (struct rpc_clnt *rpc, struct syncargs *args, if (ret) goto out; - ret = gd_syncop_submit_request (rpc, req, args, &gd_mgmt_prog, + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (rpc, req, args, NULL, &gd_mgmt_prog, GLUSTERD_MGMT_STAGE_OP, 
gd_syncop_stage_op_cbk, (xdrproc_t) xdr_gd1_mgmt_stage_op_req); + synclock_lock (&conf->big_lock); out: gd_stage_op_req_free (req); - if (ret) - synctask_barrier_wake(args); - return ret; } int32_t -gd_syncop_brick_op_cbk (struct rpc_req *req, struct iovec *iov, +_gd_syncop_brick_op_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { struct syncargs *args = NULL; @@ -444,7 +832,7 @@ gd_syncop_brick_op_cbk (struct rpc_req *req, struct iovec *iov, args->errstr = gf_strdup (rsp.op_errstr); out: - if (strcmp (rsp.op_errstr, "") != 0) + if ((rsp.op_errstr) && (strcmp (rsp.op_errstr, "") != 0)) free (rsp.op_errstr); free (rsp.output.output_val); @@ -454,6 +842,14 @@ out: return 0; } +int32_t +gd_syncop_brick_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + _gd_syncop_brick_op_cbk); +} + int gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode, int op, dict_t *dict_out, dict_t *op_ctx, @@ -483,14 +879,15 @@ gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode, if (ret) goto out; - GD_SYNCOP (rpc, (&args), gd_syncop_brick_op_cbk, - req, &gd_brick_prog, req->op, - xdr_gd1_mgmt_brick_op_req); + GD_SYNCOP (rpc, (&args), NULL, gd_syncop_brick_op_cbk, req, + &gd_brick_prog, req->op, xdr_gd1_mgmt_brick_op_req); - if (args.errstr && errstr) - *errstr = args.errstr; - else - GF_FREE (args.errstr); + if (args.errstr) { + if ((strlen(args.errstr) > 0) && errstr) + *errstr = args.errstr; + else + GF_FREE (args.errstr); + } if (GD_OP_STATUS_VOLUME == op) { ret = dict_set_int32 (args.dict, "index", pnode->index); @@ -517,15 +914,18 @@ out: } int32_t -gd_syncop_commit_op_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +_gd_syncop_commit_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) { - int ret = -1; - gd1_mgmt_commit_op_rsp rsp = {{0},}; - struct syncargs *args = NULL; - xlator_t *this = NULL; - dict_t *rsp_dict = NULL; - call_frame_t *frame = NULL; + int ret = -1; + gd1_mgmt_commit_op_rsp rsp = {{0},}; + struct syncargs *args = NULL; + xlator_t *this = NULL; + dict_t *rsp_dict = NULL; + call_frame_t *frame = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + int op_ret = -1; + int op_errno = -1; this = THIS; frame = myframe; @@ -533,7 +933,7 @@ gd_syncop_commit_op_cbk (struct rpc_req *req, struct iovec *iov, frame->local = NULL; if (-1 == req->rpc_status) { - args->op_errno = ENOTCONN; + op_errno = ENOTCONN; goto out; } @@ -558,7 +958,15 @@ gd_syncop_commit_op_cbk (struct rpc_req *req, struct iovec *iov, } } - gd_collate_errors (args, rsp.op_ret, rsp.op_errno, rsp.op_errstr); + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, "Commit response " + "for 'Volume %s' received from unknown " + "peer: %s", gd_op_list[rsp.op], + uuid_utoa (rsp.uuid)); + goto out; + } + uuid_copy (args->uuid, rsp.uuid); pthread_mutex_lock (&args->lock_dict); { @@ -570,7 +978,13 @@ gd_syncop_commit_op_cbk (struct rpc_req *req, struct iovec *iov, " node/brick"); } pthread_mutex_unlock (&args->lock_dict); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + out: + gd_collate_errors (args, op_ret, op_errno, rsp.op_errstr, + GLUSTERD_MGMT_COMMIT_OP, peerinfo, rsp.uuid); if (rsp_dict) dict_unref (rsp_dict); @@ -580,12 +994,21 @@ out: return 0; } +int32_t +gd_syncop_commit_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return 
glusterd_big_locked_cbk (req, iov, count, myframe, + _gd_syncop_commit_op_cbk); +} + int gd_syncop_mgmt_commit_op (struct rpc_clnt *rpc, struct syncargs *args, uuid_t my_uuid, uuid_t recv_uuid, int op, dict_t *dict_out, dict_t *op_ctx) { + glusterd_conf_t *conf = THIS->private; gd1_mgmt_commit_op_req *req = NULL; int ret = -1; @@ -601,21 +1024,21 @@ gd_syncop_mgmt_commit_op (struct rpc_clnt *rpc, struct syncargs *args, if (ret) goto out; - ret = gd_syncop_submit_request (rpc, req, args, &gd_mgmt_prog, + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (rpc, req, args, NULL, &gd_mgmt_prog, GLUSTERD_MGMT_COMMIT_OP , gd_syncop_commit_op_cbk, (xdrproc_t) xdr_gd1_mgmt_commit_op_req); + synclock_lock (&conf->big_lock); out: gd_commit_op_req_free (req); - if (ret) - synctask_barrier_wake(args); - return ret; } int -gd_build_peers_list (struct list_head *peers, struct list_head *xact_peers) +gd_build_peers_list (struct list_head *peers, struct list_head *xact_peers, + glusterd_op_t op) { glusterd_peerinfo_t *peerinfo = NULL; int npeers = 0; @@ -623,7 +1046,8 @@ gd_build_peers_list (struct list_head *peers, struct list_head *xact_peers) list_for_each_entry (peerinfo, peers, uuid_list) { if (!peerinfo->connected) continue; - if (peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) continue; list_add_tail (&peerinfo->op_peers_list, xact_peers); @@ -633,8 +1057,8 @@ gd_build_peers_list (struct list_head *peers, struct list_head *xact_peers) } int -gd_lock_op_phase (struct list_head *peers, glusterd_op_t op, dict_t *op_ctx, - char **op_errstr, int npeers) +gd_lock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, dict_t *op_ctx, + char **op_errstr, int npeers, uuid_t txn_id) { int ret = -1; int peer_cnt = 0; @@ -642,6 +1066,9 @@ gd_lock_op_phase (struct list_head *peers, glusterd_op_t op, dict_t *op_ctx, xlator_t *this = NULL; glusterd_peerinfo_t *peerinfo = NULL; struct syncargs args = {0}; + struct list_head *peers = NULL; + + peers = &conf->xaction_peers; if (!npeers) { ret = 0; @@ -652,20 +1079,38 @@ gd_lock_op_phase (struct list_head *peers, glusterd_op_t op, dict_t *op_ctx, synctask_barrier_init((&args)); peer_cnt = 0; list_for_each_entry (peerinfo, peers, op_peers_list) { - gd_syncop_mgmt_lock (peerinfo->rpc, &args, MY_UUID, peer_uuid); + if (conf->op_version < 3) { + /* Reset lock status */ + peerinfo->locked = _gf_false; + gd_syncop_mgmt_lock (peerinfo, &args, + MY_UUID, peer_uuid); + } else + gd_syncop_mgmt_v3_lock (op, op_ctx, peerinfo, &args, + MY_UUID, peer_uuid, txn_id); peer_cnt++; } - synctask_barrier_wait((&args), peer_cnt); - ret = args.op_ret; - if (ret) { - gf_asprintf (op_errstr, "Another transaction could be " - "in progress. Please try again after " - "sometime."); - gf_log (this->name, GF_LOG_ERROR, "Failed to acquire lock"); - goto out; + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + else { + ret = gf_asprintf (op_errstr, "Another transaction could be " + "in progress. Please try again after " + "sometime."); + if (ret == -1) + *op_errstr = NULL; + + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire lock"); + + } } - ret = 0; + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent lock op req for 'Volume %s' " + "to %d peers. 
Returning %d", gd_op_list[op], peer_cnt, ret); out: return ret; } @@ -730,11 +1175,17 @@ stage_done: op, req_dict, op_ctx); peer_cnt++; } - synctask_barrier_wait((&args), peer_cnt); - ret = args.op_ret; - if (dict_get_str (op_ctx, "errstr", &errstr) == 0) + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + else if (dict_get_str (op_ctx, "errstr", &errstr) == 0) *op_errstr = gf_strdup (errstr); + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent stage op req for 'Volume %s' " + "to %d peers", gd_op_list[op], peer_cnt); out: if (rsp_dict) dict_unref (rsp_dict); @@ -767,12 +1218,14 @@ gd_commit_op_phase (struct list_head *peers, glusterd_op_t op, dict_t *op_ctx, hostname = "localhost"; goto commit_done; } - ret = glusterd_syncop_aggr_rsp_dict (op, op_ctx, rsp_dict); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "%s", - "Failed to aggregate response " - "from node/brick"); - goto out; + if (op != GD_OP_SYNC_VOLUME) { + ret = glusterd_syncop_aggr_rsp_dict (op, op_ctx, rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", + "Failed to aggregate response " + "from node/brick"); + goto out; + } } dict_unref (rsp_dict); rsp_dict = NULL; @@ -786,14 +1239,13 @@ commit_done: gf_asprintf (op_errstr, OPERRSTR_COMMIT_FAIL, hostname); goto out; - } else { - glusterd_op_modify_op_ctx (op, op_ctx); - } + } if (!npeers) { ret = 0; goto out; } + gd_syncargs_init (&args, op_ctx); synctask_barrier_init((&args)); peer_cnt = 0; @@ -803,21 +1255,33 @@ commit_done: op, req_dict, op_ctx); peer_cnt++; } - synctask_barrier_wait((&args), peer_cnt); + gd_synctask_barrier_wait((&args), peer_cnt); ret = args.op_ret; - if (dict_get_str (op_ctx, "errstr", &errstr) == 0) + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + else if (dict_get_str (op_ctx, "errstr", &errstr) == 0) *op_errstr = gf_strdup (errstr); + gf_log (this->name, GF_LOG_DEBUG, "Sent commit op req for 'Volume %s' " + "to %d peers", gd_op_list[op], peer_cnt); out: + if (!ret) + glusterd_op_modify_op_ctx (op, op_ctx); + if (rsp_dict) dict_unref (rsp_dict); + + GF_FREE (args.errstr); + args.errstr = NULL; + return ret; } int -gd_unlock_op_phase (struct list_head *peers, glusterd_op_t op, int op_ret, +gd_unlock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, int op_ret, rpcsvc_request_t *req, dict_t *op_ctx, char *op_errstr, - int npeers) + int npeers, char *volname, gf_boolean_t is_acquired, + uuid_t txn_id) { glusterd_peerinfo_t *peerinfo = NULL; glusterd_peerinfo_t *tmp = NULL; @@ -826,21 +1290,37 @@ gd_unlock_op_phase (struct list_head *peers, glusterd_op_t op, int op_ret, int ret = -1; xlator_t *this = NULL; struct syncargs args = {0}; + struct list_head *peers = NULL; + + peers = &conf->xaction_peers; if (!npeers) { ret = 0; goto out; } + /* If the lock has not been held during this + * transaction, do not send unlock requests */ + if (!is_acquired) + goto out; + this = THIS; synctask_barrier_init((&args)); peer_cnt = 0; list_for_each_entry_safe (peerinfo, tmp, peers, op_peers_list) { - gd_syncop_mgmt_unlock (peerinfo->rpc, &args, MY_UUID, tmp_uuid); - list_del_init (&peerinfo->op_peers_list); + if (conf->op_version < 3) { + /* Only unlock peers that were locked */ + if (peerinfo->locked) + gd_syncop_mgmt_unlock (peerinfo, &args, + MY_UUID, tmp_uuid); + } else + gd_syncop_mgmt_v3_unlock (op_ctx, peerinfo, + &args, MY_UUID, + tmp_uuid, txn_id); peer_cnt++; + list_del_init (&peerinfo->op_peers_list); } - synctask_barrier_wait((&args), peer_cnt); + 
gd_synctask_barrier_wait((&args), peer_cnt); ret = args.op_ret; if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to unlock " @@ -850,7 +1330,20 @@ gd_unlock_op_phase (struct list_head *peers, glusterd_op_t op, int op_ret, out: glusterd_op_send_cli_response (op, op_ret, 0, req, op_ctx, op_errstr); glusterd_op_clear_op (op); - glusterd_unlock (MY_UUID); + if (is_acquired) { + /* Based on the op-version, we release * + * the cluster or mgmt_v3 lock */ + if (conf->op_version < 3) + glusterd_unlock (MY_UUID); + else { + ret = glusterd_mgmt_v3_unlock (volname, MY_UUID, + "vol"); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to release lock for %s", + volname); + } + } return 0; } @@ -867,7 +1360,8 @@ gd_get_brick_count (struct list_head *bricks) } int -gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, char **op_errstr) +gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char **op_errstr) { glusterd_pending_node_t *pending_node = NULL; struct list_head selected = {0,}; @@ -876,8 +1370,10 @@ gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, char **op int ret = -1; rpc_clnt_t *rpc = NULL; dict_t *rsp_dict = NULL; + glusterd_conf_t *conf = NULL; this = THIS; + conf = this->private; rsp_dict = dict_new (); if (!rsp_dict) { ret = -1; @@ -893,9 +1389,11 @@ gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, char **op goto out; } - ret = glusterd_syncop_aggr_rsp_dict (op, op_ctx, rsp_dict); - if (ret) - goto out; + if (op == GD_OP_HEAL_VOLUME) { + ret = glusterd_syncop_aggr_rsp_dict (op, op_ctx, rsp_dict); + if (ret) + goto out; + } dict_unref (rsp_dict); rsp_dict = NULL; @@ -935,20 +1433,63 @@ out: void gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req) { - int ret = -1; - int npeers = 0; - dict_t *req_dict = NULL; - glusterd_conf_t *conf = NULL; - glusterd_op_t op = 0; - int32_t tmp_op = 0; - char *op_errstr = NULL; - xlator_t *this = NULL; + int ret = -1; + int npeers = 0; + dict_t *req_dict = NULL; + glusterd_conf_t *conf = NULL; + glusterd_op_t op = 0; + int32_t tmp_op = 0; + char *op_errstr = NULL; + char *tmp = NULL; + char *volname = NULL; + xlator_t *this = NULL; + gf_boolean_t is_acquired = _gf_false; + uuid_t *txn_id = NULL; + uuid_t *originator_uuid = NULL; + glusterd_op_info_t txn_opinfo; this = THIS; GF_ASSERT (this); conf = this->private; GF_ASSERT (conf); + /* Generate a transaction-id for this operation and + * save it in the dict */ + txn_id = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); + if (!txn_id) { + ret = -1; + goto out; + } + + uuid_generate (*txn_id); + + ret = dict_set_bin (op_ctx, "transaction_id", + txn_id, sizeof(*txn_id)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set transaction id."); + goto out; + } else + gf_log (this->name, GF_LOG_DEBUG, + "Transaction_id = %s", uuid_utoa (*txn_id)); + + /* Save the MY_UUID as the originator_uuid */ + originator_uuid = GF_CALLOC (1, sizeof(uuid_t), + gf_common_mt_uuid_t); + if (!originator_uuid) { + ret = -1; + goto out; + } + + uuid_copy (*originator_uuid, MY_UUID); + ret = dict_set_bin (op_ctx, "originator_uuid", + originator_uuid, sizeof (uuid_t)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set originator_uuid."); + goto out; + } + ret = dict_get_int32 (op_ctx, GD_SYNC_OPCODE_KEY, &tmp_op); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to get volume " @@ -957,24 +1498,77 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req) } op = tmp_op; - ret = glusterd_lock 
(MY_UUID); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Unable to acquire lock"); - gf_asprintf (&op_errstr, "Another transaction is in progress. " - "Please try again after sometime."); - goto out; + + /* Based on the op_version, acquire a cluster or mgmt_v3 lock */ + if (conf->op_version < 3) { + ret = glusterd_lock (MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock"); + gf_asprintf (&op_errstr, + "Another transaction is in progress. " + "Please try again after sometime."); + goto out; + } + } else { + + /* If no volname is given as a part of the command, locks will + * not be held */ + ret = dict_get_str (op_ctx, "volname", &tmp); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Failed to get volume " + "name"); + goto local_locking_done; + } else { + /* Use a copy of volname, as cli response will be + * sent before the unlock, and the volname in the + * dict, might be removed */ + volname = gf_strdup (tmp); + if (!volname) + goto out; + } + + ret = glusterd_mgmt_v3_lock (volname, MY_UUID, "vol"); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock for %s", volname); + gf_asprintf (&op_errstr, + "Another transaction is in progress " + "for %s. Please try again after sometime.", + volname); + goto out; + } } - /* storing op globally to access in synctask code paths - * This is still acceptable, as we are performing this under - * the 'cluster' lock*/ - glusterd_op_set_op (op); - INIT_LIST_HEAD (&conf->xaction_peers); - npeers = gd_build_peers_list (&conf->peers, &conf->xaction_peers); + is_acquired = _gf_true; + +local_locking_done: - ret = gd_lock_op_phase (&conf->xaction_peers, op, op_ctx, &op_errstr, npeers); + /* Save opinfo for this transaction with the transaction id */ + glusterd_txn_opinfo_init (&txn_opinfo, NULL, &op, NULL, NULL); + ret = glusterd_set_txn_opinfo (txn_id, &txn_opinfo); if (ret) - goto out; + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + + opinfo = txn_opinfo; + + INIT_LIST_HEAD (&conf->xaction_peers); + + /* Make 'volume status tasks' command a local operation. + * This is accomplished by setting npeers to 0. 
+ */ + if (!glusterd_is_status_tasks_op (op, op_ctx)) + npeers = gd_build_peers_list (&conf->peers, + &conf->xaction_peers, op); + + /* If no volname is given as a part of the command, locks will + * not be held */ + if (volname) { + ret = gd_lock_op_phase (conf, op, op_ctx, &op_errstr, npeers, *txn_id); + if (ret) + goto out; + } ret = glusterd_op_build_payload (&req_dict, &op_errstr, op_ctx); if (ret) { @@ -1001,14 +1595,25 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req) ret = 0; out: - (void) gd_unlock_op_phase (&conf->xaction_peers, op, ret, req, - op_ctx, op_errstr, npeers); + (void) gd_unlock_op_phase (conf, op, ret, req, op_ctx, op_errstr, + npeers, volname, is_acquired, *txn_id); + + /* Clearing the transaction opinfo */ + ret = glusterd_clear_txn_opinfo (txn_id); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to clear transaction's opinfo"); + + if (volname) + GF_FREE (volname); if (req_dict) dict_unref (req_dict); - if (op_errstr) + if (op_errstr) { GF_FREE (op_errstr); + op_errstr = NULL; + } return; } diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.h b/xlators/mgmt/glusterd/src/glusterd-syncop.h index 658ed4e2a..e83ea2f4c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-syncop.h +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.h @@ -11,33 +11,40 @@ #define __RPC_SYNCOP_H #include "syncop.h" +#include "glusterd-sm.h" +#include "glusterd.h" #define GD_SYNC_OPCODE_KEY "sync-mgmt-operation" /* gd_syncop_* */ -#define GD_SYNCOP(rpc, stb, cbk, req, prog, procnum, xdrproc) do { \ - int ret = 0; \ - struct synctask *task = NULL; \ - task = synctask_get (); \ - stb->task = task; \ - \ - ret = gd_syncop_submit_request (rpc, req, stb, \ - prog, procnum, cbk, \ - (xdrproc_t)xdrproc); \ - if (!ret) \ - synctask_yield (stb->task); \ +#define GD_SYNCOP(rpc, stb, cookie, cbk, req, prog, procnum, xdrproc) do { \ + int ret = 0; \ + struct synctask *task = NULL; \ + glusterd_conf_t *conf= THIS->private; \ + \ + task = synctask_get (); \ + stb->task = task; \ + \ + /*This is to ensure that the brick_op_cbk is able to \ + * take the big lock*/ \ + synclock_unlock (&conf->big_lock); \ + ret = gd_syncop_submit_request (rpc, req, stb, cookie, \ + prog, procnum, cbk, \ + (xdrproc_t)xdrproc); \ + if (!ret) \ + synctask_yield (stb->task); \ + synclock_lock (&conf->big_lock); \ } while (0) -int gd_syncop_submit_request (struct rpc_clnt *rpc, void *req, - void *cookie, rpc_clnt_prog_t *prog, - int procnum, fop_cbk_fn_t cbkfn, - xdrproc_t xdrproc); +int gd_syncop_submit_request (struct rpc_clnt *rpc, void *req, void *local, + void *cookie, rpc_clnt_prog_t *prog, int procnum, + fop_cbk_fn_t cbkfn, xdrproc_t xdrproc); -int gd_syncop_mgmt_lock (struct rpc_clnt *rpc, struct syncargs *arg, +int gd_syncop_mgmt_lock (glusterd_peerinfo_t *peerinfo, struct syncargs *arg, uuid_t my_uuid, uuid_t recv_uuid); -int gd_syncop_mgmt_unlock (struct rpc_clnt *rpc, struct syncargs *arg, +int gd_syncop_mgmt_unlock (glusterd_peerinfo_t *peerinfo, struct syncargs *arg, uuid_t my_uuid, uuid_t recv_uuid); int gd_syncop_mgmt_stage_op (struct rpc_clnt *rpc, struct syncargs *arg, uuid_t my_uuid, uuid_t recv_uuid, int op, @@ -45,4 +52,20 @@ int gd_syncop_mgmt_stage_op (struct rpc_clnt *rpc, struct syncargs *arg, int gd_syncop_mgmt_commit_op (struct rpc_clnt *rpc, struct syncargs *arg, uuid_t my_uuid, uuid_t recv_uuid, int op, dict_t *dict_out, dict_t *op_ctx); + +void +gd_synctask_barrier_wait (struct syncargs *args, int count); + +int +gd_build_peers_list (struct list_head *peers, struct list_head 
*xact_peers, + glusterd_op_t op); +int +gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char **op_errstr); + +int +glusterd_syncop_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp); + +void +gd_syncargs_init (struct syncargs *args, dict_t *op_ctx); #endif /* __RPC_SYNCOP_H */ diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index e151b2f71..e8ae05851 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -11,7 +11,6 @@ #define _CONFIG_H #include "config.h" #endif -#include <openssl/md5.h> #include <inttypes.h> #include "globals.h" @@ -23,6 +22,7 @@ #include "timer.h" #include "defaults.h" #include "compat.h" +#include "syncop.h" #include "run.h" #include "compat-errno.h" #include "statedump.h" @@ -35,6 +35,8 @@ #include "glusterd-store.h" #include "glusterd-volgen.h" #include "glusterd-pmap.h" +#include "glusterfs-acl.h" +#include "glusterd-locks.h" #include "xdr-generic.h" #include <sys/resource.h> @@ -49,6 +51,10 @@ #include <fnmatch.h> #include <sys/statvfs.h> #include <ifaddrs.h> +#ifdef HAVE_BD_XLATOR +#include <lvm2app.h> +#endif + #ifdef GF_LINUX_HOST_OS #include <mntent.h> @@ -73,17 +79,19 @@ static glusterd_lock_t lock; -static void -md5_wrapper(const unsigned char *data, size_t len, char *md5) +char* +gd_peer_uuid_str (glusterd_peerinfo_t *peerinfo) { - unsigned short i = 0; - unsigned short lim = MD5_DIGEST_LENGTH*2+1; - unsigned char scratch[MD5_DIGEST_LENGTH] = {0,}; - MD5(data, len, scratch); - for (; i < MD5_DIGEST_LENGTH; i++) - snprintf(md5 + i * 2, lim-i*2, "%02x", scratch[i]); + if ((peerinfo == NULL) || uuid_is_null (peerinfo->uuid)) + return NULL; + + if (peerinfo->uuid_str[0] == '\0') + uuid_utoa_r (peerinfo->uuid, peerinfo->uuid_str); + + return peerinfo->uuid_str; } + int32_t glusterd_get_lock_owner (uuid_t *uuid) { @@ -121,189 +129,6 @@ glusterd_is_fuse_available () return _gf_false; } -gf_boolean_t -glusterd_is_loopback_localhost (const struct sockaddr *sa, char *hostname) -{ - GF_ASSERT (sa); - - gf_boolean_t is_local = _gf_false; - const struct in_addr *addr4 = NULL; - const struct in6_addr *addr6 = NULL; - uint8_t *ap = NULL; - struct in6_addr loopbackaddr6 = IN6ADDR_LOOPBACK_INIT; - - switch (sa->sa_family) { - case AF_INET: - addr4 = &(((struct sockaddr_in *)sa)->sin_addr); - ap = (uint8_t*)&addr4->s_addr; - if (ap[0] == 127) - is_local = _gf_true; - break; - - case AF_INET6: - addr6 = &(((struct sockaddr_in6 *)sa)->sin6_addr); - if (memcmp (addr6, &loopbackaddr6, - sizeof (loopbackaddr6)) == 0) - is_local = _gf_true; - break; - - default: - if (hostname) - gf_log ("glusterd", GF_LOG_ERROR, - "unknown address family %d for %s", - sa->sa_family, hostname); - break; - } - - return is_local; -} - -char * -get_ip_from_addrinfo (struct addrinfo *addr, char **ip) -{ - char buf[64]; - void *in_addr = NULL; - struct sockaddr_in *s4 = NULL; - struct sockaddr_in6 *s6 = NULL; - - switch (addr->ai_family) - { - case AF_INET: - s4 = (struct sockaddr_in *)addr->ai_addr; - in_addr = &s4->sin_addr; - break; - - case AF_INET6: - s6 = (struct sockaddr_in6 *)addr->ai_addr; - in_addr = &s6->sin6_addr; - break; - - default: - gf_log ("glusterd", GF_LOG_ERROR, "Invalid family"); - return NULL; - } - - if (!inet_ntop(addr->ai_family, in_addr, buf, sizeof(buf))) { - gf_log ("glusterd", GF_LOG_ERROR, "String conversion failed"); - return NULL; - } - - *ip = strdup (buf); - return *ip; -} - -gf_boolean_t -glusterd_interface_search (char *ip) -{ - 
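/* gd_peer_uuid_str above formats peerinfo->uuid once and caches the string
 * in peerinfo->uuid_str. The same memoization with plain libuuid; struct
 * peer is illustrative, and the 37-byte buffer is what uuid_unparse needs
 * (36 characters plus the NUL):
 */
#include <stddef.h>
#include <uuid/uuid.h>

struct peer {
        uuid_t uuid;
        char   uuid_str[37];
};

static const char *
peer_uuid_str (struct peer *p)
{
        if (p == NULL || uuid_is_null (p->uuid))
                return NULL;
        if (p->uuid_str[0] == '\0')
                uuid_unparse (p->uuid, p->uuid_str);  /* fill on first use */
        return p->uuid_str;
}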
int32_t ret = -1; - gf_boolean_t found = _gf_false; - struct ifaddrs *ifaddr, *ifa; - int family; - char host[NI_MAXHOST]; - xlator_t *this = NULL; - char *pct = NULL; - - this = THIS; - - ret = getifaddrs (&ifaddr); - - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, "getifaddrs() failed: %s\n", - gai_strerror(ret)); - goto out; - } - - for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) { - if (!ifa->ifa_addr) { - /* - * This seemingly happens if an interface hasn't - * been bound to a particular protocol (seen with - * TUN devices). - */ - continue; - } - family = ifa->ifa_addr->sa_family; - - if (family != AF_INET && family != AF_INET6) - continue; - - ret = getnameinfo (ifa->ifa_addr, - (family == AF_INET) ? sizeof(struct sockaddr_in) : - sizeof(struct sockaddr_in6), - host, NI_MAXHOST, NULL, 0, NI_NUMERICHOST); - - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, - "getnameinfo() failed: %s\n", - gai_strerror(ret)); - goto out; - } - - /* - * Sometimes the address comes back as addr%eth0 or - * similar. Since % is an invalid character, we can - * strip it out with confidence that doing so won't - * harm anything. - */ - pct = index(host,'%'); - if (pct) { - *pct = '\0'; - } - - if (strncmp (ip, host, NI_MAXHOST) == 0) { - gf_log (this->name, GF_LOG_DEBUG, - "%s is local address at interface %s", - ip, ifa->ifa_name); - found = _gf_true; - goto out; - } - } -out: - if(ifaddr) - freeifaddrs (ifaddr); - return found; -} - - -gf_boolean_t -glusterd_is_local_addr (char *hostname) -{ - int32_t ret = -1; - struct addrinfo *result = NULL; - struct addrinfo *res = NULL; - gf_boolean_t found = _gf_false; - char *ip = NULL; - xlator_t *this = NULL; - - this = THIS; - ret = getaddrinfo (hostname, NULL, NULL, &result); - - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, "error in getaddrinfo: %s\n", - gai_strerror(ret)); - goto out; - } - - for (res = result; res != NULL; res = res->ai_next) { - gf_log (this->name, GF_LOG_DEBUG, "%s ", - get_ip_from_addrinfo (res, &ip)); - - found = glusterd_is_loopback_localhost (res->ai_addr, hostname) - || glusterd_interface_search (ip); - if (found) - goto out; - } - -out: - if (result) - freeaddrinfo (result); - - if (!found) - gf_log (this->name, GF_LOG_DEBUG, "%s is not local", hostname); - - return found; -} - int32_t glusterd_lock (uuid_t uuid) { @@ -611,8 +436,11 @@ glusterd_volinfo_new (glusterd_volinfo_t **volinfo) if (!new_volinfo) goto out; + LOCK_INIT (&new_volinfo->lock); INIT_LIST_HEAD (&new_volinfo->vol_list); + INIT_LIST_HEAD (&new_volinfo->snapvol_list); INIT_LIST_HEAD (&new_volinfo->bricks); + INIT_LIST_HEAD (&new_volinfo->snap_volumes); new_volinfo->dict = dict_new (); if (!new_volinfo->dict) { @@ -628,6 +456,10 @@ glusterd_volinfo_new (glusterd_volinfo_t **volinfo) goto out; } + snprintf (new_volinfo->parent_volname, GLUSTERD_MAX_VOLUME_NAME, "N/A"); + + new_volinfo->snap_max_hard_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + new_volinfo->xl = THIS; *volinfo = new_volinfo; @@ -639,6 +471,219 @@ out: return ret; } +/* This function will create a new volinfo and then + * dup the entries from volinfo to the new_volinfo. 
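/* glusterd_volinfo_new above initializes vol_list, snapvol_list and the
 * other heads before any failure path can run. That is what makes the
 * matching list_del_init() calls in glusterd_volinfo_delete (further below)
 * safe even for a volinfo that was never linked anywhere: deleting a
 * self-pointing node is a no-op. Minimal kernel-style list showing why:
 */
struct list_head {
        struct list_head *next, *prev;
};

#define INIT_LIST_HEAD(h) do { (h)->next = (h); (h)->prev = (h); } while (0)

static void
list_del_init (struct list_head *e)
{
        e->prev->next = e->next;   /* on a fresh head both point to e,   */
        e->next->prev = e->prev;   /* so these two writes change nothing */
        INIT_LIST_HEAD (e);
}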
+ * + * @param volinfo volinfo which will be duplicated + * @param dup_volinfo new volinfo which will be created + * @param set_userauth if this true then auth info is also set + * + * @return 0 on success else -1 + */ +int32_t +glusterd_volinfo_dup (glusterd_volinfo_t *volinfo, + glusterd_volinfo_t **dup_volinfo, + gf_boolean_t set_userauth) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *new_volinfo = NULL; + + this = THIS; + GF_ASSERT (this); + GF_VALIDATE_OR_GOTO (this->name, volinfo, out); + GF_VALIDATE_OR_GOTO (this->name, dup_volinfo, out); + + ret = glusterd_volinfo_new (&new_volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "not able to create the " + "duplicate volinfo for the volume %s", + volinfo->volname); + goto out; + } + + new_volinfo->type = volinfo->type; + new_volinfo->replica_count = volinfo->replica_count; + new_volinfo->stripe_count = volinfo->stripe_count; + new_volinfo->dist_leaf_count = volinfo->dist_leaf_count; + new_volinfo->sub_count = volinfo->sub_count; + new_volinfo->transport_type = volinfo->transport_type; + new_volinfo->nfs_transport_type = volinfo->nfs_transport_type; + new_volinfo->brick_count = volinfo->brick_count; + + dict_copy (volinfo->dict, new_volinfo->dict); + gd_update_volume_op_versions (new_volinfo); + + if (set_userauth) { + glusterd_auth_set_username (new_volinfo, + volinfo->auth.username); + glusterd_auth_set_password (new_volinfo, + volinfo->auth.password); + } + + *dup_volinfo = new_volinfo; + ret = 0; +out: + if (ret && (NULL != new_volinfo)) { + (void) glusterd_volinfo_delete (new_volinfo); + } + return ret; +} + +/* This function will duplicate brickinfo + * + * @param brickinfo Source brickinfo + * @param dup_brickinfo Destination brickinfo + * + * @return 0 on success else -1 + */ +int32_t +glusterd_brickinfo_dup (glusterd_brickinfo_t *brickinfo, + glusterd_brickinfo_t *dup_brickinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + GF_VALIDATE_OR_GOTO (this->name, brickinfo, out); + GF_VALIDATE_OR_GOTO (this->name, dup_brickinfo, out); + + strcpy (dup_brickinfo->hostname, brickinfo->hostname); + strcpy (dup_brickinfo->path, brickinfo->path); + strcpy (dup_brickinfo->device_path, brickinfo->device_path); + ret = gf_canonicalize_path (dup_brickinfo->path); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to canonicalize " + "brick path"); + goto out; + } + uuid_copy (dup_brickinfo->uuid, brickinfo->uuid); + + dup_brickinfo->port = brickinfo->port; + dup_brickinfo->rdma_port = brickinfo->rdma_port; + if (NULL != brickinfo->logfile) { + dup_brickinfo->logfile = gf_strdup (brickinfo->logfile); + if (NULL == dup_brickinfo->logfile) { + ret = -1; + goto out; + } + } + dup_brickinfo->status = brickinfo->status; + dup_brickinfo->snap_status = brickinfo->snap_status; +out: + return ret; +} + +/* This function will copy snap volinfo to the new + * passed volinfo and regenerate backend store files + * for the restored snap. + * + * @param new_volinfo new volinfo + * @param snap_volinfo volinfo of snap volume + * + * @return 0 on success and -1 on failure + * + * TODO: Duplicate all members of volinfo, e.g. 
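/* Ownership rule followed by glusterd_volinfo_dup below (and by the snap
 * restore loop after it): the out parameter is written only on success, and
 * on failure the callee deletes its own half-built object. A hypothetical
 * caller therefore needs no cleanup of its own:
 */
static glusterd_volinfo_t *
clone_volinfo_or_null (glusterd_volinfo_t *origin)
{
        glusterd_volinfo_t *clone = NULL;

        /* _gf_true also copies auth.username/auth.password */
        if (glusterd_volinfo_dup (origin, &clone, _gf_true))
                return NULL;   /* dup already freed anything it allocated */
        return clone;
}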
geo-rep sync slaves + */ +int32_t +glusterd_snap_volinfo_restore (dict_t *rsp_dict, + glusterd_volinfo_t *new_volinfo, + glusterd_volinfo_t *snap_volinfo) +{ + int32_t brick_count = -1; + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *new_brickinfo = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + + GF_VALIDATE_OR_GOTO (this->name, new_volinfo, out); + GF_VALIDATE_OR_GOTO (this->name, snap_volinfo, out); + + brick_count = 0; + list_for_each_entry (brickinfo, &snap_volinfo->bricks, brick_list) { + ret = glusterd_brickinfo_new (&new_brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to create " + "new brickinfo"); + goto out; + } + + /* Duplicate brickinfo */ + ret = glusterd_brickinfo_dup (brickinfo, new_brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to dup " + "brickinfo"); + goto out; + } + + /* If the brick is not of this peer, or snapshot is missed * + * for the brick do not replace the xattr for it */ + if ((!uuid_compare (brickinfo->uuid, MY_UUID)) && + (brickinfo->snap_status != -1)) { + /* We need to replace the volume id of all the bricks + * to the volume id of the origin volume. new_volinfo + * has the origin volume's volume id*/ + ret = sys_lsetxattr (new_brickinfo->path, + GF_XATTR_VOL_ID_KEY, + new_volinfo->volume_id, + sizeof (new_volinfo->volume_id), + XATTR_REPLACE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "set extended attribute %s on %s. " + "Reason: %s, snap: %s", + GF_XATTR_VOL_ID_KEY, + new_brickinfo->path, strerror (errno), + new_volinfo->volname); + goto out; + } + } + + /* If a snapshot is pending for this brick then + * restore should also be pending + */ + if (brickinfo->snap_status == -1) { + /* Adding missed delete to the dict */ + ret = glusterd_add_missed_snaps_to_dict + (rsp_dict, + snap_volinfo->volname, + brickinfo, + brick_count + 1, + GF_SNAP_OPTION_TYPE_RESTORE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snapshot info " + "for %s:%s in the rsp_dict", + brickinfo->hostname, + brickinfo->path); + goto out; + } + } + + list_add_tail (&new_brickinfo->brick_list, + &new_volinfo->bricks); + /* ownership of new_brickinfo is passed to new_volinfo */ + new_brickinfo = NULL; + brick_count++; + } + + /* Regenerate all volfiles */ + ret = glusterd_create_volfiles_and_notify_services (new_volinfo); + +out: + if (ret && (NULL != new_brickinfo)) { + (void) glusterd_brickinfo_delete (new_brickinfo); + } + + return ret; +} + void glusterd_auth_cleanup (glusterd_volinfo_t *volinfo) { @@ -731,6 +776,7 @@ glusterd_volinfo_delete (glusterd_volinfo_t *volinfo) GF_ASSERT (volinfo); list_del_init (&volinfo->vol_list); + list_del_init (&volinfo->snapvol_list); ret = glusterd_volume_brickinfos_delete (volinfo); if (ret) @@ -751,7 +797,6 @@ out: return ret; } - int32_t glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo) { @@ -803,6 +848,7 @@ glusterd_brickinfo_new_from_brick (char *brick, char *path = NULL; char *tmp_host = NULL; char *tmp_path = NULL; + char *vg = NULL; GF_ASSERT (brick); GF_ASSERT (brickinfo); @@ -821,6 +867,17 @@ glusterd_brickinfo_new_from_brick (char *brick, if (ret) goto out; +#ifdef HAVE_BD_XLATOR + vg = strchr (path, '?'); + /* ? 
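/* Stand-alone version of the volume-id swap performed in the restore loop
 * below. GF_XATTR_VOL_ID_KEY is "trusted.glusterfs.volume-id" (the VG-tag
 * helper further down spells it out); the brick path here is a placeholder
 * and the uuid is generated on the spot. XATTR_REPLACE makes the call fail
 * if the brick was never stamped, rather than silently creating the
 * attribute on the wrong directory:
 */
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/xattr.h>
#include <uuid/uuid.h>

int
main (void)
{
        uuid_t origin_id;

        uuid_generate (origin_id);  /* stands in for the origin volume id */
        if (lsetxattr ("/bricks/b1", "trusted.glusterfs.volume-id",
                       origin_id, sizeof (origin_id), XATTR_REPLACE) != 0)
                fprintf (stderr, "volume-id swap failed: %s\n",
                         strerror (errno));
        return 0;
}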
is used as a delimiter for vg */ + if (vg) { + strncpy (new_brickinfo->vg, vg + 1, PATH_MAX - 1); + *vg = '\0'; + } + new_brickinfo->caps = CAPS_BD; +#else + vg = NULL; /* Avoid compiler warnings when BD not enabled */ +#endif ret = gf_canonicalize_path (path); if (ret) goto out; @@ -924,6 +981,169 @@ out: return available; } +#ifdef HAVE_BD_XLATOR +/* + * Sets the tag of the format "trusted.glusterfs.volume-id:<uuid>" in + * the brick VG. It is used to avoid using same VG for another brick. + * @volume-id - gfid, @brick - brick info, @msg - Error message returned + * to the caller + */ +int +glusterd_bd_set_vg_tag (unsigned char *volume_id, glusterd_brickinfo_t *brick, + char *msg, int msg_size) +{ + lvm_t handle = NULL; + vg_t vg = NULL; + char *uuid = NULL; + int ret = -1; + + gf_asprintf (&uuid, "%s:%s", GF_XATTR_VOL_ID_KEY, + uuid_utoa (volume_id)); + if (!uuid) { + snprintf (msg, sizeof(*msg), "Could not allocate memory " + "for tag"); + return -1; + } + + handle = lvm_init (NULL); + if (!handle) { + snprintf (msg, sizeof(*msg), "lvm_init failed"); + goto out; + } + + vg = lvm_vg_open (handle, brick->vg, "w", 0); + if (!vg) { + snprintf (msg, sizeof(*msg), "Could not open VG %s", + brick->vg); + goto out; + } + + if (lvm_vg_add_tag (vg, uuid) < 0) { + snprintf (msg, sizeof(*msg), "Could not set tag %s for " + "VG %s", uuid, brick->vg); + goto out; + } + lvm_vg_write (vg); + ret = 0; +out: + GF_FREE (uuid); + + if (vg) + lvm_vg_close (vg); + if (handle) + lvm_quit (handle); + + return ret; +} +#endif + +int +glusterd_validate_and_create_brickpath (glusterd_brickinfo_t *brickinfo, + uuid_t volume_id, char **op_errstr, + gf_boolean_t is_force) +{ + int ret = -1; + char parentdir[PATH_MAX] = {0,}; + struct stat parent_st = {0,}; + struct stat brick_st = {0,}; + struct stat root_st = {0,}; + char msg[2048] = {0,}; + gf_boolean_t is_created = _gf_false; + + ret = mkdir (brickinfo->path, 0777); + if (ret) { + if (errno != EEXIST) { + snprintf (msg, sizeof (msg), "Failed to create brick " + "directory for brick %s:%s. Reason : %s ", + brickinfo->hostname, brickinfo->path, + strerror (errno)); + goto out; + } + } else { + is_created = _gf_true; + } + + ret = lstat (brickinfo->path, &brick_st); + if (ret) { + snprintf (msg, sizeof (msg), "lstat failed on %s. Reason : %s", + brickinfo->path, strerror (errno)); + goto out; + } + + if ((!is_created) && (!S_ISDIR (brick_st.st_mode))) { + snprintf (msg, sizeof (msg), "The provided path %s which is " + "already present, is not a directory", + brickinfo->path); + ret = -1; + goto out; + } + + snprintf (parentdir, sizeof (parentdir), "%s/..", brickinfo->path); + + ret = lstat ("/", &root_st); + if (ret) { + snprintf (msg, sizeof (msg), "lstat failed on /. Reason : %s", + strerror (errno)); + goto out; + } + + ret = lstat (parentdir, &parent_st); + if (ret) { + snprintf (msg, sizeof (msg), "lstat failed on %s. Reason : %s", + parentdir, strerror (errno)); + goto out; + } + + if (!is_force) { + if (brick_st.st_dev != parent_st.st_dev) { + snprintf (msg, sizeof (msg), "The brick %s:%s is a " + "mount point. Please create a sub-directory " + "under the mount point and use that as the " + "brick directory. Or use 'force' at the end " + "of the command if you want to override this " + "behavior.", brickinfo->hostname, + brickinfo->path); + ret = -1; + goto out; + } + else if (parent_st.st_dev == root_st.st_dev) { + snprintf (msg, sizeof (msg), "The brick %s:%s is " + "is being created in the root partition. 
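/* (Aside: the snprintf calls in glusterd_bd_set_vg_tag above bound the
 * message with sizeof(*msg), which is one byte since msg is a char *; the
 * unused msg_size parameter looks like the intended bound.)
 *
 * The force-less validation here rejects a brick that is itself a mount
 * point (brick and parent on different devices) or whose parent sits on the
 * root filesystem (parent and "/" on the same device), purely by comparing
 * st_dev. A self-contained version of those two checks; brick_path is a
 * placeholder:
 */
#include <stdio.h>
#include <sys/stat.h>

static int
check_brick_placement (const char *brick_path)
{
        char        parent[4096];
        struct stat b, p, r;

        snprintf (parent, sizeof (parent), "%s/..", brick_path);
        if (lstat (brick_path, &b) || lstat (parent, &p) || lstat ("/", &r))
                return -1;
        if (b.st_dev != p.st_dev)
                return 1;   /* brick dir is a mount point of its own */
        if (p.st_dev == r.st_dev)
                return 2;   /* brick would land on the root partition */
        return 0;
}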
It " + "is recommended that you don't use the " + "system's root partition for storage backend." + " Or use 'force' at the end of the command if" + " you want to override this behavior.", + brickinfo->hostname, brickinfo->path); + ret = -1; + goto out; + } + } + +#ifdef HAVE_BD_XLATOR + if (brickinfo->vg[0]) { + ret = glusterd_bd_set_vg_tag (volume_id, brickinfo, msg, + sizeof(msg)); + if (ret) + goto out; + } +#endif + ret = glusterd_check_and_set_brick_xattr (brickinfo->hostname, + brickinfo->path, volume_id, + op_errstr, is_force); + if (ret) + goto out; + + ret = 0; + +out: + if (ret && is_created) + rmdir (brickinfo->path); + if (ret && !*op_errstr && msg[0] != '\0') + *op_errstr = gf_strdup (msg); + + return ret; +} + int32_t glusterd_volume_brickinfo_get (uuid_t uuid, char *hostname, char *path, glusterd_volinfo_t *volinfo, @@ -1015,12 +1235,15 @@ glusterd_friend_cleanup (glusterd_peerinfo_t *peerinfo) GF_ASSERT (peerinfo); glusterd_peerctx_t *peerctx = NULL; gf_boolean_t quorum_action = _gf_false; + glusterd_conf_t *priv = THIS->private; if (peerinfo->quorum_contrib != QUORUM_NONE) quorum_action = _gf_true; if (peerinfo->rpc) { /* cleanup the saved-frames before last unref */ + synclock_unlock (&priv->big_lock); rpc_clnt_connection_cleanup (&peerinfo->rpc->conn); + synclock_lock (&priv->big_lock); peerctx = peerinfo->rpc->mydata; peerinfo->rpc->mydata = NULL; @@ -1038,6 +1261,68 @@ glusterd_friend_cleanup (glusterd_peerinfo_t *peerinfo) return 0; } +int +glusterd_volinfo_find_by_volume_id (uuid_t volume_id, glusterd_volinfo_t **volinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *voliter = NULL; + glusterd_conf_t *priv = NULL; + + if (!volume_id) + return -1; + + this = THIS; + priv = this->private; + + list_for_each_entry (voliter, &priv->volumes, vol_list) { + if (uuid_compare (volume_id, voliter->volume_id)) + continue; + *volinfo = voliter; + ret = 0; + gf_log (this->name, GF_LOG_DEBUG, "Volume %s found", + voliter->volname); + break; + } + return ret; +} + +int +glusterd_snap_volinfo_find_by_volume_id (uuid_t volume_id, + glusterd_volinfo_t **volinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *voliter = NULL; + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (volinfo); + + if (uuid_is_null(volume_id)) { + gf_log (this->name, GF_LOG_WARNING, "Volume UUID is NULL"); + goto out; + } + + list_for_each_entry (snap, &priv->snapshots, snap_list) { + list_for_each_entry (voliter, &snap->volumes, vol_list) { + if (uuid_compare (volume_id, voliter->volume_id)) + continue; + *volinfo = voliter; + ret = 0; + goto out; + } + } + + gf_log (this->name, GF_LOG_WARNING, "Snap volume not found"); +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + int32_t glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo) { @@ -1047,7 +1332,6 @@ glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo) glusterd_conf_t *priv = NULL; GF_ASSERT (volname); - this = THIS; GF_ASSERT (this); @@ -1056,7 +1340,8 @@ glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo) list_for_each_entry (tmp_volinfo, &priv->volumes, vol_list) { if (!strcmp (tmp_volinfo->volname, volname)) { - gf_log (this->name, GF_LOG_DEBUG, "Volume %s found", volname); + gf_log (this->name, GF_LOG_DEBUG, "Volume %s found", + volname); ret = 0; *volinfo = tmp_volinfo; break; @@ -1068,99 +1353,102 @@ glusterd_volinfo_find (char *volname, 
glusterd_volinfo_t **volinfo) } int32_t -glusterd_service_stop (const char *service, char *pidfile, int sig, - gf_boolean_t force_kill) +glusterd_snap_volinfo_find (char *snap_volname, glusterd_snap_t *snap, + glusterd_volinfo_t **volinfo) { - int32_t ret = -1; - pid_t pid = -1; - FILE *file = NULL; - gf_boolean_t is_locked = _gf_false; - xlator_t *this = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_conf_t *priv = NULL; this = THIS; - GF_ASSERT (this); - file = fopen (pidfile, "r+"); + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (snap); + GF_ASSERT (snap_volname); - if (!file) { - gf_log (this->name, GF_LOG_ERROR, "Unable to open pidfile: %s", - pidfile); - if (errno == ENOENT) { - gf_log (this->name, GF_LOG_TRACE, "%s may not be " - "running", service); + list_for_each_entry (snap_vol, &snap->volumes, vol_list) { + if (!strcmp (snap_vol->volname, snap_volname)) { ret = 0; + *volinfo = snap_vol; goto out; } - ret = -1; - goto out; } - ret = lockf (fileno (file), F_TLOCK, 0); - if (!ret) { - is_locked = _gf_true; - ret = unlink (pidfile); - if (ret && (ENOENT != errno)) { - gf_log (this->name, GF_LOG_ERROR, "Unable to " - "unlink stale pidfile: %s", pidfile); - } else if (ret && (ENOENT == errno)){ + + gf_log (this->name, GF_LOG_WARNING, "Snap volume %s not found", + snap_volname); +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_snap_volinfo_find_from_parent_volname (char *origin_volname, + glusterd_snap_t *snap, + glusterd_volinfo_t **volinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (snap); + GF_ASSERT (origin_volname); + + list_for_each_entry (snap_vol, &snap->volumes, vol_list) { + if (!strcmp (snap_vol->parent_volname, origin_volname)) { ret = 0; - gf_log (this->name, GF_LOG_DEBUG, "Brick already " - "stopped"); + *volinfo = snap_vol; + goto out; } - goto out; } + gf_log (this->name, GF_LOG_DEBUG, "Snap volume not found(snap: %s, " + "origin-volume: %s", snap->snapname, origin_volname); - ret = fscanf (file, "%d", &pid); - if (ret <= 0) { - gf_log (this->name, GF_LOG_ERROR, "Unable to read pidfile: %s", - pidfile); - ret = -1; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_service_stop (const char *service, char *pidfile, int sig, + gf_boolean_t force_kill) +{ + int32_t ret = -1; + pid_t pid = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + if (!glusterd_is_service_running (pidfile, &pid)) { + ret = 0; + gf_log (this->name, GF_LOG_INFO, "%s already stopped", service); goto out; } - fclose (file); - file = NULL; - gf_log (this->name, GF_LOG_DEBUG, "Stopping gluster %s running in pid: " "%d", service, pid); ret = kill (pid, sig); + if (!force_kill) + goto out; - if (force_kill) { - sleep (1); - file = fopen (pidfile, "r+"); - if (!file) { - ret = 0; - goto out; - } - ret = lockf (fileno (file), F_TLOCK, 0); - if (ret && ((EAGAIN == errno) || (EACCES == errno))) { - ret = kill (pid, SIGKILL); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Unable to " - "kill pid %d reason: %s", pid, - strerror(errno)); - goto out; - } - - } else if (0 == ret){ - is_locked = _gf_true; - } - ret = unlink (pidfile); - if (ret && (ENOENT != errno)) { + sleep (1); + if (glusterd_is_service_running (pidfile, NULL)) { + ret = kill (pid, SIGKILL); + if (ret) { gf_log 
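/* The rewritten glusterd_service_stop here leans on
 * glusterd_is_service_running (read the pidfile, then probe the pid with
 * signal 0) instead of the old lockf juggling. A self-contained sketch of
 * that probe; EPERM from kill() still means the process exists:
 */
#include <stdio.h>
#include <errno.h>
#include <signal.h>
#include <sys/types.h>

static int
service_running (const char *pidfile, pid_t *out_pid)
{
        FILE *f     = fopen (pidfile, "r");
        int   pid   = -1;
        int   alive = 0;

        if (!f)
                return 0;
        if (fscanf (f, "%d", &pid) == 1)
                /* signal 0 checks existence, delivers nothing */
                alive = (kill (pid, 0) == 0 || errno == EPERM);
        fclose (f);
        if (out_pid)
                *out_pid = pid;
        return alive;
}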
(this->name, GF_LOG_ERROR, "Unable to " - "unlink pidfile: %s", pidfile); + "kill pid %d reason: %s", pid, + strerror(errno)); goto out; } } ret = 0; out: - if (is_locked && file) - if (lockf (fileno (file), F_ULOCK, 0) < 0) - gf_log (this->name, GF_LOG_WARNING, "Cannot unlock " - "pidfile: %s reason: %s", pidfile, - strerror(errno)); - if (file) - fclose (file); return ret; } @@ -1206,33 +1494,41 @@ glusterd_set_brick_socket_filepath (glusterd_volinfo_t *volinfo, */ int32_t glusterd_brick_connect (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) + glusterd_brickinfo_t *brickinfo, char *socketpath) { int ret = 0; - char socketpath[PATH_MAX] = {0}; + char volume_id_str[64]; + char *brickid = NULL; dict_t *options = NULL; struct rpc_clnt *rpc = NULL; + glusterd_conf_t *priv = THIS->private; GF_ASSERT (volinfo); GF_ASSERT (brickinfo); + GF_ASSERT (socketpath); if (brickinfo->rpc == NULL) { - glusterd_set_brick_socket_filepath (volinfo, brickinfo, - socketpath, - sizeof (socketpath)); - /* Setting frame-timeout to 10mins (600seconds). * Unix domain sockets ensures that the connection is reliable. * The default timeout of 30mins used for unreliable network * connections is too long for unix domain socket connections. */ - ret = rpc_clnt_transport_unix_options_build (&options, - socketpath, 600); + ret = rpc_transport_unix_options_build (&options, socketpath, + 600); if (ret) goto out; + + uuid_utoa_r (volinfo->volume_id, volume_id_str); + ret = gf_asprintf (&brickid, "%s:%s:%s", volume_id_str, + brickinfo->hostname, brickinfo->path); + if (ret < 0) + goto out; + + synclock_unlock (&priv->big_lock); ret = glusterd_rpc_create (&rpc, options, glusterd_brick_rpc_notify, - brickinfo); + brickid); + synclock_lock (&priv->big_lock); if (ret) goto out; brickinfo->rpc = rpc; @@ -1242,6 +1538,35 @@ out: return ret; } +/* Caller should ensure that brick process is not running*/ +static void +_reap_brick_process (char *pidfile, char *brickpath) +{ + unlink (pidfile); + /* Brick process is not running and pmap may have an entry for it.*/ + pmap_registry_remove (THIS, 0, brickpath, + GF_PMAP_PORT_BRICKSERVER, NULL); +} + +static int +_mk_rundir_p (glusterd_volinfo_t *volinfo) +{ + char voldir[PATH_MAX] = {0,}; + char rundir[PATH_MAX] = {0,}; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + int ret = -1; + + this = THIS; + priv = this->private; + GLUSTERD_GET_VOLUME_DIR (voldir, volinfo, priv); + snprintf (rundir, sizeof (rundir)-1, "%s/run", voldir); + ret = mkdir_p (rundir, 0777, _gf_true); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Failed to create rundir"); + return ret; +} + int32_t glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo, @@ -1250,22 +1575,17 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, int32_t ret = -1; xlator_t *this = NULL; glusterd_conf_t *priv = NULL; - char pidfile[PATH_MAX] = {0,}; + char pidfile[PATH_MAX+1] = {0,}; char volfile[PATH_MAX] = {0,}; - char path[PATH_MAX] = {0,}; runner_t runner = {0,}; - char rundir[PATH_MAX] = {0,}; char exp_path[PATH_MAX] = {0,}; char logfile[PATH_MAX] = {0,}; int port = 0; int rdma_port = 0; - FILE *file = NULL; - gf_boolean_t is_locked = _gf_false; char socketpath[PATH_MAX] = {0}; char glusterd_uuid[1024] = {0,}; -#ifdef DEBUG char valgrind_logfile[PATH_MAX] = {0}; -#endif + GF_ASSERT (volinfo); GF_ASSERT (brickinfo); @@ -1275,100 +1595,83 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, priv = this->private; GF_ASSERT (priv); - 
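/* _mk_rundir_p above leans on gluster's mkdir_p helper. A portable sketch
 * of that "create every missing component" behavior: it walks the path,
 * truncating at each separator, creating the prefix, and restoring the
 * separator afterwards (EEXIST is not an error):
 */
#include <string.h>
#include <errno.h>
#include <sys/stat.h>

static int
mkdir_p_sketch (char *path, mode_t mode)
{
        char *p = NULL;

        for (p = strchr (path + 1, '/'); p; p = strchr (p + 1, '/')) {
                *p = '\0';
                if (mkdir (path, mode) != 0 && errno != EEXIST) {
                        *p = '/';
                        return -1;
                }
                *p = '/';
        }
        return (mkdir (path, mode) != 0 && errno != EEXIST) ? -1 : 0;
}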
GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); - snprintf (rundir, PATH_MAX, "%s/run", path); - ret = mkdir (rundir, 0777); - - if ((ret == -1) && (EEXIST != errno)) { - gf_log (this->name, GF_LOG_ERROR, "Unable to create rundir %s." - "Reason : %s", rundir, strerror (errno)); + if (brickinfo->snap_status == -1) { + gf_log (this->name, GF_LOG_INFO, + "Snapshot is pending on %s:%s. " + "Hence not starting the brick", + brickinfo->hostname, + brickinfo->path); + ret = 0; goto out; } + ret = _mk_rundir_p (volinfo); + if (ret) + goto out; + glusterd_set_brick_socket_filepath (volinfo, brickinfo, socketpath, sizeof (socketpath)); - GLUSTERD_GET_BRICK_PIDFILE (pidfile, path, brickinfo->hostname, - brickinfo->path); - file = fopen (pidfile, "r+"); - if (file) { - ret = lockf (fileno (file), F_TLOCK, 0); - if (ret && ((EAGAIN == errno) || (EACCES == errno))) { - ret = 0; - gf_log (this->name, GF_LOG_DEBUG, "brick %s:%s " - "already started", brickinfo->hostname, - brickinfo->path); - goto connect; - } - } - - ret = pmap_registry_search (this, brickinfo->path, - GF_PMAP_PORT_BRICKSERVER); - if (ret) { - ret = 0; - file = fopen (pidfile, "r+"); - if (file) { - ret = lockf (fileno (file), F_TLOCK, 0); - if (ret && ((EAGAIN == errno) || (EACCES == errno))) { - ret = 0; - gf_log (this->name, GF_LOG_DEBUG, "brick %s:%s " - "already started", brickinfo->hostname, - brickinfo->path); - goto connect; - } else if (0 == ret) { - is_locked = _gf_true; - } - } - /* This means, pmap has the entry, remove it */ - ret = pmap_registry_remove (this, 0, brickinfo->path, - GF_PMAP_PORT_BRICKSERVER, NULL); - } - unlink (pidfile); + GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv); + if (glusterd_is_service_running (pidfile, NULL)) + goto connect; - gf_log (this->name, GF_LOG_DEBUG, "About to start glusterfs" - " for brick %s:%s", brickinfo->hostname, - brickinfo->path); - GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, exp_path); - snprintf (volfile, PATH_MAX, "%s.%s.%s", volinfo->volname, - brickinfo->hostname, exp_path); - - if (!brickinfo->logfile && volinfo->logdir) { - snprintf (logfile, PATH_MAX, "%s/%s.log", volinfo->logdir, - exp_path); - brickinfo->logfile = gf_strdup (logfile); - } else if (!brickinfo->logfile) { - snprintf (logfile, PATH_MAX, "%s/bricks/%s.log", - DEFAULT_LOG_FILE_DIRECTORY, exp_path); - brickinfo->logfile = gf_strdup (logfile); - } + _reap_brick_process (pidfile, brickinfo->path); port = brickinfo->port; if (!port) port = pmap_registry_alloc (THIS); + /* Build the exp_path, before starting the glusterfsd even in + valgrind mode. Otherwise all the glusterfsd processes start + writing the valgrind log to the same file. 
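/* Per-brick valgrind log naming as assembled around this point, shown as a
 * stand-alone wrapper. logdir, volname and exp_path are placeholders, and
 * /bin/true stands in for the wrapped glusterfsd command line:
 */
#include <stdio.h>
#include <unistd.h>

static void
exec_under_valgrind (const char *logdir, const char *volname,
                     const char *exp_path)
{
        char logopt[4096];

        snprintf (logopt, sizeof (logopt),
                  "--log-file=%s/valgrind-%s-%s.log",
                  logdir, volname, exp_path);
        execlp ("valgrind", "valgrind", "--leak-check=full",
                "--trace-children=yes", "--track-origins=yes",
                logopt, "/bin/true", (char *) NULL);
}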
+ */ + GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, exp_path); runinit (&runner); -#ifdef DEBUG if (priv->valgrind) { + /* Run bricks with valgrind */ if (volinfo->logdir) { snprintf (valgrind_logfile, PATH_MAX, - "%s/valgrind-%s-%s.log", volinfo->logdir, + "%s/valgrind-%s-%s.log", + volinfo->logdir, volinfo->volname, exp_path); } else { - snprintf (valgrind_logfile, PATH_MAX, - "%s/bricks/valgrind-%s-%s.log", - DEFAULT_LOG_FILE_DIRECTORY, - volinfo->volname, exp_path); + snprintf (valgrind_logfile, PATH_MAX, + "%s/bricks/valgrind-%s-%s.log", + DEFAULT_LOG_FILE_DIRECTORY, + volinfo->volname, exp_path); } - /* Run bricks with valgrind */ + runner_add_args (&runner, "valgrind", "--leak-check=full", - "--trace-children=yes", NULL); + "--trace-children=yes", "--track-origins=yes", + NULL); runner_argprintf (&runner, "--log-file=%s", valgrind_logfile); - } -#endif + } + + if (volinfo->is_snap_volume) { + snprintf (volfile, PATH_MAX,"/%s/%s/%s.%s.%s", + GLUSTERD_VOL_SNAP_DIR_PREFIX, + volinfo->snapshot->snapname, volinfo->volname, + brickinfo->hostname, exp_path); + } else { + snprintf (volfile, PATH_MAX, "%s.%s.%s", volinfo->volname, + brickinfo->hostname, exp_path); + } + + if (volinfo->logdir) { + snprintf (logfile, PATH_MAX, "%s/%s.log", + volinfo->logdir, exp_path); + } else { + snprintf (logfile, PATH_MAX, "%s/bricks/%s.log", + DEFAULT_LOG_FILE_DIRECTORY, exp_path); + } + if (!brickinfo->logfile) + brickinfo->logfile = gf_strdup (logfile); + (void) snprintf (glusterd_uuid, 1024, "*-posix.glusterd-uuid=%s", uuid_utoa (MY_UUID)); - runner_add_args (&runner, SBIN_DIR"/glusterfsd", + runner_add_args (&runner, SBIN_DIR"/glusterfsd", "-s", brickinfo->hostname, "--volfile-id", volfile, "-p", pidfile, "-S", socketpath, "--brick-name", brickinfo->path, @@ -1376,7 +1679,7 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, "--xlator-option", glusterd_uuid, NULL); - runner_add_arg (&runner, "--brick-port"); + runner_add_arg (&runner, "--brick-port"); if (volinfo->transport_type != GF_TRANSPORT_BOTH_TCP_RDMA) { runner_argprintf (&runner, "%d", port); } else { @@ -1397,30 +1700,30 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, runner_add_arg (&runner, "--mem-accounting"); runner_log (&runner, "", GF_LOG_DEBUG, "Starting GlusterFS"); - if (wait) + if (wait) { + synclock_unlock (&priv->big_lock); ret = runner_run (&runner); - else + synclock_lock (&priv->big_lock); + + } else { ret = runner_run_nowait (&runner); + } if (ret) goto out; - //pmap_registry_bind (THIS, port, brickinfo->path); brickinfo->port = port; brickinfo->rdma_port = rdma_port; connect: - ret = glusterd_brick_connect (volinfo, brickinfo); - if (ret) + ret = glusterd_brick_connect (volinfo, brickinfo, socketpath); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to connect to brick %s:%s on %s", + brickinfo->hostname, brickinfo->path, socketpath); goto out; + } out: - if (is_locked && file) - if (lockf (fileno (file), F_ULOCK, 0) < 0) - gf_log (this->name, GF_LOG_WARNING, "Cannot unlock " - "pidfile: %s reason: %s", pidfile, - strerror(errno)); - if (file) - fclose (file); return ret; } @@ -1458,15 +1761,25 @@ glusterd_brick_unlink_socket_file (glusterd_volinfo_t *volinfo, int32_t glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo) { + rpc_clnt_t *rpc = NULL; + glusterd_conf_t *priv = THIS->private; + GF_ASSERT (brickinfo); - if (brickinfo->rpc) { - /* cleanup the saved-frames before last unref */ - rpc_clnt_connection_cleanup (&brickinfo->rpc->conn); + if (!brickinfo) { + 
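/* The function being rewritten here (glusterd_brick_disconnect, continued
 * just below) steals brickinfo->rpc into a local first and only drops the
 * big lock for the final unref: nothing can race on a pointer that is
 * already NULL. Generic shape of that idiom with a pthread mutex; obj_put
 * stands in for rpc_clnt_unref:
 */
#include <stddef.h>
#include <pthread.h>

static void
detach_and_put (void **slot, pthread_mutex_t *lock,
                void (*obj_put) (void *))
{
        void *obj = NULL;

        pthread_mutex_lock (lock);
        obj   = *slot;          /* steal the reference under the lock */
        *slot = NULL;
        pthread_mutex_unlock (lock);

        if (obj)
                obj_put (obj);  /* may block or re-enter; lock not held */
}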
gf_log_callingfn ("glusterd", GF_LOG_WARNING, "!brickinfo"); + return -1; + } - rpc_clnt_unref (brickinfo->rpc); - brickinfo->rpc = NULL; + rpc = brickinfo->rpc; + brickinfo->rpc = NULL; + + if (rpc) { + synclock_unlock (&priv->big_lock); + rpc_clnt_unref (rpc); + synclock_lock (&priv->big_lock); } + return 0; } @@ -1475,11 +1788,10 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo, gf_boolean_t del_brick) { - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - char pidfile[PATH_MAX] = {0,}; - char path[PATH_MAX] = {0,}; - int ret = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char pidfile[PATH_MAX] = {0,}; + int ret = 0; GF_ASSERT (volinfo); GF_ASSERT (brickinfo); @@ -1493,11 +1805,7 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, if (GLUSTERD_STATUS_STARTED == volinfo->status) { (void) glusterd_brick_disconnect (brickinfo); - - GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); - GLUSTERD_GET_BRICK_PIDFILE (pidfile, path, brickinfo->hostname, - brickinfo->path); - + GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv); ret = glusterd_service_stop ("brick", pidfile, SIGTERM, _gf_false); if (ret == 0) { glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED); @@ -1682,13 +1990,14 @@ glusterd_volume_compute_cksum (glusterd_volinfo_t *volinfo) snprintf (filepath, sizeof (filepath), "%s/%s", path, GLUSTERD_VOLUME_INFO_FILE); - snprintf (sort_filepath, sizeof (sort_filepath), "/tmp/%s.XXXXXX", - volinfo->volname); + snprintf (sort_filepath, sizeof (sort_filepath), + "/tmp/%s.XXXXXX", volinfo->volname); sort_fd = mkstemp (sort_filepath); if (sort_fd < 0) { gf_log (this->name, GF_LOG_ERROR, "Could not generate temp " - "file, reason: %s for volume: %s", strerror (errno), + "file, reason: %s for %s: %s", strerror (errno), + (volinfo->is_snap_volume)?"snap":"volume", volinfo->volname); goto out; } else { @@ -1729,7 +2038,6 @@ glusterd_volume_compute_cksum (glusterd_volinfo_t *volinfo) goto out; volinfo->cksum = cksum; - out: if (fd > 0) close (fd); @@ -1808,7 +2116,10 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, glusterd_dict_ctx_t ctx = {0}; char *rebalance_id_str = NULL; char *rb_id_str = NULL; + xlator_t *this = NULL; + this = THIS; + GF_ASSERT (this); GF_ASSERT (dict); GF_ASSERT (volinfo); @@ -1823,6 +2134,15 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + snprintf (key, sizeof (key), "volume%d.is_volume_restored", count); + ret = dict_set_int32 (dict, key, volinfo->is_volume_restored); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to set " + "is_volume_restored option for %s volume", + volinfo->volname); + goto out; + } + memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.brick_count", count); ret = dict_set_int32 (dict, key, volinfo->brick_count); @@ -1877,6 +2197,20 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + snprintf (key, sizeof (key), "volume%d.is_snap_volume", count); + ret = dict_set_uint32 (dict, key, volinfo->is_snap_volume); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Unable to set %s", key); + goto out; + } + + snprintf (key, sizeof (key), "volume%d.snap-max-hard-limit", count); + ret = dict_set_uint64 (dict, key, volinfo->snap_max_hard_limit); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Unable to set %s", key); + goto out; + } + volume_id_str = gf_strdup (uuid_utoa (volinfo->volume_id)); if (!volume_id_str) { ret = -1; @@ -2023,9 +2357,42 @@ 
glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + snprintf (key, sizeof (key), "volume%d.brick%d.snap_status", + count, i); + ret = dict_set_int32 (dict, key, brickinfo->snap_status); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set snap_status for %s:%s", + brickinfo->hostname, + brickinfo->path); + goto out; + } + + snprintf (key, sizeof (key), "volume%d.brick%d.device_path", + count, i); + ret = dict_set_str (dict, key, brickinfo->device_path); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set snap_device for %s:%s", + brickinfo->hostname, + brickinfo->path); + goto out; + } + i++; } + /* Add volume op-versions to dict. This prevents volume inconsistencies + * in the cluster + */ + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.op-version", count); + ret = dict_set_int32 (dict, key, volinfo->op_version); + if (ret) + goto out; + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.client-op-version", count); + ret = dict_set_int32 (dict, key, volinfo->client_op_version); out: GF_FREE (volume_id_str); @@ -2087,7 +2454,8 @@ out: } int32_t -glusterd_compare_friend_volume (dict_t *vols, int32_t count, int32_t *status) +glusterd_compare_friend_volume (dict_t *vols, int32_t count, int32_t *status, + char *hostname) { int32_t ret = -1; @@ -2123,8 +2491,8 @@ glusterd_compare_friend_volume (dict_t *vols, int32_t count, int32_t *status) //Mismatch detected ret = 0; gf_log ("", GF_LOG_ERROR, "Version of volume %s differ." - "local version = %d, remote version = %d", - volinfo->volname, volinfo->version, version); + "local version = %d, remote version = %d on peer %s", + volinfo->volname, volinfo->version, version, hostname); *status = GLUSTERD_VOL_COMP_UPDATE_REQ; goto out; } else if (version < volinfo->version) { @@ -2143,8 +2511,8 @@ glusterd_compare_friend_volume (dict_t *vols, int32_t count, int32_t *status) if (cksum != volinfo->cksum) { ret = 0; gf_log ("", GF_LOG_ERROR, "Cksums of volume %s differ." 
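/* The friend handshake around this point compares each volume's
 * (version, cksum) pair: the visible branches request an update when the
 * peer's copy is newer, and reject outright when the versions match but the
 * checksums do not (the log now also prints the unsigned cksum with %u and
 * names the peer). A toy verdict function that collapses the version
 * direction into one case; the enum is illustrative, not the diff's
 * GLUSTERD_VOL_COMP_* codes:
 */
enum vol_verdict { VOL_IN_SYNC, VOL_NEEDS_UPDATE, VOL_REJECT };

static enum vol_verdict
friend_volume_verdict (int local_ver, unsigned local_cksum,
                       int remote_ver, unsigned remote_cksum)
{
        if (remote_ver != local_ver)
                return VOL_NEEDS_UPDATE; /* one side refreshes the other */
        if (remote_cksum != local_cksum)
                return VOL_REJECT;       /* same version, different content */
        return VOL_IN_SYNC;
}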
- " local cksum = %d, remote cksum = %d", - volinfo->volname, volinfo->cksum, cksum); + " local cksum = %u, remote cksum = %u on peer %s", + volinfo->volname, volinfo->cksum, cksum, hostname); *status = GLUSTERD_VOL_COMP_RJT; goto out; } @@ -2391,12 +2759,27 @@ out: return in; } +int +glusterd_spawn_daemons (void *opaque) +{ + glusterd_conf_t *conf = THIS->private; + gf_boolean_t start_bricks = !conf->restart_done; + + if (start_bricks) { + glusterd_restart_bricks (conf); + conf->restart_done = _gf_true; + } + glusterd_restart_gsyncds (conf); + glusterd_restart_rebalance (conf); + return 0; +} + void glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo, gf_boolean_t meets_quorum) { glusterd_brickinfo_t *brickinfo = NULL; - glusterd_conf_t *conf = NULL; + glusterd_conf_t *conf = NULL; conf = this->private; if (volinfo->status != GLUSTERD_STATUS_STARTED) @@ -2514,6 +2897,8 @@ glusterd_import_new_brick (dict_t *vols, int32_t vol_count, { char key[512] = {0,}; int ret = -1; + int32_t snap_status = 0; + char *snap_device = NULL; char *hostname = NULL; char *path = NULL; glusterd_brickinfo_t *new_brickinfo = NULL; @@ -2541,12 +2926,31 @@ glusterd_import_new_brick (dict_t *vols, int32_t vol_count, goto out; } + snprintf (key, sizeof (key), "volume%d.brick%d.snap_status", + vol_count, brick_count); + ret = dict_get_int32 (vols, key, &snap_status); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload", key); + goto out; + } + + snprintf (key, sizeof (key), "volume%d.brick%d.device_path", + vol_count, brick_count); + ret = dict_get_str (vols, key, &snap_device); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload", key); + goto out; + } + ret = glusterd_brickinfo_new (&new_brickinfo); if (ret) goto out; strcpy (new_brickinfo->path, path); strcpy (new_brickinfo->hostname, hostname); + strcpy (new_brickinfo->device_path, snap_device); + new_brickinfo->snap_status = snap_status; + //peerinfo might not be added yet (void) glusterd_resolve_brick (new_brickinfo); ret = 0; @@ -2600,6 +3004,9 @@ glusterd_import_volinfo (dict_t *vols, int count, int rb_status = 0; char *rebalance_id_str = NULL; char *rb_id_str = NULL; + int op_version = 0; + int client_op_version = 0; + uint32_t is_snap_volume = 0; GF_ASSERT (vols); GF_ASSERT (volinfo); @@ -2611,6 +3018,22 @@ glusterd_import_volinfo (dict_t *vols, int count, goto out; } + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.is_snap_volume", count); + ret = dict_get_uint32 (vols, key, &is_snap_volume); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload for %s", + key, volname); + goto out; + } + + if (is_snap_volume == _gf_true) { + gf_log (THIS->name, GF_LOG_DEBUG, + "Not syncing snap volume %s", volname); + ret = 0; + goto out; + } + ret = glusterd_volinfo_new (&new_volinfo); if (ret) goto out; @@ -2736,6 +3159,25 @@ glusterd_import_volinfo (dict_t *vols, int count, goto out; } + new_volinfo->is_snap_volume = is_snap_volume; + + snprintf (key, sizeof (key), "volume%d.is_volume_restored", count); + ret = dict_get_uint32 (vols, key, &new_volinfo->is_volume_restored); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to get " + "is_volume_restored option for %s", + volname); + goto out; + } + + snprintf (key, sizeof (key), "volume%d.snap-max-hard-limit", count); + ret = dict_get_uint64 (vols, key, &new_volinfo->snap_max_hard_limit); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload for %s", + key, volname); + goto out; + } + memset (key, 0, sizeof 
(key)); snprintf (key, sizeof (key), "volume%d.rebalance", count); ret = dict_get_uint32 (vols, key, &new_volinfo->rebal.defrag_cmd); @@ -2826,6 +3268,40 @@ glusterd_import_volinfo (dict_t *vols, int count, ret = glusterd_import_friend_volume_opts (vols, count, new_volinfo); if (ret) goto out; + + /* Import the volume's op-versions if available else set it to 1. + * Not having op-versions implies this informtation was obtained from a + * op-version 1 friend (gluster-3.3), ergo the cluster is at op-version + * 1 and all volumes are at op-versions 1. + * + * Either both the volume op-versions should be absent or both should be + * present. Only one being present is a failure + */ + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.op-version", count); + ret = dict_get_int32 (vols, key, &op_version); + if (ret) + ret = 0; + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.client-op-version", count); + ret = dict_get_int32 (vols, key, &client_op_version); + if (ret) + ret = 0; + + if (op_version && client_op_version) { + new_volinfo->op_version = op_version; + new_volinfo->client_op_version = client_op_version; + } else if (((op_version == 0) && (client_op_version != 0)) || + ((op_version != 0) && (client_op_version == 0))) { + ret = -1; + gf_log ("glusterd", GF_LOG_ERROR, + "Only one volume op-version found"); + goto out; + } else { + new_volinfo->op_version = 1; + new_volinfo->client_op_version = 1; + } + ret = glusterd_import_bricks (vols, count, new_volinfo); if (ret) goto out; @@ -2957,7 +3433,7 @@ glusterd_delete_stale_volume (glusterd_volinfo_t *stale_volinfo, (void) glusterd_delete_all_bricks (stale_volinfo); if (stale_volinfo->shandle) { unlink (stale_volinfo->shandle->path); - (void) glusterd_store_handle_destroy (stale_volinfo->shandle); + (void) gf_store_handle_destroy (stale_volinfo->shandle); stale_volinfo->shandle = NULL; } (void) glusterd_volinfo_delete (stale_volinfo); @@ -2984,6 +3460,12 @@ glusterd_import_friend_volume (dict_t *vols, size_t count) if (ret) goto out; + if (!new_volinfo) { + gf_log (this->name, GF_LOG_DEBUG, + "Not importing snap volume"); + goto out; + } + ret = glusterd_volinfo_find (new_volinfo->volname, &old_volinfo); if (0 == ret) { (void) glusterd_delete_stale_volume (old_volinfo, new_volinfo); @@ -3119,7 +3601,7 @@ out: } int32_t -glusterd_compare_friend_data (dict_t *vols, int32_t *status) +glusterd_compare_friend_data (dict_t *vols, int32_t *status, char *hostname) { int32_t ret = -1; int32_t count = 0; @@ -3136,7 +3618,8 @@ glusterd_compare_friend_data (dict_t *vols, int32_t *status) goto out; while (i <= count) { - ret = glusterd_compare_friend_volume (vols, i, status); + ret = glusterd_compare_friend_volume (vols, i, status, + hostname); if (ret) goto out; @@ -3387,6 +3870,7 @@ glusterd_nodesvc_connect (char *server, char *socketpath) { int ret = 0; dict_t *options = NULL; struct rpc_clnt *rpc = NULL; + glusterd_conf_t *priv = THIS->private; rpc = glusterd_nodesvc_get_rpc (server); @@ -3396,13 +3880,15 @@ glusterd_nodesvc_connect (char *server, char *socketpath) { * The default timeout of 30mins used for unreliable network * connections is too long for unix domain socket connections. 
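/* The import path above accepts either both per-volume op-versions or
 * neither (an op-version-1 peer, i.e. gluster-3.3, sends none); exactly one
 * key present is treated as corruption. The same predicate in isolation:
 */
static int
import_op_versions (int op_version, int client_op_version,
                    int *vol_ov, int *vol_cov)
{
        if (op_version && client_op_version) {
                *vol_ov  = op_version;
                *vol_cov = client_op_version;
        } else if (!op_version && !client_op_version) {
                *vol_ov  = 1;   /* default for an op-version-1 peer */
                *vol_cov = 1;
        } else {
                return -1;      /* only one present: reject the payload */
        }
        return 0;
}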
*/ - ret = rpc_clnt_transport_unix_options_build (&options, - socketpath, 600); + ret = rpc_transport_unix_options_build (&options, socketpath, + 600); if (ret) goto out; + synclock_unlock (&priv->big_lock); ret = glusterd_rpc_create (&rpc, options, glusterd_nodesvc_rpc_notify, server); + synclock_lock (&priv->big_lock); if (ret) goto out; (void) glusterd_nodesvc_set_rpc (server, rpc); @@ -3417,12 +3903,10 @@ glusterd_nodesvc_disconnect (char *server) struct rpc_clnt *rpc = NULL; rpc = glusterd_nodesvc_get_rpc (server); + (void)glusterd_nodesvc_set_rpc (server, NULL); - if (rpc) { - rpc_clnt_connection_cleanup (&rpc->conn); + if (rpc) rpc_clnt_unref (rpc); - (void)glusterd_nodesvc_set_rpc (server, NULL); - } return 0; } @@ -3441,9 +3925,7 @@ glusterd_nodesvc_start (char *server) char sockfpath[PATH_MAX] = {0,}; char volfileid[256] = {0}; char glusterd_uuid_option[1024] = {0}; -#ifdef DEBUG char valgrind_logfile[PATH_MAX] = {0}; -#endif this = THIS; GF_ASSERT(this); @@ -3480,7 +3962,6 @@ glusterd_nodesvc_start (char *server) runinit (&runner); -#ifdef DEBUG if (priv->valgrind) { snprintf (valgrind_logfile, PATH_MAX, "%s/valgrind-%s.log", @@ -3488,10 +3969,10 @@ glusterd_nodesvc_start (char *server) server); runner_add_args (&runner, "valgrind", "--leak-check=full", - "--trace-children=yes", NULL); + "--trace-children=yes", "--track-origins=yes", + NULL); runner_argprintf (&runner, "--log-file=%s", valgrind_logfile); } -#endif runner_add_args (&runner, SBIN_DIR"/glusterfs", "-s", "localhost", @@ -3617,6 +4098,10 @@ glusterd_nfs_pmap_deregister () else gf_log ("", GF_LOG_ERROR, "De-registration of NLM v1 failed"); + if (pmap_unset (ACL_PROGRAM, ACLV3_VERSION)) + gf_log ("", GF_LOG_INFO, "De-registered ACL v3 successfully"); + else + gf_log ("", GF_LOG_ERROR, "De-registration of ACL v3 failed"); } int @@ -3797,6 +4282,11 @@ glusterd_reconfigure_nfs () int ret = -1; gf_boolean_t identical = _gf_false; + /* + * Check both OLD and NEW volfiles, if they are SAME by size + * and cksum i.e. "character-by-character". If YES, then + * NOTHING has been changed, just return. + */ ret = glusterd_check_nfs_volfile_identical (&identical); if (ret) goto out; @@ -3806,6 +4296,31 @@ glusterd_reconfigure_nfs () goto out; } + /* + * They are not identical. Find out if the topology is changed + * OR just the volume options. If just the options which got + * changed, then inform the xlator to reconfigure the options. + */ + identical = _gf_false; /* RESET the FLAG */ + ret = glusterd_check_nfs_topology_identical (&identical); + if (ret) + goto out; + + /* Topology is not changed, but just the options. But write the + * options to NFS volfile, so that NFS will be reconfigured. + */ + if (identical) { + ret = glusterd_create_nfs_volfile(); + if (ret == 0) {/* Only if above PASSES */ + ret = glusterd_fetchspec_notify (THIS); + } + goto out; + } + + /* + * NFS volfile's topology has been changed. NFS server needs + * to be RESTARTED to ACT on the changed volfile. 
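/* Decision ladder implemented by glusterd_reconfigure_nfs here, condensed
 * to pseudo-C (names follow the diff): identical volfile, do nothing; same
 * topology but changed options, rewrite the volfile and tell the running
 * NFS server to reconfigure in place; changed topology, fall through to a
 * full restart:
 *
 *        if (volfile_identical ())
 *                return;
 *        if (topology_identical ()) {
 *                glusterd_create_nfs_volfile ();
 *                glusterd_fetchspec_notify (THIS);  // reconfigure in place
 *                return;
 *        }
 *        glusterd_check_generate_start_nfs ();      // restart the server
 */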
+ */ ret = glusterd_check_generate_start_nfs (); out: @@ -4037,22 +4552,39 @@ out: int glusterd_restart_bricks (glusterd_conf_t *conf) { + int ret = 0; glusterd_volinfo_t *volinfo = NULL; glusterd_brickinfo_t *brickinfo = NULL; + glusterd_snap_t *snap = NULL; gf_boolean_t start_nodesvcs = _gf_false; - int ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); list_for_each_entry (volinfo, &conf->volumes, vol_list) { if (volinfo->status != GLUSTERD_STATUS_STARTED) continue; start_nodesvcs = _gf_true; - if (glusterd_is_volume_in_server_quorum (volinfo)) { - //these bricks will be restarted once the quorum is met - continue; + gf_log (this->name, GF_LOG_DEBUG, "starting the volume %s", + volinfo->volname); + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + glusterd_brick_start (volinfo, brickinfo, _gf_false); } + } - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - glusterd_brick_start (volinfo, brickinfo, _gf_true); + list_for_each_entry (snap, &conf->snapshots, snap_list) { + list_for_each_entry (volinfo, &snap->volumes, vol_list) { + if (volinfo->status != GLUSTERD_STATUS_STARTED) + continue; + start_nodesvcs = _gf_true; + gf_log (this->name, GF_LOG_DEBUG, "starting the snap " + "volume %s", volinfo->volname); + list_for_each_entry (brickinfo, &volinfo->bricks, + brick_list) { + glusterd_brick_start (volinfo, brickinfo, + _gf_false); + } } } @@ -4065,10 +4597,13 @@ glusterd_restart_bricks (glusterd_conf_t *conf) int _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data) { + char *path_list = NULL; char *slave = NULL; int uuid_len = 0; + int ret = 0; char uuid_str[64] = {0}; glusterd_volinfo_t *volinfo = NULL; + char *conf_path = NULL; volinfo = data; GF_ASSERT (volinfo); @@ -4080,9 +4615,24 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data) uuid_len = (slave - value->data - 1); strncpy (uuid_str, (char*)value->data, uuid_len); - glusterd_start_gsync (volinfo, slave, uuid_str, NULL); - return 0; + ret = glusterd_get_local_brickpaths (volinfo, &path_list); + + ret = dict_get_str (this, "conf_path", &conf_path); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to fetch conf file path."); + goto out; + } + + glusterd_start_gsync (volinfo, slave, path_list, conf_path, + uuid_str, NULL); + + GF_FREE (path_list); + path_list = NULL; + +out: + return ret; } int @@ -4131,7 +4681,7 @@ glusterd_get_brickinfo (xlator_t *this, const char *brickname, int port, list_for_each_entry (volinfo, &priv->volumes, vol_list) { list_for_each_entry (tmpbrkinfo, &volinfo->bricks, brick_list) { - if (localhost && !glusterd_is_local_addr (tmpbrkinfo->hostname)) + if (localhost && !gf_is_local_addr (tmpbrkinfo->hostname)) continue; if (!strcmp(tmpbrkinfo->path, brickname) && (tmpbrkinfo->port == port)) { @@ -4203,26 +4753,27 @@ out: } #ifdef GF_LINUX_HOST_OS -static int +int glusterd_get_brick_root (char *path, char **mount_point) { char *ptr = NULL; + char *mnt_pt = NULL; struct stat brickstat = {0}; struct stat buf = {0}; if (!path) goto err; - *mount_point = gf_strdup (path); - if (!*mount_point) + mnt_pt = gf_strdup (path); + if (!mnt_pt) goto err; - if (stat (*mount_point, &brickstat)) + if (stat (mnt_pt, &brickstat)) goto err; - while ((ptr = strrchr (*mount_point, '/')) && - ptr != *mount_point) { + while ((ptr = strrchr (mnt_pt, '/')) && + ptr != mnt_pt) { *ptr = '\0'; - if (stat (*mount_point, &buf)) { + if (stat (mnt_pt, &buf)) { gf_log (THIS->name, GF_LOG_ERROR, "error in " "stat: %s", strerror (errno)); goto err; 
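/* glusterd_get_brick_root (reworked here) now walks a private copy, mnt_pt,
 * and assigns *mount_point only once the walk succeeds, so the caller never
 * sees, or double-frees, a half-computed value. The shape of that
 * out-parameter discipline; compute_into and the path literal are
 * illustrative, and GF_FREE tolerates NULL just as the err path here
 * relies on:
 */
static int
compute_into (char **out)
{
        char *tmp = gf_strdup ("/some/path"); /* work on a private copy */

        if (!tmp)
                goto err;
        /* ... mutate and validate tmp ... */
        *out = tmp;                           /* publish only on success */
        return 0;
err:
        GF_FREE (tmp);
        return -1;
}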
@@ -4234,20 +4785,21 @@ glusterd_get_brick_root (char *path, char **mount_point) } } - if (ptr == *mount_point) { + if (ptr == mnt_pt) { if (stat ("/", &buf)) { gf_log (THIS->name, GF_LOG_ERROR, "error in " "stat: %s", strerror (errno)); goto err; } if (brickstat.st_dev == buf.st_dev) - strcpy (*mount_point, "/"); + strcpy (mnt_pt, "/"); } + *mount_point = mnt_pt; return 0; err: - GF_FREE (*mount_point); + GF_FREE (mnt_pt); return -1; } @@ -4369,6 +4921,31 @@ glusterd_add_inode_size_to_dict (dict_t *dict, int count) return ret; } +struct mntent * +glusterd_get_mnt_entry_info (char *mnt_pt, FILE *mtab) +{ + struct mntent *entry = NULL; + + mtab = setmntent (_PATH_MOUNTED, "r"); + if (!mtab) + goto out; + + entry = getmntent (mtab); + + while (1) { + if (!entry) + goto out; + + if (!strcmp (entry->mnt_dir, mnt_pt) && + strcmp (entry->mnt_type, "rootfs")) + break; + entry = getmntent (mtab); + } + +out: + return entry; +} + static int glusterd_add_brick_mount_details (glusterd_brickinfo_t *brickinfo, dict_t *dict, int count) @@ -4380,8 +4957,8 @@ glusterd_add_brick_mount_details (glusterd_brickinfo_t *brickinfo, char *fs_name = NULL; char *mnt_options = NULL; char *device = NULL; - FILE *mtab = NULL; struct mntent *entry = NULL; + FILE *mtab = NULL; snprintf (base_key, sizeof (base_key), "brick%d", count); @@ -4389,25 +4966,12 @@ glusterd_add_brick_mount_details (glusterd_brickinfo_t *brickinfo, if (ret) goto out; - mtab = setmntent (_PATH_MOUNTED, "r"); - if (!mtab) { + entry = glusterd_get_mnt_entry_info (mnt_pt, mtab); + if (!entry) { ret = -1; goto out; } - entry = getmntent (mtab); - - while (1) { - if (!entry) { - ret = -1; - goto out; - } - if (!strcmp (entry->mnt_dir, mnt_pt) && - strcmp (entry->mnt_type, "rootfs")) - break; - entry = getmntent (mtab); - } - /* get device file */ memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "%s.device", base_key); @@ -4440,6 +5004,45 @@ glusterd_add_brick_mount_details (glusterd_brickinfo_t *brickinfo, return ret; } + +char* +glusterd_get_brick_mount_details (glusterd_brickinfo_t *brickinfo) +{ + int ret = -1; + char *mnt_pt = NULL; + char *device = NULL; + FILE *mtab = NULL; + struct mntent *entry = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (brickinfo); + + ret = glusterd_get_brick_root (brickinfo->path, &mnt_pt); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get mount point " + "for %s brick", brickinfo->path); + goto out; + } + + entry = glusterd_get_mnt_entry_info (mnt_pt, mtab); + if (NULL == entry) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get mnt entry " + "for %s mount path", mnt_pt); + goto out; + } + + /* get the fs_name/device */ + device = gf_strdup (entry->mnt_fsname); + +out: + if (NULL != mtab) { + endmntent (mtab); + } + + return device; +} #endif int @@ -4540,7 +5143,6 @@ glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo, char key[1024] = {0}; char base_key[1024] = {0}; char pidfile[PATH_MAX] = {0}; - char path[PATH_MAX] = {0}; xlator_t *this = NULL; glusterd_conf_t *priv = NULL; @@ -4573,9 +5175,7 @@ glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; - GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); - GLUSTERD_GET_BRICK_PIDFILE (pidfile, path, brickinfo->hostname, - brickinfo->path); + GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv); brick_online = glusterd_is_service_running (pidfile, &pid); @@ -4799,10 +5399,11 @@ glusterd_hostname_to_uuid (char *hostname, uuid_t uuid) ret = glusterd_friend_find_by_hostname 
(hostname, &peerinfo); if (ret) { - if (glusterd_is_local_addr (hostname)) { + if (gf_is_local_addr (hostname)) { uuid_copy (uuid, MY_UUID); ret = 0; } else { + ret = 0; goto out; } } else { @@ -5118,8 +5719,13 @@ out: } if (*in_use) { - snprintf (msg, sizeof (msg), "%s or a prefix of it is " - "already part of a volume", path); + if (!strcmp (path, curdir)) { + snprintf (msg, sizeof (msg), "%s is already part of a " + "volume", path); + } else { + snprintf (msg, sizeof (msg), "parent directory %s is " + "already part of a volume", curdir); + } } if (strlen (msg)) { @@ -5131,16 +5737,13 @@ out: } int -glusterd_brick_create_path (char *host, char *path, uuid_t uuid, - char **op_errstr) +glusterd_check_and_set_brick_xattr (char *host, char *path, uuid_t uuid, + char **op_errstr, gf_boolean_t is_force) { int ret = -1; char msg[2048] = {0,}; gf_boolean_t in_use = _gf_false; - - ret = mkdir_p (path, 0777, _gf_true); - if (ret) - goto out; + int flags = 0; /* Check for xattr support in backend fs */ ret = sys_lsetxattr (path, "trusted.glusterfs.test", @@ -5154,20 +5757,23 @@ glusterd_brick_create_path (char *host, char *path, uuid_t uuid, } else { sys_lremovexattr (path, "trusted.glusterfs.test"); - } ret = glusterd_is_path_in_use (path, &in_use, op_errstr); if (ret) goto out; - if (in_use) { + if (in_use && !is_force) { ret = -1; goto out; } + + if (!is_force) + flags = XATTR_CREATE; + ret = sys_lsetxattr (path, GF_XATTR_VOL_ID_KEY, uuid, 16, - XATTR_CREATE); + flags); if (ret) { snprintf (msg, sizeof (msg), "Failed to set extended " "attributes %s, reason: %s", @@ -5181,7 +5787,6 @@ out: *op_errstr = gf_strdup (msg); return ret; - } int @@ -5493,11 +6098,15 @@ glusterd_delete_brick (glusterd_volinfo_t* volinfo, glusterd_brickinfo_t *brickinfo) { int ret = 0; + char voldir[PATH_MAX] = {0,}; + glusterd_conf_t *priv = THIS->private; GF_ASSERT (volinfo); GF_ASSERT (brickinfo); + GLUSTERD_GET_VOLUME_DIR(voldir, volinfo, priv); + glusterd_delete_volfile (volinfo, brickinfo); - glusterd_store_delete_brick (volinfo, brickinfo); + glusterd_store_delete_brick (brickinfo, voldir); glusterd_brickinfo_delete (brickinfo); volinfo->brick_count--; return ret; @@ -5519,12 +6128,92 @@ glusterd_delete_all_bricks (glusterd_volinfo_t* volinfo) } int +glusterd_get_local_brickpaths (glusterd_volinfo_t *volinfo, char **pathlist) +{ + char **path_tokens = NULL; + char *tmp_path_list = NULL; + char path[PATH_MAX] = ""; + int32_t count = 0; + int32_t pathlen = 0; + int32_t total_len = 0; + int32_t ret = 0; + int i = 0; + glusterd_brickinfo_t *brickinfo = NULL; + + if ((!volinfo) || (!pathlist)) + goto out; + + path_tokens = GF_CALLOC (sizeof(char*), volinfo->brick_count, + gf_gld_mt_charptr); + if (!path_tokens) { + gf_log ("", GF_LOG_DEBUG, "Could not allocate memory."); + ret = -1; + goto out; + } + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + pathlen = snprintf (path, sizeof(path), + "--path=%s ", brickinfo->path); + if (pathlen < sizeof(path)) + path[pathlen] = '\0'; + else + path[sizeof(path)-1] = '\0'; + path_tokens[count] = gf_strdup (path); + if (!path_tokens[count]) { + gf_log ("", GF_LOG_DEBUG, + "Could not allocate memory."); + ret = -1; + goto out; + } + count++; + total_len += pathlen; + } + + tmp_path_list = GF_CALLOC (sizeof(char), total_len + 1, + gf_gld_mt_char); + if (!tmp_path_list) { + gf_log ("", GF_LOG_DEBUG, "Could not allocate memory."); + ret = -1; + goto out; + } + + for (i = 0; i < count; i++) + strcat 
(tmp_path_list, path_tokens[i]); + + if (count) + *pathlist = tmp_path_list; + + ret = count; +out: + for (i = 0; i < count; i++) { + GF_FREE (path_tokens[i]); + path_tokens[i] = NULL; + } + + GF_FREE (path_tokens); + path_tokens = NULL; + + if (ret == 0) { + gf_log ("", GF_LOG_DEBUG, "No Local Bricks Present."); + GF_FREE (tmp_path_list); + tmp_path_list = NULL; + } + + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave, - char *glusterd_uuid_str, char **op_errstr) + char *path_list, char *conf_path, + char *glusterd_uuid_str, + char **op_errstr) { int32_t ret = 0; int32_t status = 0; - char buf[PATH_MAX] = {0,}; char uuid_str [64] = {0}; runner_t runner = {0,}; xlator_t *this = NULL; @@ -5537,47 +6226,45 @@ glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave, GF_ASSERT (priv); uuid_utoa_r (MY_UUID, uuid_str); - if (strcmp (uuid_str, glusterd_uuid_str)) - goto out; - ret = gsync_status (master_vol->volname, slave, &status); - if (status == 0) - goto out; - - snprintf (buf, PATH_MAX, "%s/"GEOREP"/%s", priv->workdir, master_vol->volname); - ret = mkdir_p (buf, 0777, _gf_true); - if (ret) { - errcode = -1; + if (!path_list) { + ret = 0; + gf_log ("", GF_LOG_DEBUG, "No Bricks in this node." + " Not starting gsyncd."); goto out; } - snprintf (buf, PATH_MAX, DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"/%s", - master_vol->volname); - ret = mkdir_p (buf, 0777, _gf_true); - if (ret) { - errcode = -1; + ret = gsync_status (master_vol->volname, slave, conf_path, &status); + if (status == 0) goto out; - } uuid_utoa_r (master_vol->volume_id, uuid_str); runinit (&runner); - runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL); - runner_argprintf (&runner, "%s/"GSYNC_CONF, priv->workdir); + runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", + path_list, "-c", NULL); + runner_argprintf (&runner, "%s", conf_path); runner_argprintf (&runner, ":%s", master_vol->volname); runner_add_args (&runner, slave, "--config-set", "session-owner", uuid_str, NULL); + synclock_unlock (&priv->big_lock); ret = runner_run (&runner); + synclock_lock (&priv->big_lock); if (ret == -1) { errcode = -1; goto out; } runinit (&runner); - runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "--monitor", "-c", NULL); - runner_argprintf (&runner, "%s/"GSYNC_CONF, priv->workdir); + runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", + path_list, "--monitor", "-c", NULL); + runner_argprintf (&runner, "%s", conf_path); runner_argprintf (&runner, ":%s", master_vol->volname); + runner_argprintf (&runner, "--glusterd-uuid=%s", + uuid_utoa (priv->uuid)); runner_add_arg (&runner, slave); + synclock_unlock (&priv->big_lock); ret = runner_run (&runner); + synclock_lock (&priv->big_lock); if (ret == -1) { gf_asprintf (op_errstr, GEOREP" start failed for %s %s", master_vol->volname, slave); @@ -5589,7 +6276,7 @@ glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave, out: if ((ret != 0) && errcode == -1) { if (op_errstr) - *op_errstr = gf_strdup ("internal error, cannot start" + *op_errstr = gf_strdup ("internal error, cannot start " "the " GEOREP " session"); } @@ -5598,17 +6285,38 @@ out: } int32_t -glusterd_recreate_bricks (glusterd_conf_t *conf) +glusterd_recreate_volfiles (glusterd_conf_t *conf) { glusterd_volinfo_t *volinfo = NULL; int ret = 0; + int op_ret = 0; GF_ASSERT (conf); list_for_each_entry (volinfo, &conf->volumes, vol_list) { ret = generate_brick_volfiles (volinfo); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, "Failed to 
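/* --- editor's sketch ----------------------------------------------
 * glusterd_start_gsync() above now brackets each runner_run() with a
 * synclock_unlock()/synclock_lock() pair, releasing glusterd's big
 * lock while the external gsyncd process runs so other operations do
 * not stall behind a fork/exec/wait.  A stand-in sketch of the
 * pattern (a plain pthread mutex substitutes for the synctask lock;
 * the caller is assumed to hold it on entry): */

#include <stdlib.h>
#include <pthread.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

static int
run_cmd_unlocked (const char *cmd)
{
        int ret;

        pthread_mutex_unlock (&big_lock);
        ret = system (cmd);             /* blocking external command */
        pthread_mutex_lock (&big_lock);

        return ret;
}
/* ------------------------------------------------------------------ */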
" + "regenerate brick volfiles for %s", + volinfo->volname); + op_ret = ret; + } + ret = generate_client_volfiles (volinfo, GF_CLIENT_TRUSTED); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, "Failed to " + "regenerate trusted client volfiles for %s", + volinfo->volname); + op_ret = ret; + } + ret = generate_client_volfiles (volinfo, GF_CLIENT_OTHER); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, "Failed to " + "regenerate client volfiles for %s", + volinfo->volname); + op_ret = ret; + } } - return ret; + return op_ret; } int32_t @@ -5618,7 +6326,7 @@ glusterd_handle_upgrade_downgrade (dict_t *options, glusterd_conf_t *conf) char *type = NULL; gf_boolean_t upgrade = _gf_false; gf_boolean_t downgrade = _gf_false; - gf_boolean_t regenerate_brick_volfiles = _gf_false; + gf_boolean_t regenerate_volfiles = _gf_false; gf_boolean_t terminate = _gf_false; ret = dict_get_str (options, "upgrade", &type); @@ -5631,7 +6339,7 @@ glusterd_handle_upgrade_downgrade (dict_t *options, glusterd_conf_t *conf) goto out; } if (_gf_true == upgrade) - regenerate_brick_volfiles = _gf_true; + regenerate_volfiles = _gf_true; } ret = dict_get_str (options, "downgrade", &type); @@ -5656,8 +6364,8 @@ glusterd_handle_upgrade_downgrade (dict_t *options, glusterd_conf_t *conf) ret = 0; else terminate = _gf_true; - if (regenerate_brick_volfiles) { - ret = glusterd_recreate_bricks (conf); + if (regenerate_volfiles) { + ret = glusterd_recreate_volfiles (conf); } out: if (terminate && (ret == 0)) @@ -5732,7 +6440,6 @@ glusterd_brick_statedump (glusterd_volinfo_t *volinfo, xlator_t *this = NULL; glusterd_conf_t *conf = NULL; char pidfile_path[PATH_MAX] = {0,}; - char path[PATH_MAX] = {0,}; char dumpoptions_path[PATH_MAX] = {0,}; FILE *pidfile = NULL; pid_t pid = -1; @@ -5757,9 +6464,7 @@ glusterd_brick_statedump (glusterd_volinfo_t *volinfo, goto out; } - GLUSTERD_GET_VOLUME_DIR (path, volinfo, conf); - GLUSTERD_GET_BRICK_PIDFILE (pidfile_path, path, brickinfo->hostname, - brickinfo->path); + GLUSTERD_GET_BRICK_PIDFILE (pidfile_path, volinfo, brickinfo, conf); pidfile = fopen (pidfile_path, "r"); if (!pidfile) { @@ -6043,7 +6748,6 @@ glusterd_restart_rebalance (glusterd_conf_t *conf) return ret; } - void glusterd_volinfo_reset_defrag_stats (glusterd_volinfo_t *volinfo) { @@ -6056,6 +6760,7 @@ glusterd_volinfo_reset_defrag_stats (glusterd_volinfo_t *volinfo) rebal->lookedup_files = 0; rebal->rebalance_failures = 0; rebal->rebalance_time = 0; + rebal->skipped_files = 0; } @@ -6152,6 +6857,7 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo, uint64_t lookup = 0; gf_defrag_status_t status = GF_DEFRAG_STATUS_NOT_STARTED; uint64_t failures = 0; + uint64_t skipped = 0; xlator_t *this = NULL; double run_time = 0; @@ -6182,6 +6888,11 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo, gf_log (this->name, GF_LOG_TRACE, "failed to get failure count"); + ret = dict_get_uint64 (rsp_dict, "skipped", &skipped); + if (ret) + gf_log (this->name, GF_LOG_TRACE, + "failed to get skipped count"); + ret = dict_get_double (rsp_dict, "run-time", &run_time); if (ret) gf_log (this->name, GF_LOG_TRACE, @@ -6197,6 +6908,8 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo, volinfo->rebal.defrag_status = status; if (failures) volinfo->rebal.rebalance_failures = failures; + if (skipped) + volinfo->rebal.skipped_files = skipped; if (run_time) volinfo->rebal.rebalance_time = run_time; @@ -6204,6 +6917,68 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo, } int 
+glusterd_check_topology_identical (const char *filename1, + const char *filename2, + gf_boolean_t *identical) +{ + int ret = -1; /* FAILURE */ + xlator_t *this = NULL; + FILE *fp1 = NULL; + FILE *fp2 = NULL; + glusterfs_graph_t *grph1 = NULL; + glusterfs_graph_t *grph2 = NULL; + + if ((!filename1) || (!filename2) || (!identical)) + goto out; + + this = THIS; + + errno = 0; /* RESET the errno */ + + /* fopen() the volfile1 to create the graph */ + fp1 = fopen (filename1, "r"); + if (fp1 == NULL) { + gf_log (this->name, GF_LOG_ERROR, "fopen() on file: %s failed " + "(%s)", filename1, strerror (errno)); + goto out; + } + + /* fopen() the volfile2 to create the graph */ + fp2 = fopen (filename2, "r"); + if (fp2 == NULL) { + gf_log (this->name, GF_LOG_ERROR, "fopen() on file: %s failed " + "(%s)", filename2, strerror (errno)); + goto out; + } + + /* create the graph for filename1 */ + grph1 = glusterfs_graph_construct(fp1); + if (grph1 == NULL) + goto out; + + /* create the graph for filename2 */ + grph2 = glusterfs_graph_construct(fp2); + if (grph2 == NULL) + goto out; + + /* compare the graph topology */ + *identical = is_graph_topology_equal(grph1, grph2); + ret = 0; /* SUCCESS */ +out: + if (fp1) + fclose(fp1); + if (fp2) + fclose(fp2); + if (grph1) + glusterfs_graph_destroy(grph1); + if (grph2) + glusterfs_graph_destroy(grph2); + + gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); + return ret; +} + +int glusterd_check_files_identical (char *filename1, char *filename2, gf_boolean_t *identical) { @@ -6463,6 +7238,7 @@ glusterd_gsync_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict, char *op_errstr) { dict_t *ctx = NULL; int ret = 0; + char *conf_path = NULL; if (aggr) { ctx = aggr; @@ -6484,6 +7260,17 @@ glusterd_gsync_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict, char *op_errstr) ret = glusterd_append_gsync_status (ctx, rsp_dict); if (ret) goto out; + + ret = dict_get_str (rsp_dict, "conf_path", &conf_path); + if (!ret && conf_path) { + ret = dict_set_dynstr (ctx, "conf_path", + gf_strdup(conf_path)); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to store conf path."); + goto out; + } + } } if ((op_errstr) && (strcmp ("", op_errstr))) { ret = dict_set_dynstr (ctx, "errstr", gf_strdup(op_errstr)); @@ -6704,7 +7491,7 @@ glusterd_volume_status_copy_to_op_ctx_dict (dict_t *aggr, dict_t *rsp_dict) if (ret) goto out; - if (cmd & GF_CLI_STATUS_ALL && is_origin_glusterd ()) { + if (cmd & GF_CLI_STATUS_ALL && is_origin_glusterd (ctx_dict)) { ret = dict_get_int32 (rsp_dict, "vol_count", &vol_count); if (ret == 0) { ret = dict_set_int32 (ctx_dict, "vol_count", @@ -6726,6 +7513,12 @@ glusterd_volume_status_copy_to_op_ctx_dict (dict_t *aggr, dict_t *rsp_dict) } } + if ((cmd & GF_CLI_STATUS_TASKS) != 0) { + dict_copy (rsp_dict, aggr); + ret = 0; + goto out; + } + ret = dict_get_int32 (rsp_dict, "count", &rsp_node_count); if (ret) { ret = 0; //no bricks in the rsp @@ -6795,6 +7588,7 @@ glusterd_volume_rebalance_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict) int32_t current_index = 2; int32_t value32 = 0; uint64_t value = 0; + char *peer_uuid_str = NULL; GF_ASSERT (rsp_dict); conf = THIS->private; @@ -6837,9 +7631,10 @@ glusterd_volume_rebalance_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict) /* Finding the index of the node-uuid in the peer-list */ list_for_each_entry (peerinfo, &conf->peers, uuid_list) { - if (!strcmp(peerinfo->uuid_str, node_uuid_str)){ + peer_uuid_str = gd_peer_uuid_str (peerinfo); + if (strcmp (peer_uuid_str, node_uuid_str) == 0) break; - } + current_index++; } @@ -6926,6 
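/* --- editor's note & sketch ---------------------------------------
 * glusterd_check_topology_identical() above compares only the shape
 * of two volfile graphs, presumably so that option-value-only edits
 * do not force a daemon restart the way a byte-for-byte mismatch
 * would.  One nit: the early "goto out" for NULL arguments runs
 * before "this = THIS" is assigned, yet the log call at out:
 * dereferences this->name, which looks like a NULL dereference on
 * that path.  For contrast, a byte-for-byte check in the spirit of
 * glusterd_check_files_identical(): */

#include <stdio.h>

static int
files_identical (const char *f1, const char *f2, int *same)
{
        FILE *fp1 = fopen (f1, "r");
        FILE *fp2 = fopen (f2, "r");
        int   c1 = 0, c2 = 0, ret = -1;

        if (!fp1 || !fp2)
                goto out;

        do {
                c1 = fgetc (fp1);
                c2 = fgetc (fp2);
        } while (c1 == c2 && c1 != EOF);

        *same = (c1 == c2);
        ret = 0;
out:
        if (fp1)
                fclose (fp1);
        if (fp2)
                fclose (fp2);
        return ret;
}
/* ------------------------------------------------------------------ */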
+7721,18 @@ glusterd_volume_rebalance_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict) } memset (key, 0, 256); + snprintf (key, 256, "skipped-%d", index); + ret = dict_get_uint64 (rsp_dict, key, &value); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "skipped-%d", current_index); + ret = dict_set_uint64 (ctx_dict, key, value); + if (ret) { + gf_log (THIS->name, GF_LOG_DEBUG, + "failed to set skipped count"); + } + } + memset (key, 0, 256); snprintf (key, 256, "run-time-%d", index); ret = dict_get_double (rsp_dict, key, &elapsed_time); if (!ret) { @@ -6945,6 +7752,334 @@ out: } int +glusterd_snap_config_use_rsp_dict (dict_t *dst, dict_t *src) +{ + char buf[PATH_MAX] = ""; + char *volname = NULL; + int ret = -1; + int config_command = 0; + uint64_t i = 0; + uint64_t value = 0; + uint64_t voldisplaycount = 0; + + if (!dst || !src) { + gf_log ("", GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (dst, "config-command", &config_command); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "failed to get config-command type"); + goto out; + } + + switch (config_command) { + case GF_SNAP_CONFIG_DISPLAY: + ret = dict_get_uint64 (src, "snap-max-hard-limit", &value); + if (!ret) { + ret = dict_set_uint64 (dst, "snap-max-hard-limit", value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set snap_max_hard_limit"); + goto out; + } + } else { + /* Received dummy response from other nodes */ + ret = 0; + goto out; + } + + ret = dict_get_uint64 (src, "snap-max-soft-limit", &value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get snap_max_soft_limit"); + goto out; + } + + ret = dict_set_uint64 (dst, "snap-max-soft-limit", value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set snap_max_soft_limit"); + goto out; + } + + ret = dict_get_uint64 (src, "voldisplaycount", + &voldisplaycount); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get voldisplaycount"); + goto out; + } + + ret = dict_set_uint64 (dst, "voldisplaycount", + voldisplaycount); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set voldisplaycount"); + goto out; + } + + for (i = 0; i < voldisplaycount; i++) { + snprintf (buf, sizeof(buf), "volume%ld-volname", i); + ret = dict_get_str (src, buf, &volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_str (dst, buf, volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-snap-max-hard-limit", i); + ret = dict_get_uint64 (src, buf, &value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_uint64 (dst, buf, value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-active-hard-limit", i); + ret = dict_get_uint64 (src, buf, &value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_uint64 (dst, buf, value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-snap-max-soft-limit", i); + ret = dict_get_uint64 (src, buf, &value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_uint64 (dst, buf, value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set %s", buf); + goto out; + } + } + + break; + default: + break; + } + + ret = 0; +out: + gf_log ("", 
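/* --- editor's note & sketch ---------------------------------------
 * The per-volume keys built above ("volume%ld-volname" and friends)
 * format a uint64_t counter with %ld, which only matches on LP64
 * targets; "%" PRIu64 from <inttypes.h> is the portable conversion.
 * In isolation: */

#include <stdio.h>
#include <inttypes.h>

int
main (void)
{
        char     key[64];
        uint64_t i = 3;

        snprintf (key, sizeof (key), "volume%" PRIu64 "-volname", i);
        puts (key);                     /* volume3-volname */
        return 0;
}
/* ------------------------------------------------------------------ */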
GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +/* Aggregate missed_snap_counts from different nodes and save it * + * in the req_dict of the originator node */ +int +glusterd_snap_create_use_rsp_dict (dict_t *dst, dict_t *src) +{ + char *buf = NULL; + char *tmp_str = NULL; + char name_buf[PATH_MAX] = ""; + int32_t i = -1; + int32_t ret = -1; + int32_t src_missed_snap_count = -1; + int32_t dst_missed_snap_count = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + if (!dst || !src) { + gf_log (this->name, GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (src, "missed_snap_count", + &src_missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "No missed snaps"); + ret = 0; + goto out; + } + + ret = dict_get_int32 (dst, "missed_snap_count", + &dst_missed_snap_count); + if (ret) { + /* Initialize dst_missed_count for the first time */ + dst_missed_snap_count = 0; + } + + for (i = 0; i < src_missed_snap_count; i++) { + snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", + i); + ret = dict_get_str (src, name_buf, &buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch %s", name_buf); + goto out; + } + + snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", + dst_missed_snap_count); + + tmp_str = gf_strdup (buf); + if (!tmp_str) { + ret = -1; + goto out; + } + + ret = dict_set_dynstr (dst, name_buf, tmp_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set %s", name_buf); + goto out; + } + + tmp_str = NULL; + dst_missed_snap_count++; + } + + ret = dict_set_int32 (dst, "missed_snap_count", dst_missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set dst_missed_snap_count"); + goto out; + } + +out: + if (ret && tmp_str) + GF_FREE(tmp_str); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_snap_use_rsp_dict (dict_t *dst, dict_t *src) +{ + int ret = -1; + int32_t snap_command = 0; + + if (!dst || !src) { + gf_log ("", GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (dst, "type", &snap_command); + if (ret) { + gf_log ("", GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + case GF_SNAP_OPTION_TYPE_DELETE: + ret = glusterd_snap_create_use_rsp_dict (dst, src); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to use rsp dict"); + goto out; + } + break; + case GF_SNAP_OPTION_TYPE_CONFIG: + ret = glusterd_snap_config_use_rsp_dict (dst, src); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to use rsp dict"); + goto out; + } + break; + default: + // copy the response dictinary's contents to the dict to be + // sent back to the cli + dict_copy (src, dst); + break; + } + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_sys_exec_output_rsp_dict (dict_t *dst, dict_t *src) +{ + char output_name[PATH_MAX] = ""; + char *output = NULL; + int ret = 0; + int i = 0; + int len = 0; + int src_output_count = 0; + int dst_output_count = 0; + + if (!dst || !src) { + gf_log ("", GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (dst, "output_count", &dst_output_count); + + ret = dict_get_int32 (src, "output_count", &src_output_count); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "No output from source"); + ret = 0; + goto out; + } + + for (i = 1; i <= src_output_count; 
i++) { + len = snprintf (output_name, sizeof(output_name) - 1, + "output_%d", i); + output_name[len] = '\0'; + ret = dict_get_str (src, output_name, &output); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to fetch %s", + output_name); + goto out; + } + + len = snprintf (output_name, sizeof(output_name) - 1, + "output_%d", i+dst_output_count); + output_name[len] = '\0'; + ret = dict_set_dynstr (dst, output_name, gf_strdup (output)); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to set %s", + output_name); + goto out; + } + } + + ret = dict_set_int32 (dst, "output_count", + dst_output_count+src_output_count); +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int glusterd_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict) { int ret = 0; @@ -7117,6 +8252,77 @@ out: } int +_heal_volume_add_shd_rsp_of_statistics (dict_t *this, char *key, data_t + *value, void *data) +{ + char new_key[256] = {0,}; + char int_str[16] = {0,}; + char key_begin_string[128] = {0,}; + data_t *new_value = NULL; + char *rxl_end = NULL; + char *rxl_child_end = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *key_begin_str = NULL; + int rxl_id = 0; + int rxl_child_id = 0; + int brick_id = 0; + int int_len = 0; + int ret = 0; + glusterd_heal_rsp_conv_t *rsp_ctx = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + rsp_ctx = data; + key_begin_str = strchr (key, '-'); + if (!key_begin_str) + goto out; + + int_len = strlen (key) - strlen (key_begin_str); + strncpy (key_begin_string, key, int_len); + key_begin_string[int_len] = '\0'; + + rxl_end = strchr (key_begin_str + 1, '-'); + if (!rxl_end) + goto out; + + int_len = strlen (key_begin_str) - strlen (rxl_end) - 1; + strncpy (int_str, key_begin_str + 1, int_len); + int_str[int_len] = '\0'; + ret = gf_string2int (int_str, &rxl_id); + if (ret) + goto out; + + + rxl_child_end = strchr (rxl_end + 1, '-'); + if (!rxl_child_end) + goto out; + + int_len = strlen (rxl_end) - strlen (rxl_child_end) - 1; + strncpy (int_str, rxl_end + 1, int_len); + int_str[int_len] = '\0'; + ret = gf_string2int (int_str, &rxl_child_id); + if (ret) + goto out; + + volinfo = rsp_ctx->volinfo; + brick_id = rxl_id * volinfo->replica_count + rxl_child_id; + + brickinfo = glusterd_get_brickinfo_by_position (volinfo, brick_id); + if (!brickinfo) + goto out; + if (!glusterd_is_local_brick (rsp_ctx->this, volinfo, brickinfo)) + goto out; + + new_value = data_copy (value); + snprintf (new_key, sizeof (new_key), "%s-%d%s", key_begin_string, + brick_id, rxl_child_end); + dict_set (rsp_ctx->dict, new_key, new_value); + +out: + return 0; + +} + +int glusterd_heal_volume_brick_rsp (dict_t *req_dict, dict_t *rsp_dict, dict_t *op_ctx, char **op_errstr) { @@ -7124,6 +8330,7 @@ glusterd_heal_volume_brick_rsp (dict_t *req_dict, dict_t *rsp_dict, glusterd_heal_rsp_conv_t rsp_ctx = {0}; char *volname = NULL; glusterd_volinfo_t *volinfo = NULL; + int heal_op = -1; GF_ASSERT (rsp_dict); GF_ASSERT (op_ctx); @@ -7135,6 +8342,13 @@ glusterd_heal_volume_brick_rsp (dict_t *req_dict, dict_t *rsp_dict, goto out; } + ret = dict_get_int32 (req_dict, "heal-op", &heal_op); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get heal_op"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); if (ret) @@ -7143,7 +8357,12 @@ glusterd_heal_volume_brick_rsp (dict_t *req_dict, dict_t *rsp_dict, rsp_ctx.dict = op_ctx; rsp_ctx.volinfo = volinfo; rsp_ctx.this = THIS; - dict_foreach (rsp_dict, _heal_volume_add_shd_rsp, &rsp_ctx); + if (heal_op == GF_AFR_OP_STATISTICS) + dict_foreach (rsp_dict, 
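/* --- editor's sketch ----------------------------------------------
 * The statistics handler above picks a replica-set id and a child id
 * out of keys shaped like "<name>-<rxl>-<child>-<rest>" and remaps
 * them to a global brick index via
 *   brick_id = rxl_id * replica_count + rxl_child_id.
 * A compact sketch of that parse (the key string is hypothetical): */

#include <stdio.h>

int
main (void)
{
        const char *key = "stats-1-2-healed";
        char        name[32], rest[32];
        int         rxl = 0, child = 0, replica_count = 3;

        if (sscanf (key, "%31[^-]-%d-%d-%31s",
                    name, &rxl, &child, rest) == 4)
                printf ("%s: brick-id %d (%s)\n",
                        name, rxl * replica_count + child, rest);
        return 0;
}
/* ------------------------------------------------------------------ */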
_heal_volume_add_shd_rsp_of_statistics, + &rsp_ctx); + else + dict_foreach (rsp_dict, _heal_volume_add_shd_rsp, &rsp_ctx); + out: return ret; @@ -7288,6 +8507,13 @@ glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict, gf_log (THIS->name, GF_LOG_ERROR, "failed to set failure count"); + memset (key, 0 , 256); + snprintf (key, 256, "skipped-%d", i); + ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.skipped_files); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "failed to set skipped count"); + memset (key, 0, 256); snprintf (key, 256, "run-time-%d", i); ret = dict_set_double (op_ctx, key, volinfo->rebal.rebalance_time); @@ -7339,16 +8565,31 @@ glusterd_handle_node_rsp (dict_t *req_dict, void *pending_entry, * time a lock_owner is set */ gf_boolean_t -is_origin_glusterd () +is_origin_glusterd (dict_t *dict) { - int ret = 0; - uuid_t lock_owner = {0,}; + gf_boolean_t ret = _gf_false; + uuid_t lock_owner = {0,}; + uuid_t *originator_uuid = NULL; - ret = glusterd_get_lock_owner (&lock_owner); - if (ret) - return _gf_false; + GF_ASSERT (dict); - return (uuid_compare (MY_UUID, lock_owner) == 0); + ret = dict_get_bin (dict, "originator_uuid", + (void **) &originator_uuid); + if (ret) { + /* If not originator_uuid has been set, then the command + * has been originated from a glusterd running on older version + * Hence fetching the lock owner */ + ret = glusterd_get_lock_owner (&lock_owner); + if (ret) { + ret = _gf_false; + goto out; + } + ret = !uuid_compare (MY_UUID, lock_owner); + } else + ret = !uuid_compare (MY_UUID, *originator_uuid); + +out: + return ret; } int @@ -7411,50 +8652,243 @@ glusterd_copy_uuid_to_dict (uuid_t uuid, dict_t *dict, char *key) return 0; } -gf_boolean_t -glusterd_is_same_address (char *name1, char *name2) +int +_update_volume_op_versions (dict_t *this, char *key, data_t *value, void *data) { - struct addrinfo *addr1 = NULL; - struct addrinfo *addr2 = NULL; - struct addrinfo *p = NULL; - struct addrinfo *q = NULL; - gf_boolean_t ret = _gf_false; - int gai_err = 0; + int op_version = 0; + glusterd_volinfo_t *ctx = NULL; + gf_boolean_t enabled = _gf_true; + int ret = -1; - gai_err = getaddrinfo(name1,NULL,NULL,&addr1); - if (gai_err != 0) { - gf_log (name1, GF_LOG_WARNING, - "error in getaddrinfo: %s\n", gai_strerror(gai_err)); - goto out; + GF_ASSERT (data); + ctx = data; + + op_version = glusterd_get_op_version_for_key (key); + + if (gd_is_xlator_option (key) || gd_is_boolean_option (key)) { + ret = gf_string2boolean (value->data, &enabled); + if (ret) + return 0; + + if (!enabled) + return 0; } - gai_err = getaddrinfo(name2,NULL,NULL,&addr2); - if (gai_err != 0) { - gf_log (name2, GF_LOG_WARNING, - "error in getaddrinfo: %s\n", gai_strerror(gai_err)); - goto out; + if (op_version > ctx->op_version) + ctx->op_version = op_version; + + if (gd_is_client_option (key) && + (op_version > ctx->client_op_version)) + ctx->client_op_version = op_version; + + return 0; +} + +void +gd_update_volume_op_versions (glusterd_volinfo_t *volinfo) +{ + glusterd_conf_t *conf = NULL; + gf_boolean_t ob_enabled = _gf_false; + + GF_ASSERT (volinfo); + + conf = THIS->private; + GF_ASSERT (conf); + + /* Reset op-versions to minimum */ + volinfo->op_version = 1; + volinfo->client_op_version = 1; + + dict_foreach (volinfo->dict, _update_volume_op_versions, volinfo); + + /* Special case for open-behind + * If cluster op-version >= 2 and open-behind hasn't been explicitly + * disabled, volume op-versions must be updated to account for it + */ + + /* TODO: Remove once we have a 
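/* --- editor's sketch ----------------------------------------------
 * is_origin_glusterd() above now prefers an originator_uuid carried
 * in the op dict and falls back to the cluster lock owner only for
 * peers predating that key; either way the decision is a plain uuid
 * equality test, as in this libuuid sketch (link with -luuid): */

#include <stdio.h>
#include <uuid/uuid.h>

int
main (void)
{
        uuid_t mine, originator;

        uuid_generate (mine);
        uuid_copy (originator, mine);   /* pretend we originated it */

        printf ("is origin: %s\n",
                uuid_compare (mine, originator) == 0 ? "yes" : "no");
        return 0;
}
/* ------------------------------------------------------------------ */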
general way to update automatically + * enabled features + */ + if (conf->op_version >= 2) { + ob_enabled = dict_get_str_boolean (volinfo->dict, + "performance.open-behind", + _gf_true); + if (ob_enabled) { + + if (volinfo->op_version < 2) + volinfo->op_version = 2; + if (volinfo->client_op_version < 2) + volinfo->client_op_version = 2; + } } - for (p = addr1; p; p = p->ai_next) { - for (q = addr2; q; q = q->ai_next) { - if (p->ai_addrlen != q->ai_addrlen) { - continue; - } - if (memcmp(p->ai_addr,q->ai_addr,p->ai_addrlen)) { + return; +} + +/* A task is committed/completed once the task-id for it is cleared */ +gf_boolean_t +gd_is_remove_brick_committed (glusterd_volinfo_t *volinfo) +{ + GF_ASSERT (volinfo); + + if ((GD_OP_REMOVE_BRICK == volinfo->rebal.op) && + !uuid_is_null (volinfo->rebal.rebalance_id)) + return _gf_false; + + return _gf_true; +} + +gf_boolean_t +glusterd_are_vol_all_peers_up (glusterd_volinfo_t *volinfo, + struct list_head *peers, + char **down_peerstr) +{ + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + gf_boolean_t ret = _gf_false; + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (!uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + list_for_each_entry (peerinfo, peers, uuid_list) { + if (uuid_compare (peerinfo->uuid, brickinfo->uuid)) continue; + + /*Found peer who owns the brick, return false + * if peer is not connected or not friend */ + if (!(peerinfo->connected) || + (peerinfo->state.state != + GD_FRIEND_STATE_BEFRIENDED)) { + *down_peerstr = gf_strdup (peerinfo->hostname); + gf_log ("", GF_LOG_DEBUG, "Peer %s is down. ", + peerinfo->hostname); + goto out; } - ret = _gf_true; - goto out; } - } + } + ret = _gf_true; out: - if (addr1) { - freeaddrinfo(addr1); - } - if (addr2) { - freeaddrinfo(addr2); + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +gf_boolean_t +glusterd_is_status_tasks_op (glusterd_op_t op, dict_t *dict) +{ + int ret = -1; + uint32_t cmd = GF_CLI_STATUS_NONE; + gf_boolean_t is_status_tasks = _gf_false; + + if (op != GD_OP_STATUS_VOLUME) + goto out; + + ret = dict_get_uint32 (dict, "cmd", &cmd); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to get opcode"); + goto out; } + + if (cmd & GF_CLI_STATUS_TASKS) + is_status_tasks = _gf_true; + +out: + return is_status_tasks; +} + +int +glusterd_compare_snap_time(struct list_head *list1, struct list_head *list2) +{ + glusterd_snap_t *snap1 = NULL; + glusterd_snap_t *snap2 = NULL; + double diff_time = 0; + + GF_ASSERT (list1); + GF_ASSERT (list2); + + snap1 = list_entry(list1, glusterd_snap_t, snap_list); + snap2 = list_entry(list2, glusterd_snap_t, snap_list); + diff_time = difftime(snap1->time_stamp, snap2->time_stamp); + + return ((int)diff_time); +} + +int +glusterd_compare_snap_vol_time(struct list_head *list1, struct list_head *list2) +{ + glusterd_volinfo_t *snapvol1 = NULL; + glusterd_volinfo_t *snapvol2 = NULL; + double diff_time = 0; + + GF_ASSERT (list1); + GF_ASSERT (list2); + + snapvol1 = list_entry(list1, glusterd_volinfo_t, snapvol_list); + snapvol2 = list_entry(list2, glusterd_volinfo_t, snapvol_list); + diff_time = difftime(snapvol1->snapshot->time_stamp, + snapvol2->snapshot->time_stamp); + + return ((int)diff_time); +} + +int32_t +glusterd_missed_snapinfo_new (glusterd_missed_snap_info **missed_snapinfo) +{ + glusterd_missed_snap_info *new_missed_snapinfo = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (missed_snapinfo); + + 
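/* --- editor's note & sketch ---------------------------------------
 * glusterd_compare_snap_time() above returns (int)difftime(...); the
 * caller only needs the sign, and the cast can in principle overflow
 * for very distant timestamps.  A comparator that returns the sign
 * directly, usable with any ordered insert or with qsort(): */

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

static int
cmp_timestamps (const void *a, const void *b)
{
        double d = difftime (*(const time_t *) a, *(const time_t *) b);

        return (d > 0) - (d < 0);       /* sign only, no cast */
}

int
main (void)
{
        time_t stamps[] = { 300, 100, 200 };
        int    i;

        qsort (stamps, 3, sizeof (time_t), cmp_timestamps);
        for (i = 0; i < 3; i++)
                printf ("%ld\n", (long) stamps[i]);
        return 0;
}
/* ------------------------------------------------------------------ */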
new_missed_snapinfo = GF_CALLOC (1, sizeof(*new_missed_snapinfo), + gf_gld_mt_missed_snapinfo_t); + + if (!new_missed_snapinfo) + goto out; + + new_missed_snapinfo->node_snap_info = NULL; + INIT_LIST_HEAD (&new_missed_snapinfo->missed_snaps); + INIT_LIST_HEAD (&new_missed_snapinfo->snap_ops); + + *missed_snapinfo = new_missed_snapinfo; + + ret = 0; + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); return ret; +} + +int32_t +glusterd_missed_snap_op_new (glusterd_snap_op_t **snap_op) +{ + glusterd_snap_op_t *new_snap_op = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (snap_op); + new_snap_op = GF_CALLOC (1, sizeof(*new_snap_op), + gf_gld_mt_missed_snapinfo_t); + + if (!new_snap_op) + goto out; + + new_snap_op->brick_path = NULL; + new_snap_op->brick_num = -1; + new_snap_op->op = -1; + new_snap_op->status = -1; + INIT_LIST_HEAD (&new_snap_op->snap_ops_list); + + *snap_op = new_snap_op; + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index c7fbe447e..56bb799bf 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -81,6 +81,11 @@ glusterd_submit_request (struct rpc_clnt *rpc, void *req, int32_t glusterd_volinfo_new (glusterd_volinfo_t **volinfo); +int32_t +glusterd_volinfo_dup (glusterd_volinfo_t *volinfo, + glusterd_volinfo_t **dup_volinfo, + gf_boolean_t set_userauth); + char * glusterd_auth_get_username (glusterd_volinfo_t *volinfo); @@ -115,8 +120,23 @@ int32_t glusterd_peer_hostname_new (char *hostname, glusterd_peer_hostname_t **name); int32_t +glusterd_snap_volinfo_find (char *volname, glusterd_snap_t *snap, + glusterd_volinfo_t **volinfo); +int32_t +glusterd_snap_volinfo_find_from_parent_volname (char *origin_volname, + glusterd_snap_t *snap, + glusterd_volinfo_t **volinfo); + +int32_t glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo); +int +glusterd_volinfo_find_by_volume_id (uuid_t volume_id, glusterd_volinfo_t **volinfo); + +int +glusterd_snap_volinfo_find_by_volume_id (uuid_t volume_id, + glusterd_volinfo_t **volinfo); + int32_t glusterd_service_stop(const char *service, char *pidfile, int sig, gf_boolean_t force_kill); @@ -148,14 +168,11 @@ glusterd_volume_brickinfo_get_by_brick (char *brick, glusterd_volinfo_t *volinfo, glusterd_brickinfo_t **brickinfo); -gf_boolean_t -glusterd_is_local_addr (char *hostname); - int32_t glusterd_build_volume_dict (dict_t **vols); int32_t -glusterd_compare_friend_data (dict_t *vols, int32_t *status); +glusterd_compare_friend_data (dict_t *vols, int32_t *status, char *hostname); int glusterd_volume_compute_cksum (glusterd_volinfo_t *volinfo); @@ -282,6 +299,7 @@ glusterd_is_defrag_on (glusterd_volinfo_t *volinfo); int32_t glusterd_volinfo_bricks_delete (glusterd_volinfo_t *volinfo); + int glusterd_friend_find_by_uuid (uuid_t uuid, glusterd_peerinfo_t **peerinfo); @@ -317,8 +335,13 @@ glusterd_rb_check_bricks (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *dst_brick); int -glusterd_brick_create_path (char *host, char *path, uuid_t uuid, - char **op_errstr); +glusterd_check_and_set_brick_xattr (char *host, char *path, uuid_t uuid, + char **op_errstr, gf_boolean_t is_force); + +int +glusterd_validate_and_create_brickpath (glusterd_brickinfo_t *brickinfo, + uuid_t volume_id, char **op_errstr, + gf_boolean_t is_force); int glusterd_sm_tr_log_transition_add 
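/* --- editor's note & sketch ---------------------------------------
 * Both constructors above zero-allocate, seed -1 sentinels, and hand
 * the object back through an out-parameter; note that
 * glusterd_missed_snap_op_new() tags its glusterd_snap_op_t with the
 * gf_gld_mt_missed_snapinfo_t memory type, which looks like a
 * copy-paste from its sibling.  The pattern in plain C: */

#include <stdlib.h>

struct snap_op {
        char *brick_path;
        int   brick_num;
        int   op;
        int   status;
};

static int
snap_op_new (struct snap_op **out)
{
        struct snap_op *op = calloc (1, sizeof (*op));

        if (!op)
                return -1;

        op->brick_num = -1;             /* sentinel: unassigned */
        op->op        = -1;
        op->status    = -1;

        *out = op;
        return 0;
}

int
main (void)
{
        struct snap_op *op = NULL;

        if (snap_op_new (&op) == 0)
                free (op);
        return 0;
}
/* ------------------------------------------------------------------ */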
(glusterd_sm_tr_log_t *log, int old_state, int new_state, @@ -346,7 +369,7 @@ gf_boolean_t glusterd_peerinfo_is_uuid_unknown (glusterd_peerinfo_t *peerinfo); int32_t glusterd_brick_connect (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo); + glusterd_brickinfo_t *brickinfo, char *socketpath); int32_t glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo); int32_t @@ -354,13 +377,25 @@ glusterd_delete_volume (glusterd_volinfo_t *volinfo); int32_t glusterd_delete_brick (glusterd_volinfo_t* volinfo, glusterd_brickinfo_t *brickinfo); + int32_t glusterd_delete_all_bricks (glusterd_volinfo_t* volinfo); + +int +glusterd_spawn_daemons (void *opaque); + int glusterd_restart_gsyncds (glusterd_conf_t *conf); + int glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave, - char *glusterd_uuid_str, char **op_errstr); + char *path_list, char *conf_path, + char *glusterd_uuid_str, + char **op_errstr); +int +glusterd_get_local_brickpaths (glusterd_volinfo_t *volinfo, + char **pathlist); + int32_t glusterd_recreate_bricks (glusterd_conf_t *conf); int32_t @@ -443,6 +478,12 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo, int glusterd_check_files_identical (char *filename1, char *filename2, gf_boolean_t *identical); + +int +glusterd_check_topology_identical (const char *filename1, + const char *filename2, + gf_boolean_t *identical); + void glusterd_volinfo_reset_defrag_stats (glusterd_volinfo_t *volinfo); int @@ -464,6 +505,10 @@ int glusterd_volume_heal_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict); int glusterd_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict); +int +glusterd_sys_exec_output_rsp_dict (dict_t *aggr, dict_t *rsp_dict); +int +glusterd_snap_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict); int32_t glusterd_handle_node_rsp (dict_t *req_ctx, void *pending_entry, glusterd_op_t op, dict_t *rsp_dict, dict_t *op_ctx, @@ -480,11 +525,16 @@ glusterd_profile_volume_brick_rsp (void *pending_entry, dict_t *rsp_dict, dict_t *op_ctx, char **op_errstr, gd_node_type type); +gf_boolean_t +glusterd_are_vol_all_peers_up (glusterd_volinfo_t *volinfo, + struct list_head *peers, + char **down_peerstr); + /* Should be used only when an operation is in progress, as that is the only * time a lock_owner is set */ gf_boolean_t -is_origin_glusterd (); +is_origin_glusterd (dict_t *dict); gf_boolean_t glusterd_is_quorum_changed (dict_t *options, char *option, char *value); @@ -515,4 +565,70 @@ glusterd_copy_uuid_to_dict (uuid_t uuid, dict_t *dict, char *key); gf_boolean_t glusterd_is_same_address (char *name1, char *name2); + +void +gd_update_volume_op_versions (glusterd_volinfo_t *volinfo); + +char* +gd_peer_uuid_str (glusterd_peerinfo_t *peerinfo); + +gf_boolean_t +gd_is_remove_brick_committed (glusterd_volinfo_t *volinfo); + +gf_boolean_t +glusterd_are_vol_all_peers_up (glusterd_volinfo_t *volinfo, + struct list_head *peers, + char **down_peerstr); + +int +glusterd_get_slave_details_confpath (glusterd_volinfo_t *volinfo, dict_t *dict, + char **slave_ip, char **slave_vol, + char **conf_path, char **op_errstr); + +int +glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave, + dict_t *resp_dict, char *path_list, + char *conf_path, gf_boolean_t is_force); + +int +glusterd_check_gsync_running_local (char *master, char *slave, + char *conf_path, + gf_boolean_t *is_run); + +gf_boolean_t +glusterd_is_status_tasks_op (glusterd_op_t op, dict_t *dict); + +#ifdef GF_LINUX_HOST_OS +char* +glusterd_get_brick_mount_details (glusterd_brickinfo_t *brickinfo); +struct 
mntent * +glusterd_get_mnt_entry_info (char *mnt_pt, FILE *mtab); +int +glusterd_get_brick_root (char *path, char **mount_point); +#endif //LINUX_HOST + +int +glusterd_compare_snap_time(struct list_head *, struct list_head *); + +int +glusterd_compare_snap_vol_time(struct list_head *, struct list_head *); + +int32_t +glusterd_snap_volinfo_restore (dict_t *rsp_dict, + glusterd_volinfo_t *new_volinfo, + glusterd_volinfo_t *snap_volinfo); +int32_t +glusterd_lvm_snapshot_remove (dict_t *rsp_dict, glusterd_volinfo_t *snap_vol); + +int32_t +glusterd_missed_snapinfo_new (glusterd_missed_snap_info **missed_snapinfo); + +int32_t +glusterd_missed_snap_op_new (glusterd_snap_op_t **snap_op); + +int32_t +glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, char *snap_uuid, + glusterd_brickinfo_t *brickinfo, + int32_t brick_number, int32_t op); + #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 53963989a..6f3c69e7d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -15,6 +15,7 @@ #include <fnmatch.h> #include <sys/wait.h> +#include <dlfcn.h> #if (HAVE_LIB_XML) #include <libxml/encoding.h> @@ -27,6 +28,8 @@ #include "logging.h" #include "dict.h" #include "graph-utils.h" +#include "glusterd-store.h" +#include "glusterd-hooks.h" #include "trie.h" #include "glusterd-mem-types.h" #include "cli1-xdr.h" @@ -34,6 +37,7 @@ #include "glusterd-op-sm.h" #include "glusterd-utils.h" #include "run.h" +#include "options.h" extern struct volopt_map_entry glusterd_volopt_map[]; @@ -460,6 +464,8 @@ process_option (char *key, data_t *value, void *param) vme.key = key; vme.voltype = odt->vme->voltype; vme.option = odt->vme->option; + vme.op_version = odt->vme->op_version; + if (!vme.option) { vme.option = strrchr (key, '.'); if (vme.option) @@ -590,6 +596,8 @@ get_server_xlator (char *xlator) subvol = GF_XLATOR_MARKER; if (strcmp (xlator, "io-stats") == 0) subvol = GF_XLATOR_IO_STATS; + if (strcmp (xlator, "bd") == 0) + subvol = GF_XLATOR_BD; return subvol; } @@ -1415,9 +1423,8 @@ server_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, char *password = NULL; char index_basepath[PATH_MAX] = {0}; char key[1024] = {0}; - char *vgname = NULL; - char *vg = NULL; glusterd_brickinfo_t *brickinfo = NULL; + char changelog_basepath[PATH_MAX] = {0,}; brickinfo = param; path = brickinfo->path; @@ -1436,47 +1443,62 @@ server_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, } } - if (volinfo->backend == GD_VOL_BK_BD) { - xl = volgen_graph_add (graph, "storage/bd_map", volname); - if (!xl) - return -1; + xl = volgen_graph_add (graph, "storage/posix", volname); + if (!xl) + return -1; - ret = xlator_set_option (xl, "device", "vg"); - if (ret) - return -1; + ret = xlator_set_option (xl, "directory", path); + if (ret) + return -1; - vg = gf_strdup (path); - vgname = strrchr (vg, '/'); - if (strchr(vg, '/') != vgname) { - gf_log ("glusterd", GF_LOG_ERROR, - "invalid vg specified %s", path); - GF_FREE (vg); - goto out; - } - vgname++; - ret = xlator_set_option (xl, "export", vgname); - GF_FREE (vg); - if (ret) - return -1; - } else { - xl = volgen_graph_add (graph, "storage/posix", volname); + ret = xlator_set_option (xl, "volume-id", + uuid_utoa (volinfo->volume_id)); + if (ret) + return -1; + + ret = check_and_add_debug_xl (graph, set_dict, volname, + "posix"); + if (ret) + return -1; +#ifdef HAVE_BD_XLATOR + if (*brickinfo->vg != '\0') { + /* Now add BD v2 xlator if volume is BD 
type */ + xl = volgen_graph_add (graph, "storage/bd", volname); if (!xl) return -1; - ret = xlator_set_option (xl, "directory", path); + ret = xlator_set_option (xl, "device", "vg"); if (ret) return -1; - - ret = xlator_set_option (xl, "volume-id", - uuid_utoa (volinfo->volume_id)); + ret = xlator_set_option (xl, "export", brickinfo->vg); if (ret) return -1; - ret = check_and_add_debug_xl (graph, set_dict, volname, - "posix"); + ret = check_and_add_debug_xl (graph, set_dict, volname, "bd"); if (ret) return -1; + } +#endif + + xl = volgen_graph_add (graph, "features/changelog", volname); + if (!xl) + return -1; + + ret = xlator_set_option (xl, "changelog-brick", path); + if (ret) + return -1; + + snprintf (changelog_basepath, sizeof (changelog_basepath), + "%s/%s", path, ".glusterfs/changelogs"); + ret = xlator_set_option (xl, "changelog-dir", changelog_basepath); + if (ret) + return -1; + + ret = check_and_add_debug_xl (graph, set_dict, volname, "changelog"); + if (ret) + return -1; + xl = volgen_graph_add (graph, "features/access-control", volname); if (!xl) return -1; @@ -1592,7 +1614,8 @@ server_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, } /* Check for read-only volume option, and add it to the graph */ - if (dict_get_str_boolean (set_dict, "features.read-only", 0)) { + if (dict_get_str_boolean (set_dict, "features.read-only", 0) + || volinfo -> is_snap_volume) { xl = volgen_graph_add (graph, "features/read-only", volname); if (!xl) { ret = -1; @@ -1609,6 +1632,18 @@ server_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, } } + /* Check for compress volume option, and add it to the graph on server side */ + if (dict_get_str_boolean (set_dict, "features.compress", 0)) { + xl = volgen_graph_add (graph, "features/cdc", volname); + if (!xl) { + ret = -1; + goto out; + } + ret = dict_set_str (set_dict, "compress.mode", "server"); + if (ret) + goto out; + } + xl = volgen_graph_add_as (graph, "debug/io-stats", path); if (!xl) return -1; @@ -1672,10 +1707,11 @@ static int perfxl_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme, void *param) { - char *volname = NULL; gf_boolean_t enabled = _gf_false; + glusterd_volinfo_t *volinfo = NULL; - volname = param; + GF_ASSERT (param); + volinfo = param; if (strcmp (vme->option, "!perf") != 0) return 0; @@ -1685,7 +1721,13 @@ perfxl_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme, if (!enabled) return 0; - if (volgen_graph_add (graph, vme->voltype, volname)) + /* Check op-version before adding the 'open-behind' xlator in the graph + */ + if (!strcmp (vme->key, "performance.open-behind") && + (vme->op_version > volinfo->client_op_version)) + return 0; + + if (volgen_graph_add (graph, vme->voltype, volinfo->volname)) return 0; else return -1; @@ -1850,7 +1892,7 @@ xml_add_volset_element (xmlTextWriterPtr writer, const char *name, #endif static int -get_key_from_volopt ( struct volopt_map_entry *vme, char **key) +_get_xlator_opt_key_from_vme ( struct volopt_map_entry *vme, char **key) { int ret = 0; @@ -1893,11 +1935,22 @@ get_key_from_volopt ( struct volopt_map_entry *vme, char **key) return ret; } +static void +_free_xlator_opt_key (char *key) +{ + GF_ASSERT (key); + + if (!strcmp (key, AUTH_ALLOW_OPT_KEY) || + !strcmp (key, AUTH_REJECT_OPT_KEY) || + !strcmp (key, NFS_DISABLE_OPT_KEY)) + GF_FREE (key); + + return; +} + int glusterd_get_volopt_content (dict_t * ctx, gf_boolean_t xml_out) { - - char *xlator_type = NULL; void *dl_handle = NULL; volume_opt_list_t vol_opt_handle = 
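/* --- editor's sketch ----------------------------------------------
 * perfxl_option_handler() above now refuses to add open-behind to a
 * client graph whose computed client-op-version is older than the
 * option itself.  The gate in isolation (this struct and its values
 * are illustrative stand-ins, not the real volopt map entry): */

struct vol_opt {
        const char *key;
        unsigned    op_version;
};

static int
should_add_xlator (const struct vol_opt *opt, unsigned client_op_version)
{
        /* an option introduced after what the volume's clients
         * support must be skipped, or old clients cannot mount */
        return opt->op_version <= client_op_version;
}
/* ------------------------------------------------------------------ */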
{{0},}; char *key = NULL; @@ -1923,36 +1976,48 @@ glusterd_get_volopt_content (dict_t * ctx, gf_boolean_t xml_out) for (vme = &glusterd_volopt_map[0]; vme->key; vme++) { - if ( ( vme->type == NO_DOC) || (vme->type == GLOBAL_NO_DOC) ) + if ((vme->type == NO_DOC) || (vme->type == GLOBAL_NO_DOC)) continue; - if (get_key_from_volopt (vme, &key)) - goto out; /*Some error while getin key*/ - if (vme->description) { descr = vme->description; def_val = vme->value; } else { - if (!xlator_type || strcmp (vme->voltype, xlator_type)){ - ret = xlator_volopt_dynload (vme->voltype, - &dl_handle, - &vol_opt_handle); - if (ret) { - dl_handle = NULL; - continue; - } + if (_get_xlator_opt_key_from_vme (vme, &key)) { + gf_log ("glusterd", GF_LOG_DEBUG, "Failed to " + "get %s key from volume option entry", + vme->key); + goto out; /*Some error while geting key*/ } + + ret = xlator_volopt_dynload (vme->voltype, + &dl_handle, + &vol_opt_handle); + + if (ret) { + gf_log ("glusterd", GF_LOG_DEBUG, + "xlator_volopt_dynload error(%d)", ret); + ret = 0; + goto cont; + } + ret = xlator_option_info_list (&vol_opt_handle, key, &def_val, &descr); - if (ret) /*Swallow Error i.e if option not found*/ - continue; + if (ret) { /*Swallow Error i.e if option not found*/ + gf_log ("glusterd", GF_LOG_DEBUG, + "Failed to get option for %s key", key); + ret = 0; + goto cont; + } } if (xml_out) { #if (HAVE_LIB_XML) if (xml_add_volset_element (writer,vme->key, - def_val, descr)) - goto out; + def_val, descr)) { + ret = -1; + goto cont; + } #else gf_log ("glusterd", GF_LOG_ERROR, "Libxml not present"); #endif @@ -1962,11 +2027,18 @@ glusterd_get_volopt_content (dict_t * ctx, gf_boolean_t xml_out) vme->key, def_val, descr); strcat (output_string, tmp_str); } - - if (!strcmp (key, AUTH_ALLOW_OPT_KEY) || - !strcmp (key, AUTH_REJECT_OPT_KEY) || - !strcmp (key, NFS_DISABLE_OPT_KEY)) - GF_FREE (key); +cont: + if (dl_handle) { + dlclose (dl_handle); + dl_handle = NULL; + vol_opt_handle.given_opt = NULL; + } + if (key) { + _free_xlator_opt_key (key); + key = NULL; + } + if (ret) + goto out; } #if (HAVE_LIB_XML) @@ -1993,7 +2065,7 @@ glusterd_get_volopt_content (dict_t * ctx, gf_boolean_t xml_out) } ret = dict_set_dynstr (ctx, "help-str", output); - out: +out: gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); return ret; @@ -2255,22 +2327,33 @@ volgen_graph_build_dht_cluster (volgen_graph_t *graph, int ret = -1; char *decommissioned_children = NULL; xlator_t *dht = NULL; - char *optstr = NULL; - gf_boolean_t use_nufa = _gf_false; + char *voltype = "cluster/distribute"; - if (dict_get_str(volinfo->dict,"cluster.nufa",&optstr) == 0) { - /* Keep static analyzers quiet by "using" the value. */ - ret = gf_string2boolean(optstr,&use_nufa); + /* NUFA and Switch section */ + if (dict_get_str_boolean (volinfo->dict, "cluster.nufa", 0) && + dict_get_str_boolean (volinfo->dict, "cluster.switch", 0)) { + gf_log (THIS->name, GF_LOG_ERROR, + "nufa and switch cannot be set together"); + ret = -1; + goto out; } + /* Check for NUFA volume option, and change the voltype */ + if (dict_get_str_boolean (volinfo->dict, "cluster.nufa", 0)) + voltype = "cluster/nufa"; + + /* Check for switch volume option, and change the voltype */ + if (dict_get_str_boolean (volinfo->dict, "cluster.switch", 0)) + voltype = "cluster/switch"; + clusters = volgen_graph_build_clusters (graph, volinfo, - use_nufa - ? 
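/* --- editor's sketch ----------------------------------------------
 * The reworked help-text loop above releases its dlopen() handle on
 * every pass through the cont: label, where the old code cached the
 * handle and leaked it.  The lifecycle in isolation (libm.so.6 is a
 * glibc-specific soname, used here only as a convenient demo; link
 * with -ldl): */

#include <stdio.h>
#include <dlfcn.h>

int
main (void)
{
        void   *handle = dlopen ("libm.so.6", RTLD_LAZY);
        double (*fn) (double);

        if (!handle)
                return 1;

        fn = (double (*)(double)) dlsym (handle, "sqrt");
        if (fn)
                printf ("sqrt(2) = %f\n", fn (2.0));

        dlclose (handle);       /* release per use, as the loop now does */
        return 0;
}
/* ------------------------------------------------------------------ */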
"cluster/nufa" - : "cluster/distribute", + voltype, "%s-dht", - child_count, child_count); + child_count, + child_count); if (clusters < 0) goto out; + dht = first_of (graph); ret = _graph_get_decommissioned_children (dht, volinfo, &decommissioned_children); @@ -2364,7 +2447,7 @@ build_distribute: ret = volgen_graph_build_dht_cluster (graph, volinfo, dist_count); - if (ret) + if (ret == -1) goto out; ret = 0; @@ -2372,6 +2455,29 @@ out: return ret; } +static int client_graph_set_perf_options(volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, + dict_t *set_dict) +{ + data_t *tmp_data = NULL; + char *volname = NULL; + + /* + * Logic to make sure NFS doesn't have performance translators by + * default for a volume + */ + volname = volinfo->volname; + tmp_data = dict_get (set_dict, "nfs-volume-file"); + if (!tmp_data) + return volgen_graph_set_options_generic(graph, set_dict, + volname, + &perfxl_option_handler); + else + return volgen_graph_set_options_generic(graph, set_dict, + volname, + &nfsperfxl_option_handler); +} + static int client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, dict_t *set_dict, void *param) @@ -2379,7 +2485,6 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, int ret = 0; xlator_t *xl = NULL; char *volname = NULL; - data_t *tmp_data = NULL; volname = volinfo->volname; ret = volgen_graph_build_clients (graph, volinfo, set_dict, param); @@ -2387,9 +2492,34 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, goto out; ret = volume_volgen_graph_build_clusters (graph, volinfo); - if (ret) + if (ret == -1) goto out; + /* Check for compress volume option, and add it to the graph on client side */ + if (dict_get_str_boolean (set_dict, "features.compress", 0)) { + xl = volgen_graph_add (graph, "features/cdc", volname); + if (!xl) { + ret = -1; + goto out; + } + ret = dict_set_str (set_dict, "compress.mode", "client"); + if (ret) + goto out; + + } + + ret = glusterd_volinfo_get_boolean (volinfo, "features.encryption"); + if (ret == -1) + goto out; + if (ret) { + xl = volgen_graph_add (graph, "encryption/crypt", volname); + + if (!xl) { + ret = -1; + goto out; + } + } + ret = glusterd_volinfo_get_boolean (volinfo, VKEY_FEATURES_QUOTA); if (ret == -1) goto out; @@ -2402,16 +2532,20 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, } } - /* Logic to make sure NFS doesn't have performance translators by - default for a volume */ - tmp_data = dict_get (set_dict, "nfs-volume-file"); - if (!tmp_data) - ret = volgen_graph_set_options_generic (graph, set_dict, volname, - &perfxl_option_handler); - else - ret = volgen_graph_set_options_generic (graph, set_dict, volname, - &nfsperfxl_option_handler); + ret = glusterd_volinfo_get_boolean (volinfo, "features.file-snapshot"); + if (ret == -1) + goto out; + if (ret) { + xl = volgen_graph_add (graph, "features/qemu-block", volname); + + if (!xl) { + ret = -1; + goto out; + } + } + + ret = client_graph_set_perf_options(graph, volinfo, set_dict); if (ret) goto out; @@ -2809,6 +2943,8 @@ build_nfs_graph (volgen_graph_t *graph, dict_t *mod_dict) char *skey = NULL; int ret = 0; char nfs_xprt[16] = {0,}; + char *volname = NULL; + data_t *data = NULL; this = THIS; GF_ASSERT (this); @@ -2834,6 +2970,10 @@ build_nfs_graph (volgen_graph_t *graph, dict_t *mod_dict) if (ret) goto out; + ret = xlator_set_option (nfsxl, "nfs.drc", "on"); + if (ret) + goto out; + list_for_each_entry (voliter, &priv->volumes, vol_list) { if (voliter->status != 
GLUSTERD_STATUS_STARTED) continue; @@ -2894,6 +3034,12 @@ build_nfs_graph (volgen_graph_t *graph, dict_t *mod_dict) if (ret) goto out; + if (mod_dict && (data = dict_get (mod_dict, "volume-name"))) { + volname = data->data; + if (strcmp (volname, voliter->volname) == 0) + dict_copy (mod_dict, set_dict); + } + ret = build_client_graph (&cgraph, voliter, set_dict); if (ret) goto out; @@ -3029,8 +3175,6 @@ glusterd_generate_brick_volfile (glusterd_volinfo_t *volinfo, return ret; } - - static void get_vol_tstamp_file (char *filename, glusterd_volinfo_t *volinfo) { @@ -3131,7 +3275,7 @@ enumerate_transport_reqs (gf_transport_type type, char **types) } } -static int +int generate_client_volfiles (glusterd_volinfo_t *volinfo, glusterd_client_type_t client_type) { @@ -3297,6 +3441,54 @@ out: } int +glusterd_check_nfs_topology_identical (gf_boolean_t *identical) +{ + char nfsvol[PATH_MAX] = {0,}; + char tmpnfsvol[PATH_MAX] = {0,}; + glusterd_conf_t *conf = NULL; + xlator_t *this = THIS; + int ret = -1; + int tmpclean = 0; + int tmpfd = -1; + + if ((!identical) || (!this) || (!this->private)) + goto out; + + conf = (glusterd_conf_t *) this->private; + + /* Fetch the original NFS volfile */ + glusterd_get_nodesvc_volfile ("nfs", conf->workdir, + nfsvol, sizeof (nfsvol)); + + /* Create the temporary NFS volfile */ + snprintf (tmpnfsvol, sizeof (tmpnfsvol), "/tmp/gnfs-XXXXXX"); + tmpfd = mkstemp (tmpnfsvol); + if (tmpfd < 0) { + gf_log (this->name, GF_LOG_WARNING, + "Unable to create temp file %s: (%s)", + tmpnfsvol, strerror (errno)); + goto out; + } + + tmpclean = 1; /* SET the flag to unlink() tmpfile */ + + ret = glusterd_create_global_volfile (build_nfs_graph, + tmpnfsvol, NULL); + if (ret) + goto out; + + /* Compare the topology of volfiles */ + ret = glusterd_check_topology_identical (nfsvol, tmpnfsvol, + identical); +out: + if (tmpfd >= 0) + close (tmpfd); + if (tmpclean) + unlink (tmpnfsvol); + return ret; +} + +int glusterd_check_nfs_volfile_identical (gf_boolean_t *identical) { char nfsvol[PATH_MAX] = {0,}; @@ -3405,6 +3597,9 @@ validate_nfsopts (glusterd_volinfo_t *volinfo, char transport_type[16] = {0,}; char *tt = NULL; char err_str[4096] = {0,}; + xlator_t *this = THIS; + + GF_ASSERT (this); graph.errstr = op_errstr; @@ -3415,7 +3610,7 @@ validate_nfsopts (glusterd_volinfo_t *volinfo, snprintf (err_str, sizeof (err_str), "Changing nfs " "transport type is allowed only for volumes " "of transport type tcp,rdma"); - gf_log ("", GF_LOG_ERROR, "%s", err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); *op_errstr = gf_strdup (err_str); ret = -1; goto out; @@ -3429,6 +3624,12 @@ validate_nfsopts (glusterd_volinfo_t *volinfo, } } + ret = dict_set_str (val_dict, "volume-name", volinfo->volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set volume name"); + goto out; + } + ret = build_nfs_graph (&graph, val_dict); if (!ret) ret = graph_reconf_validateopt (&graph.graph, op_errstr); @@ -3436,60 +3637,12 @@ validate_nfsopts (glusterd_volinfo_t *volinfo, volgen_graph_free (&graph); out: - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + if (dict_get (val_dict, "volume-name")) + dict_del (val_dict, "volume-name"); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } -int -validate_wb_eagerlock (glusterd_volinfo_t *volinfo, dict_t *val_dict, - char **op_errstr) -{ - int ret = -1; - gf_boolean_t wb_val = _gf_false; - gf_boolean_t el_val = _gf_false; - char msg[2048] = {0}; - char *wb_key = NULL; - char *el_key = NULL; - - wb_key = 
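/* --- editor's sketch ----------------------------------------------
 * glusterd_check_nfs_topology_identical() above regenerates the NFS
 * volfile into a mkstemp() scratch file, compares topologies, and
 * unlinks the scratch file whether or not the comparison ran.  The
 * scratch-file lifecycle in isolation: */

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main (void)
{
        char tmpl[] = "/tmp/gnfs-XXXXXX";      /* same template */
        int  fd     = mkstemp (tmpl);

        if (fd < 0) {
                perror ("mkstemp");
                return 1;
        }

        dprintf (fd, "volume nfs-server\nend-volume\n");
        /* ... build the fresh volfile and compare topologies ... */

        close (fd);
        unlink (tmpl);          /* tmpclean: always remove scratch */
        return 0;
}
/* ------------------------------------------------------------------ */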
"performance.write-behind"; - el_key = "cluster.eager-lock"; - ret = dict_get_str_boolean (val_dict, wb_key, -1); - if (ret < 0) - goto check_eager_lock; - wb_val = ret; - ret = glusterd_volinfo_get_boolean (volinfo, el_key); - if (ret < 0) - goto out; - el_val = ret; - goto done; - -check_eager_lock: - ret = dict_get_str_boolean (val_dict, el_key, -1); - if (ret < 0) { - ret = 0; //Keys of intereset to this fn are not present. - goto out; - } - el_val = ret; - ret = glusterd_volinfo_get_boolean (volinfo, wb_key); - if (ret < 0) - goto out; - wb_val = ret; - goto done; - -done: - ret = 0; - if (!wb_val && el_val) { - ret = -1; - snprintf (msg, sizeof (msg), "%s off and %s on is not " - "valid configuration", wb_key, el_key); - gf_log ("glusterd", GF_LOG_ERROR, "%s", msg); - if (op_errstr) - *op_errstr = gf_strdup (msg); - goto out; - } -out: - return ret; -} int validate_clientopts (glusterd_volinfo_t *volinfo, @@ -3650,10 +3803,6 @@ glusterd_validate_reconfopts (glusterd_volinfo_t *volinfo, dict_t *val_dict, goto out; } - ret = validate_wb_eagerlock (volinfo, val_dict, op_errstr); - if (ret) - goto out; - ret = validate_clientopts (volinfo, val_dict, op_errstr); if (ret) { gf_log ("", GF_LOG_DEBUG, @@ -3680,19 +3829,237 @@ out: return ret; } +static struct volopt_map_entry * +_gd_get_vmep (char *key) { + char *completion = NULL; + struct volopt_map_entry *vmep = NULL; + int ret = 0; + + COMPLETE_OPTION ((char *)key, completion, ret); + for (vmep = glusterd_volopt_map; vmep->key; vmep++) { + if (strcmp (vmep->key, key) == 0) + return vmep; + } + + return NULL; +} + uint32_t glusterd_get_op_version_for_key (char *key) { - char *completion = NULL; struct volopt_map_entry *vmep = NULL; - int ret = 0; - COMPLETE_OPTION(key, completion, ret); - for (vmep = glusterd_volopt_map; vmep->key; vmep++) { - if (strcmp (vmep->key, key) == 0) { - return vmep->op_version; + GF_ASSERT (key); + + vmep = _gd_get_vmep (key); + if (vmep) + return vmep->op_version; + + return 0; +} + +gf_boolean_t +gd_is_client_option (char *key) +{ + struct volopt_map_entry *vmep = NULL; + + GF_ASSERT (key); + + vmep = _gd_get_vmep (key); + if (vmep && (vmep->flags & OPT_FLAG_CLIENT_OPT)) + return _gf_true; + + return _gf_false; +} + +gf_boolean_t +gd_is_xlator_option (char *key) +{ + struct volopt_map_entry *vmep = NULL; + + GF_ASSERT (key); + + vmep = _gd_get_vmep (key); + if (vmep && (vmep->flags & OPT_FLAG_XLATOR_OPT)) + return _gf_true; + + return _gf_false; +} + +volume_option_type_t +_gd_get_option_type (char *key) +{ + struct volopt_map_entry *vmep = NULL; + void *dl_handle = NULL; + volume_opt_list_t vol_opt_list = {{0},}; + int ret = -1; + volume_option_t *opt = NULL; + char *xlopt_key = NULL; + volume_option_type_t opt_type = GF_OPTION_TYPE_MAX; + + GF_ASSERT (key); + + vmep = _gd_get_vmep (key); + + if (vmep) { + INIT_LIST_HEAD (&vol_opt_list.list); + ret = xlator_volopt_dynload (vmep->voltype, &dl_handle, + &vol_opt_list); + if (ret) + goto out; + + if (_get_xlator_opt_key_from_vme (vmep, &xlopt_key)) + goto out; + + opt = xlator_volume_option_get_list (&vol_opt_list, xlopt_key); + _free_xlator_opt_key (xlopt_key); + + if (opt) + opt_type = opt->type; + } + +out: + if (dl_handle) { + dlclose (dl_handle); + dl_handle = NULL; + } + + return opt_type; +} + +gf_boolean_t +gd_is_boolean_option (char *key) +{ + GF_ASSERT (key); + + if (GF_OPTION_TYPE_BOOL == _gd_get_option_type (key)) + return _gf_true; + + return _gf_false; +} + +/* This function will restore origin volume to it's snap. 
+ * The restore operation will simply replace the Gluster origin + * volume with the snap volume. + * TODO: Multi-volume delete to be done. + * Cleanup in case of restore failure is pending. + * + * @param orig_vol volinfo of origin volume + * @param snap_vol volinfo of snapshot volume + * + * @return 0 on success and negative value on error + */ +int +gd_restore_snap_volume (dict_t *rsp_dict, + glusterd_volinfo_t *orig_vol, + glusterd_volinfo_t *snap_vol) +{ + int ret = -1; + glusterd_volinfo_t *new_volinfo = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *temp_volinfo = NULL; + glusterd_volinfo_t *voliter = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + conf = this->private; + GF_ASSERT (conf); + + GF_VALIDATE_OR_GOTO (this->name, orig_vol, out); + GF_VALIDATE_OR_GOTO (this->name, snap_vol, out); + snap = snap_vol->snapshot; + GF_VALIDATE_OR_GOTO (this->name, snap, out); + + /* Snap volume must be stoped before performing the + * restore operation. + */ + ret = glusterd_stop_volume (snap_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to stop " + "snap volume"); + goto out; + } + + /* Create a new volinfo for the restored volume */ + ret = glusterd_volinfo_dup (snap_vol, &new_volinfo, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to create volinfo"); + goto out; + } + + /* Following entries need to be derived from origin volume. */ + strcpy (new_volinfo->volname, orig_vol->volname); + uuid_copy (new_volinfo->volume_id, orig_vol->volume_id); + new_volinfo->snap_count = orig_vol->snap_count; + new_volinfo->snap_max_hard_limit = orig_vol->snap_max_hard_limit; + new_volinfo->is_volume_restored = _gf_true; + + /* Bump the version of the restored volume, so that nodes * + * which are done can sync during handshake */ + new_volinfo->version = orig_vol->version; + + list_for_each_entry_safe (voliter, temp_volinfo, + &orig_vol->snap_volumes, snapvol_list) { + list_add_tail (&voliter->snapvol_list, + &new_volinfo->snap_volumes); + } + /* Copy the snap vol info to the new_volinfo.*/ + ret = glusterd_snap_volinfo_restore (rsp_dict, new_volinfo, snap_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to restore snap"); + (void)glusterd_volinfo_delete (new_volinfo); + goto out; + } + + /* If the orig_vol is already restored then we should delete + * the backend LVMs */ + if (orig_vol->is_volume_restored) { + ret = glusterd_lvm_snapshot_remove (rsp_dict, orig_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to remove " + "LVM backend"); + (void)glusterd_volinfo_delete (new_volinfo); + goto out; } } - return 0; + /* Once the new_volinfo is completely constructed then delete + * the orinal volinfo + */ + ret = glusterd_volinfo_delete (orig_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to delete volinfo"); + (void)glusterd_volinfo_delete (new_volinfo); + goto out; + } + + /* New volinfo always shows the status as created. Therefore + * set the status to stop. */ + glusterd_set_volume_status (new_volinfo, GLUSTERD_STATUS_STOPPED); + + list_add_tail (&new_volinfo->vol_list, &conf->volumes); + + /* Now delete the snap entry. As a first step delete the snap + * volume information stored in store. 
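/* --- editor's note ------------------------------------------------
 * In gd_restore_snap_volume() above, the comment announces "Bump the
 * version of the restored volume" but the assignment copies it
 * verbatim (new_volinfo->version = orig_vol->version), so either the
 * comment or the code appears stale: without an increment, peers
 * that missed the restore may not resync the volinfo on handshake.
 * The adjacent phrase "nodes which are done" presumably means "nodes
 * which are down". */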
*/ + ret = glusterd_snap_remove (rsp_dict, snap, _gf_false, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Failed to delete " + "snap %s", snap->snapname); + goto out; + } + + ret = glusterd_store_volinfo (new_volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to store volinfo"); + goto out; + } + + ret = 0; +out: + + return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h index 0a9647bdf..fcbaaf93e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.h +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h @@ -23,6 +23,8 @@ #define VKEY_DIAG_LAT_MEASUREMENT "diagnostics.latency-measurement" #define VKEY_FEATURES_LIMIT_USAGE "features.limit-usage" #define VKEY_MARKER_XTIME GEOREP".indexing" +#define VKEY_MARKER_XTIME_FORCE GEOREP".ignore-pid-check" +#define VKEY_CHANGELOG "changelog.changelog" #define VKEY_FEATURES_QUOTA "features.quota" #define AUTH_ALLOW_MAP_KEY "auth.allow" @@ -60,7 +62,9 @@ typedef enum { typedef enum gd_volopt_flags_ { OPT_FLAG_NONE, - OPT_FLAG_FORCE = 1, + OPT_FLAG_FORCE = 0x01, // option needs force to be reset + OPT_FLAG_XLATOR_OPT = 0x02, // option enables/disables xlators + OPT_FLAG_CLIENT_OPT = 0x04, // option affects clients } gd_volopt_flags_t; typedef enum { @@ -71,6 +75,7 @@ typedef enum { GF_XLATOR_INDEX, GF_XLATOR_MARKER, GF_XLATOR_IO_STATS, + GF_XLATOR_BD, GF_XLATOR_NONE, } glusterd_server_xlator_t; @@ -99,6 +104,10 @@ struct volopt_map_entry { uint32_t op_version; char *description; vme_option_validation validate_fn; + /* If client_option is true, the option affects clients. + * this is used to calculate client-op-version of volumes + */ + //gf_boolean_t client_option; }; int glusterd_create_rb_volfiles (glusterd_volinfo_t *volinfo, @@ -115,6 +124,10 @@ int glusterd_create_shd_volfile (); int glusterd_delete_volfile (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo); +int +glusterd_delete_snap_volfile (glusterd_volinfo_t *volinfo, + glusterd_volinfo_t *snap_volinfo, + glusterd_brickinfo_t *brickinfo); int glusterd_volinfo_get (glusterd_volinfo_t *volinfo, char *key, char **value); int glusterd_volinfo_get_boolean (glusterd_volinfo_t *volinfo, char *key); @@ -128,12 +141,36 @@ glusterd_check_voloption_flags (char *key, int32_t flags); gf_boolean_t glusterd_is_valid_volfpath (char *volname, char *brick); int generate_brick_volfiles (glusterd_volinfo_t *volinfo); +int generate_snap_brick_volfiles (glusterd_volinfo_t *volinfo, + glusterd_volinfo_t *snap_volinfo); +int generate_client_volfiles (glusterd_volinfo_t *volinfo, + glusterd_client_type_t client_type); +int +generate_snap_client_volfiles (glusterd_volinfo_t *actual_volinfo, + glusterd_volinfo_t *snap_volinfo, + glusterd_client_type_t client_type, + gf_boolean_t vol_restore); int glusterd_get_volopt_content (dict_t *dict, gf_boolean_t xml_out); char* glusterd_get_trans_type_rb (gf_transport_type ttype); int glusterd_check_nfs_volfile_identical (gf_boolean_t *identical); +int +glusterd_check_nfs_topology_identical (gf_boolean_t *identical); uint32_t glusterd_get_op_version_for_key (char *key); + +gf_boolean_t +gd_is_client_option (char *key); + +gf_boolean_t +gd_is_xlator_option (char *key); + +gf_boolean_t +gd_is_boolean_option (char *key); + +int gd_restore_snap_volume (dict_t *rsp_dict, + glusterd_volinfo_t *orig_vol, + glusterd_volinfo_t *snap_vol); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c 
b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index bc5f8f2a7..0d322b9ad 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -30,8 +30,9 @@ #define glusterd_op_start_volume_args_get(dict, volname, flags) \ glusterd_op_stop_volume_args_get (dict, volname, flags) + int -glusterd_handle_create_volume (rpcsvc_request_t *req) +__glusterd_handle_create_volume (rpcsvc_request_t *req) { int32_t ret = -1; gf_cli_req cli_req = {{0,}}; @@ -134,6 +135,11 @@ glusterd_handle_create_volume (rpcsvc_request_t *req) goto out; } + if (!dict_get (dict, "force")) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get 'force' flag"); + goto out; + } + uuid_generate (volume_id); free_ptr = gf_strdup (uuid_utoa (volume_id)); ret = dict_set_dynstr (dict, "volume-id", free_ptr); @@ -187,7 +193,14 @@ out: } int -glusterd_handle_cli_start_volume (rpcsvc_request_t *req) +glusterd_handle_create_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_create_volume); +} + +int +__glusterd_handle_cli_start_volume (rpcsvc_request_t *req) { int32_t ret = -1; gf_cli_req cli_req = {{0,}}; @@ -252,9 +265,15 @@ out: return ret; } +int +glusterd_handle_cli_start_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_start_volume); +} int -glusterd_handle_cli_stop_volume (rpcsvc_request_t *req) +__glusterd_handle_cli_stop_volume (rpcsvc_request_t *req) { int32_t ret = -1; gf_cli_req cli_req = {{0,}}; @@ -322,7 +341,14 @@ out: } int -glusterd_handle_cli_delete_volume (rpcsvc_request_t *req) +glusterd_handle_cli_stop_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_stop_volume); +} + +int +__glusterd_handle_cli_delete_volume (rpcsvc_request_t *req) { int32_t ret = -1; gf_cli_req cli_req = {{0,},}; @@ -392,7 +418,14 @@ out: } int -glusterd_handle_cli_heal_volume (rpcsvc_request_t *req) +glusterd_handle_cli_delete_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_delete_volume); +} + +int +__glusterd_handle_cli_heal_volume (rpcsvc_request_t *req) { int32_t ret = -1; gf_cli_req cli_req = {{0,}}; @@ -476,7 +509,14 @@ out: } int -glusterd_handle_cli_statedump_volume (rpcsvc_request_t *req) +glusterd_handle_cli_heal_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_heal_volume); +} + +int +__glusterd_handle_cli_statedump_volume (rpcsvc_request_t *req) { int32_t ret = -1; gf_cli_req cli_req = {{0,}}; @@ -557,32 +597,104 @@ out: return ret; } +int +glusterd_handle_cli_statedump_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_statedump_volume); +} + #ifdef HAVE_BD_XLATOR +/* + * Validates if given VG in the brick exists or not. Also checks if VG has + * GF_XATTR_VOL_ID_KEY tag set to avoid using same VG for multiple bricks. + * Tag is checked only during glusterd_op_stage_create_volume. Tag is set during + * glusterd_validate_and_create_brickpath(). 
+ * @brick - brick info, @check_tag - check for VG tag or not + * @msg - Error message to return to caller + */ int -glusterd_is_valid_vg (const char *name) +glusterd_is_valid_vg (glusterd_brickinfo_t *brick, int check_tag, char *msg) { - lvm_t handle = NULL; - vg_t vg = NULL; - char *vg_name = NULL; - int retval = -1; + lvm_t handle = NULL; + vg_t vg = NULL; + char *vg_name = NULL; + int retval = 0; + char *p = NULL; + char *ptr = NULL; + struct dm_list *dm_lvlist = NULL; + struct dm_list *dm_seglist = NULL; + struct lvm_lv_list *lv_list = NULL; + struct lvm_property_value prop = {0, }; + struct lvm_lvseg_list *seglist = NULL; + struct dm_list *taglist = NULL; + struct lvm_str_list *strl = NULL; handle = lvm_init (NULL); if (!handle) { - gf_log ("", GF_LOG_ERROR, "lvm_init failed"); + sprintf (msg, "lvm_init failed, could not validate vg"); return -1; } - vg_name = gf_strdup (name); - vg = lvm_vg_open (handle, basename (vg_name), "r", 0); + if (*brick->vg == '\0') { /* BD xlator has vg in brick->path */ + p = gf_strdup (brick->path); + vg_name = strtok_r (p, "/", &ptr); + } else + vg_name = brick->vg; + + vg = lvm_vg_open (handle, vg_name, "r", 0); if (!vg) { - gf_log ("", GF_LOG_ERROR, "no such vg: %s", vg_name); - goto out; + sprintf (msg, "no such vg: %s", vg_name); + retval = -1; + goto out; + } + if (!check_tag) + goto next; + + taglist = lvm_vg_get_tags (vg); + if (!taglist) + goto next; + + dm_list_iterate_items (strl, taglist) { + if (!strncmp(strl->str, GF_XATTR_VOL_ID_KEY, + strlen (GF_XATTR_VOL_ID_KEY))) { + sprintf (msg, "VG %s is already part of" + " a brick", vg_name); + retval = -1; + goto out; + } + } +next: + + brick->caps = CAPS_BD | CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT; + + dm_lvlist = lvm_vg_list_lvs (vg); + if (!dm_lvlist) + goto out; + + dm_list_iterate_items (lv_list, dm_lvlist) { + dm_seglist = lvm_lv_list_lvsegs (lv_list->lv); + dm_list_iterate_items (seglist, dm_seglist) { + prop = lvm_lvseg_get_property (seglist->lvseg, + "segtype"); + if (!prop.is_valid || !prop.value.string) + continue; + if (!strcmp (prop.value.string, "thin-pool")) { + brick->caps |= CAPS_THIN; + gf_log (THIS->name, GF_LOG_INFO, "Thin Pool " + "\"%s\" will be used for thin LVs", + lvm_lv_get_name (lv_list->lv)); + break; + } + } } + retval = 0; out: if (vg) lvm_vg_close (vg); lvm_quit (handle); - GF_FREE (vg_name); + if (p) + GF_FREE (p); return retval; } #endif @@ -607,9 +719,8 @@ glusterd_op_stage_create_volume (dict_t *dict, char **op_errstr) char msg[2048] = {0}; uuid_t volume_uuid; char *volume_uuid_str; -#ifdef HAVE_BD_XLATOR - char *dev_type = NULL; -#endif + gf_boolean_t is_force = _gf_false; + this = THIS; GF_ASSERT (this); priv = this->private; @@ -652,10 +763,6 @@ glusterd_op_stage_create_volume (dict_t *dict, char **op_errstr) goto out; } -#ifdef HAVE_BD_XLATOR - ret = dict_get_str (dict, "device", &dev_type); -#endif - ret = dict_get_str (dict, "bricks", &bricks); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Unable to get bricks for " @@ -663,6 +770,8 @@ glusterd_op_stage_create_volume (dict_t *dict, char **op_errstr) goto out; } + is_force = dict_get_str_boolean (dict, "force", _gf_false); + if (bricks) { brick_list = gf_strdup (bricks); if (!brick_list) { @@ -702,23 +811,18 @@ glusterd_op_stage_create_volume (dict_t *dict, char **op_errstr) goto out; } + if (!uuid_compare (brick_info->uuid, MY_UUID)) { + #ifdef HAVE_BD_XLATOR - if (dev_type) { - ret = glusterd_is_valid_vg (brick_info->path); - if (ret) { - snprintf (msg, sizeof(msg), "invalid vg %s", - brick_info->path); 
- goto out + if (brick_info->vg[0]) { + ret = glusterd_is_valid_vg (brick_info, 1, msg); + if (ret) + goto out; } - - break; - } else #endif - if (!uuid_compare (brick_info->uuid, MY_UUID)) { - ret = glusterd_brick_create_path (brick_info->hostname, - brick_info->path, - volume_uuid, - op_errstr); + ret = glusterd_validate_and_create_brickpath (brick_info, + volume_uuid, op_errstr, + is_force); if (ret) goto out; brick_list = tmpptr; @@ -813,6 +917,7 @@ glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr) uuid_t volume_id = {0,}; char volid[50] = {0,}; char xattr_volid[50] = {0,}; + int caps = 0; this = THIS; GF_ASSERT (this); @@ -862,11 +967,10 @@ glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr) if (uuid_compare (brickinfo->uuid, MY_UUID)) continue; - if (volinfo->backend == GD_VOL_BK_BD) - continue; - ret = gf_lstat_dir (brickinfo->path, NULL); - if (ret) { + if (ret && (flags & GF_CLI_FLAG_OP_FORCE)) { + continue; + } else if (ret) { snprintf (msg, sizeof (msg), "Failed to find " "brick directory %s for volume %s. " "Reason : %s", brickinfo->path, @@ -875,13 +979,27 @@ } ret = sys_lgetxattr (brickinfo->path, GF_XATTR_VOL_ID_KEY, volume_id, 16); - if (ret < 0) { + if (ret < 0 && (!(flags & GF_CLI_FLAG_OP_FORCE))) { snprintf (msg, sizeof (msg), "Failed to get " "extended attribute %s for brick dir %s. " "Reason : %s", GF_XATTR_VOL_ID_KEY, brickinfo->path, strerror (errno)); ret = -1; goto out; + } else if (ret < 0) { + ret = sys_lsetxattr (brickinfo->path, + GF_XATTR_VOL_ID_KEY, + volinfo->volume_id, 16, + XATTR_CREATE); + if (ret) { + snprintf (msg, sizeof (msg), "Failed to set " + "extended attribute %s on %s. Reason: " + "%s", GF_XATTR_VOL_ID_KEY, + brickinfo->path, strerror (errno)); + goto out; + } else { + continue; + } } if (uuid_compare (volinfo->volume_id, volume_id)) { snprintf (msg, sizeof (msg), "Volume id mismatch for " @@ -893,8 +1011,24 @@ ret = -1; goto out; } +#ifdef HAVE_BD_XLATOR + if (brickinfo->vg[0]) + caps = CAPS_BD | CAPS_THIN + + CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT; + /* Check for VG/thin pool if it's a BD volume */ + if (brickinfo->vg[0]) { + ret = glusterd_is_valid_vg (brickinfo, 0, msg); + if (ret) + goto out; + /* if any one of the bricks does not have thin support, + disable it for the entire volume */ + caps &= brickinfo->caps; + } else + caps = 0; +#endif } + volinfo->caps = caps; ret = 0; out: if (ret && (msg[0] != '\0')) { @@ -1049,6 +1183,16 @@ glusterd_op_stage_delete_volume (dict_t *dict, char **op_errstr) goto out; } + if (volinfo->snap_count > 0 || !list_empty(&volinfo->snap_volumes)) { + snprintf (msg, sizeof (msg), "Cannot delete Volume %s, " + "as it has %ld snapshots. " + "To delete the volume, " + "first delete all the snapshots under it.", + volname, volinfo->snap_count); + ret = -1; + goto out; + } + ret = 0; out: @@ -1146,14 +1290,22 @@ glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr) goto out; } - if ((heal_op != GF_AFR_OP_INDEX_SUMMARY) && - !glusterd_is_nodesvc_online ("glustershd")) { - ret = -1; - *op_errstr = gf_strdup ("Self-heal daemon is not running." " Check self-heal daemon log file."); - gf_log (this->name, GF_LOG_WARNING, "%s", "Self-heal daemon is " "not running. Check self-heal daemon log file."); - goto out; 
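/* Sketch (not part of this patch): the switch that follows widens the old
 * single INDEX_SUMMARY exemption to every heal op that is a pure query.
 * The same predicate, written as a hypothetical helper for illustration:
 */
static gf_boolean_t
_heal_op_needs_shd (int heal_op)
{
        switch (heal_op) {
        case GF_AFR_OP_INDEX_SUMMARY:
        case GF_AFR_OP_STATISTICS_HEAL_COUNT:
        case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
                /* read-only queries; served without glustershd running */
                return _gf_false;
        default:
                /* actual heal operations require a live self-heal daemon */
                return _gf_true;
        }
}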
+ switch (heal_op) { + case GF_AFR_OP_INDEX_SUMMARY: + case GF_AFR_OP_STATISTICS_HEAL_COUNT: + case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA: + break; + default: + if (!glusterd_is_nodesvc_online("glustershd")){ + ret = -1; + *op_errstr = gf_strdup ("Self-heal daemon is " + "not running. Check self-heal " + "daemon log file."); + gf_log (this->name, GF_LOG_WARNING, "%s", + "Self-heal daemon is not running. " + "Check self-heal daemon log file."); + goto out; + } } ret = 0; @@ -1277,109 +1429,6 @@ out: return ret; } -#ifdef HAVE_BD_XLATOR -int -glusterd_op_stage_bd (dict_t *dict, char **op_errstr) -{ - int ret = -1; - char *volname = NULL; - char *path = NULL; - char *size = NULL; - glusterd_volinfo_t *volinfo = NULL; - char msg[2048] = {0,}; - gf_xl_bd_op_t bd_op = GF_BD_OP_INVALID; - uint64_t bytes = 0; - - ret = dict_get_str (dict, "volname", &volname); - if (ret) { - snprintf (msg, sizeof(msg), "Failed to get volume name"); - gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - goto out; - } - - ret = dict_get_int32 (dict, "bd-op", (int32_t *)&bd_op); - if (ret) { - snprintf (msg, sizeof(msg), "Failed to get bd-op"); - gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - goto out; - } - - ret = dict_get_str (dict, "path", &path); - if (ret) { - snprintf (msg, sizeof(msg), "Failed to get path"); - gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - goto out; - } - - if (bd_op == GF_BD_OP_NEW_BD) { - ret = dict_get_str (dict, "size", &size); - if (ret) { - snprintf (msg, sizeof(msg), "Failed to get size"); - gf_log ("", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - goto out; - } - if (gf_string2bytesize (size, &bytes) < 0) { - snprintf (msg, sizeof(msg), - "Invalid size %s, suffix with KB, MB etc", - size); - gf_log ("", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - ret = -1; - goto out; - } - } else if (bd_op == GF_BD_OP_SNAPSHOT_BD) { - ret = dict_get_str (dict, "size", &size); - if (ret) { - snprintf (msg, sizeof(msg), "Failed to get size"); - gf_log ("", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - goto out; - } - - if (gf_string2bytesize (size, &bytes) < 0) { - ret = -1; - snprintf (msg, sizeof(msg), - "Invalid size %s, suffix with KB, MB etc", - size); - gf_log ("", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - goto out; - } - } - - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - snprintf (msg, sizeof(msg), "Volume %s does not exist", - volname); - gf_log ("", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - goto out; - } - - ret = glusterd_validate_volume_id (dict, volinfo); - if (ret) - goto out; - - if (!glusterd_is_volume_started (volinfo)) { - snprintf (msg, sizeof(msg), "Volume %s is not started", - volname); - gf_log ("", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - ret = -1; - goto out; - } - - ret = 0; -out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} -#endif - int glusterd_op_create_volume (dict_t *dict, char **op_errstr) { @@ -1401,9 +1450,8 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr) char *str = NULL; char *username = NULL; char *password = NULL; -#ifdef HAVE_BD_XLATOR - char *device = NULL; -#endif + int caps = 0; + char msg[1024] __attribute__((unused)) = {0, }; this = THIS; GF_ASSERT (this); @@ -1458,12 +1506,6 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr) goto out; } 
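/* Sketch (not part of this patch): with the removal below, a volume's BD
 * backing is no longer signalled by a "device" key in the request dict;
 * it is inferred per brick from the vg field this patch adds to
 * glusterd_brickinfo_t. A hypothetical predicate, for illustration only:
 */
static gf_boolean_t
_brick_is_bd_backed (glusterd_brickinfo_t *brickinfo)
{
        /* An empty vg string denotes a plain posix brick; a non-empty vg
         * names the LVM volume group backing a BD brick. */
        return (brickinfo->vg[0] != '\0') ? _gf_true : _gf_false;
}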
-#ifdef HAVE_BD_XLATOR - ret = dict_get_str (dict, "device", &device); - if (!ret) - volinfo->backend = GD_VOL_BK_BD; -#endif - /* replica-count 1 means, no replication, file is in one brick only */ volinfo->replica_count = 1; /* stripe-count 1 means, no striping, file is present as a whole */ @@ -1572,6 +1614,7 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr) if (count) brick = strtok_r (brick_list+1, " \n", &saveptr); + caps = CAPS_BD | CAPS_THIN | CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT; while ( i <= count) { ret = glusterd_brickinfo_new_from_brick (brick, &brickinfo); @@ -1584,11 +1627,36 @@ brickinfo->hostname, brickinfo->path); goto out; } + +#ifdef HAVE_BD_XLATOR + if (!uuid_compare (brickinfo->uuid, MY_UUID)) { + if (brickinfo->vg[0]) { + ret = glusterd_is_valid_vg (brickinfo, 0, msg); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", + msg); + goto out; + } + + /* if any one of the bricks does not have thin + support, disable it for the entire volume */ + caps &= brickinfo->caps; + + + } else + caps = 0; + } +#endif + list_add_tail (&brickinfo->brick_list, &volinfo->bricks); brick = strtok_r (NULL, " \n", &saveptr); i++; } + gd_update_volume_op_versions (volinfo); + + volinfo->caps = caps; + ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) { glusterd_store_delete_volume (volinfo); @@ -1605,6 +1673,7 @@ volinfo->rebal.defrag_status = 0; list_add_tail (&volinfo->vol_list, &priv->volumes); vol_added = _gf_true; + out: GF_FREE(free_ptr); if (!vol_added && volinfo) @@ -1638,7 +1707,10 @@ glusterd_op_start_volume (dict_t *dict, char **op_errstr) list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { ret = glusterd_brick_start (volinfo, brickinfo, _gf_true); - if (ret) + /* If 'force' is given, try to start all bricks regardless of + * success or failure + */ + if (!(flags & GF_CLI_FLAG_OP_FORCE) && ret) goto out; } @@ -1655,6 +1727,47 @@ out: return ret; } +int +glusterd_stop_volume (glusterd_volinfo_t *volinfo) +{ + int ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + GF_VALIDATE_OR_GOTO (this->name, volinfo, out); + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + ret = glusterd_brick_stop (volinfo, brickinfo, _gf_false); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to stop " + "brick (%s)", brickinfo->path); + goto out; + } + } + + glusterd_set_volume_status (volinfo, GLUSTERD_STATUS_STOPPED); + + ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to store volinfo of " + "%s volume", volinfo->volname); + goto out; + } + + ret = glusterd_nodesvcs_handle_graph_change (volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to notify graph " + "change for %s volume", volinfo->volname); + goto out; + } + +out: + return ret; +} + int glusterd_op_stop_volume (dict_t *dict) @@ -1663,7 +1776,6 @@ glusterd_op_stop_volume (dict_t *dict) int flags = 0; char *volname = NULL; glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; xlator_t *this = NULL; this = THIS; @@ -1680,19 +1792,12 @@ glusterd_op_stop_volume (dict_t *dict) goto out; } - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - ret = glusterd_brick_stop (volinfo, brickinfo, _gf_false); - if (ret) - goto out; - } - - glusterd_set_volume_status (volinfo, 
GLUSTERD_STATUS_STOPPED); - - ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); - if (ret) + ret = glusterd_stop_volume (volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to stop %s volume", + volname); goto out; - - ret = glusterd_nodesvcs_handle_graph_change (volinfo); + } out: return ret; } @@ -1844,7 +1949,9 @@ glusterd_clearlocks_unmount (glusterd_volinfo_t *volinfo, char *mntpt) runner_add_args (&runner, "/bin/umount", "-f", NULL); runner_argprintf (&runner, "%s", mntpt); + synclock_unlock (&priv->big_lock); ret = runner_run (&runner); + synclock_lock (&priv->big_lock); if (ret) { ret = 0; gf_log ("", GF_LOG_DEBUG, @@ -1916,7 +2023,9 @@ glusterd_clearlocks_mount (glusterd_volinfo_t *volinfo, char **xl_opts, } runner_argprintf (&runner, "%s", mntpt); + synclock_unlock (&priv->big_lock); ret = runner_run (&runner); + synclock_lock (&priv->big_lock); if (ret) { gf_log (THIS->name, GF_LOG_DEBUG, "Could not start glusterfs"); diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index bc8d53e85..665a8b298 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -347,145 +347,212 @@ out: struct volopt_map_entry glusterd_volopt_map[] = { /* DHT xlator options */ - { .key = "cluster.lookup-unhashed", - .voltype = "cluster/distribute", - .op_version = 1 - }, - { .key = "cluster.min-free-disk", - .voltype = "cluster/distribute", - .op_version = 1 - }, - { .key = "cluster.min-free-inodes", - .voltype = "cluster/distribute", - .op_version = 1 - }, - { .key = "cluster.rebalance-stats", - .voltype = "cluster/distribute", - .op_version = 2 + { .key = "cluster.lookup-unhashed", + .voltype = "cluster/distribute", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.min-free-disk", + .voltype = "cluster/distribute", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.min-free-inodes", + .voltype = "cluster/distribute", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.rebalance-stats", + .voltype = "cluster/distribute", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT }, { .key = "cluster.subvols-per-directory", .voltype = "cluster/distribute", .option = "directory-layout-spread", .op_version = 2, - .validate_fn = validate_subvols_per_directory - }, - { .key = "cluster.readdir-optimize", - .voltype = "cluster/distribute", - .op_version = 2 - }, - { .key = "cluster.nufa", - .voltype = "cluster/distribute", - .option = "!nufa", - .type = NO_DOC, - .op_version = 2 - }, - { .key = "cluster.rsync-hash-regex", - .voltype = "cluster/distribute", - .type = NO_DOC, - .op_version = 2 - }, - { .key = "cluster.extra-hash-regex", - .voltype = "cluster/distribute", - .type = NO_DOC, - .op_version = 2 + .validate_fn = validate_subvols_per_directory, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.readdir-optimize", + .voltype = "cluster/distribute", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.rsync-hash-regex", + .voltype = "cluster/distribute", + .type = NO_DOC, + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.extra-hash-regex", + .voltype = "cluster/distribute", + .type = NO_DOC, + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.dht-xattr-name", + .voltype = "cluster/distribute", + .option = "xattr-name", + .type = NO_DOC, + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT + }, + + /* NUFA xlator 
options (Distribute special case) */ + { .key = "cluster.nufa", + .voltype = "cluster/distribute", + .option = "!nufa", + .type = NO_DOC, + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.local-volume-name", + .voltype = "cluster/nufa", + .option = "local-volume-name", + .type = NO_DOC, + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT + }, + + /* Switch xlator options (Distribute special case) */ + { .key = "cluster.switch", + .voltype = "cluster/distribute", + .option = "!switch", + .type = NO_DOC, + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.switch-pattern", + .voltype = "cluster/switch", + .option = "pattern.switch.case", + .type = NO_DOC, + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT }, /* AFR xlator options */ - { .key = "cluster.entry-change-log", - .voltype = "cluster/replicate", - .op_version = 1 - }, - { .key = "cluster.read-subvolume", - .voltype = "cluster/replicate", - .op_version = 1 - }, - { .key = "cluster.read-subvolume-index", - .voltype = "cluster/replicate", - .op_version = 2 - }, - { .key = "cluster.read-hash-mode", - .voltype = "cluster/replicate", - .op_version = 2 - }, - { .key = "cluster.background-self-heal-count", - .voltype = "cluster/replicate", - .op_version = 1 - }, - { .key = "cluster.metadata-self-heal", - .voltype = "cluster/replicate", - .op_version = 1 - }, - { .key = "cluster.data-self-heal", - .voltype = "cluster/replicate", - .op_version = 1 - }, - { .key = "cluster.entry-self-heal", - .voltype = "cluster/replicate", - .op_version = 1 - }, - { .key = "cluster.self-heal-daemon", - .voltype = "cluster/replicate", - .option = "!self-heal-daemon", - .op_version = 1 - }, - { .key = "cluster.heal-timeout", - .voltype = "cluster/replicate", - .option = "!heal-timeout", - .op_version = 2 - }, - { .key = "cluster.strict-readdir", - .voltype = "cluster/replicate", - .type = NO_DOC, - .op_version = 1 - }, - { .key = "cluster.self-heal-window-size", - .voltype = "cluster/replicate", - .option = "data-self-heal-window-size", - .op_version = 1 - }, - { .key = "cluster.data-change-log", - .voltype = "cluster/replicate", - .op_version = 1 - }, - { .key = "cluster.metadata-change-log", - .voltype = "cluster/replicate", - .op_version = 1 - }, - { .key = "cluster.data-self-heal-algorithm", - .voltype = "cluster/replicate", - .option = "data-self-heal-algorithm", - .op_version = 1 - }, - { .key = "cluster.eager-lock", - .voltype = "cluster/replicate", - .op_version = 1 - }, - { .key = "cluster.quorum-type", - .voltype = "cluster/replicate", - .option = "quorum-type", - .op_version = 1 - }, - { .key = "cluster.quorum-count", - .voltype = "cluster/replicate", - .option = "quorum-count", - .op_version = 1 - }, - { .key = "cluster.choose-local", - .voltype = "cluster/replicate", - .op_version = 2 - }, - { .key = "cluster.self-heal-readdir-size", - .voltype = "cluster/replicate", - .op_version = 2 - }, - { .key = "cluster.post-op-delay-secs", - .voltype = "cluster/replicate", - .type = NO_DOC, - .op_version = 2 - }, - { .key = "cluster.readdir-failover", - .voltype = "cluster/replicate", - .op_version = 2 + { .key = "cluster.entry-change-log", + .voltype = "cluster/replicate", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.read-subvolume", + .voltype = "cluster/replicate", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.read-subvolume-index", + .voltype = "cluster/replicate", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = 
"cluster.read-hash-mode", + .voltype = "cluster/replicate", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.background-self-heal-count", + .voltype = "cluster/replicate", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.metadata-self-heal", + .voltype = "cluster/replicate", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.data-self-heal", + .voltype = "cluster/replicate", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.entry-self-heal", + .voltype = "cluster/replicate", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.self-heal-daemon", + .voltype = "cluster/replicate", + .option = "!self-heal-daemon", + .op_version = 1 + }, + { .key = "cluster.heal-timeout", + .voltype = "cluster/replicate", + .option = "!heal-timeout", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.strict-readdir", + .voltype = "cluster/replicate", + .type = NO_DOC, + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.self-heal-window-size", + .voltype = "cluster/replicate", + .option = "data-self-heal-window-size", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.data-change-log", + .voltype = "cluster/replicate", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.metadata-change-log", + .voltype = "cluster/replicate", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.data-self-heal-algorithm", + .voltype = "cluster/replicate", + .option = "data-self-heal-algorithm", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.eager-lock", + .voltype = "cluster/replicate", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.quorum-type", + .voltype = "cluster/replicate", + .option = "quorum-type", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.quorum-count", + .voltype = "cluster/replicate", + .option = "quorum-count", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.choose-local", + .voltype = "cluster/replicate", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.self-heal-readdir-size", + .voltype = "cluster/replicate", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.post-op-delay-secs", + .voltype = "cluster/replicate", + .type = NO_DOC, + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.readdir-failover", + .voltype = "cluster/replicate", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.ensure-durability", + .voltype = "cluster/replicate", + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT }, /* Stripe xlator options */ @@ -493,12 +560,14 @@ struct volopt_map_entry glusterd_volopt_map[] = { .voltype = "cluster/stripe", .option = "block-size", .op_version = 1, - .validate_fn = validate_stripe + .validate_fn = validate_stripe, + .flags = OPT_FLAG_CLIENT_OPT }, - { .key = "cluster.stripe-coalesce", - .voltype = "cluster/stripe", - .option = "coalesce", - .op_version = 2 + { .key = "cluster.stripe-coalesce", + .voltype = "cluster/stripe", + .option = "coalesce", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT }, /* IO-stats xlator options */ @@ -524,20 +593,22 @@ struct volopt_map_entry glusterd_volopt_map[] = { .option = "!brick-log-level", .op_version = 1 }, - { .key = "diagnostics.client-log-level", - .voltype = "debug/io-stats", - .option = "!client-log-level", - 
.op_version = 1 + { .key = "diagnostics.client-log-level", + .voltype = "debug/io-stats", + .option = "!client-log-level", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT }, { .key = "diagnostics.brick-sys-log-level", .voltype = "debug/io-stats", .option = "!sys-log-level", .op_version = 1 }, - { .key = "diagnostics.client-sys-log-level", - .voltype = "debug/io-stats", - .option = "!sys-log-level", - .op_version = 1 + { .key = "diagnostics.client-sys-log-level", + .voltype = "debug/io-stats", + .option = "!sys-log-level", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT }, /* IO-cache xlator options */ @@ -545,27 +616,32 @@ struct volopt_map_entry glusterd_volopt_map[] = { .voltype = "performance/io-cache", .option = "max-file-size", .op_version = 1, - .validate_fn = validate_cache_max_min_size + .validate_fn = validate_cache_max_min_size, + .flags = OPT_FLAG_CLIENT_OPT }, { .key = "performance.cache-min-file-size", .voltype = "performance/io-cache", .option = "min-file-size", .op_version = 1, - .validate_fn = validate_cache_max_min_size + .validate_fn = validate_cache_max_min_size, + .flags = OPT_FLAG_CLIENT_OPT }, - { .key = "performance.cache-refresh-timeout", - .voltype = "performance/io-cache", - .option = "cache-timeout", - .op_version = 1 + { .key = "performance.cache-refresh-timeout", + .voltype = "performance/io-cache", + .option = "cache-timeout", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT }, - { .key = "performance.cache-priority", - .voltype = "performance/io-cache", - .option = "priority", - .op_version = 1 + { .key = "performance.cache-priority", + .voltype = "performance/io-cache", + .option = "priority", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT }, - { .key = "performance.cache-size", - .voltype = "performance/io-cache", - .op_version = 1 + { .key = "performance.cache-size", + .voltype = "performance/io-cache", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT }, /* IO-threads xlator options */ @@ -592,87 +668,130 @@ struct volopt_map_entry glusterd_volopt_map[] = { }, { .key = "performance.enable-least-priority", .voltype = "performance/io-threads", - .op_version = 2 + .op_version = 1 }, { .key = "performance.least-rate-limit", .voltype = "performance/io-threads", - .op_version = 1 + .op_version = 2 }, /* Other perf xlators' options */ - { .key = "performance.cache-size", - .voltype = "performance/quick-read", - .op_version = 1 - }, - { .key = "performance.flush-behind", - .voltype = "performance/write-behind", - .option = "flush-behind", - .op_version = 1 - }, - { .key = "performance.write-behind-window-size", - .voltype = "performance/write-behind", - .option = "cache-size", - .op_version = 1 - }, - { .key = "performance.strict-o-direct", - .voltype = "performance/write-behind", - .option = "strict-O_DIRECT", - .op_version = 2 - }, - { .key = "performance.strict-write-ordering", - .voltype = "performance/write-behind", - .option = "strict-write-ordering", - .op_version = 2 - }, - { .key = "performance.lazy-open", - .voltype = "performance/open-behind", - .option = "lazy-open", - .op_version = 2 - }, - { .key = "performance.read-ahead-page-count", - .voltype = "performance/read-ahead", - .option = "page-count", - .op_version = 1 - }, - { .key = "performance.md-cache-timeout", - .voltype = "performance/md-cache", - .option = "md-cache-timeout", - .op_version = 2 + { .key = "performance.cache-size", + .voltype = "performance/quick-read", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "performance.flush-behind", + .voltype = 
"performance/write-behind", + .option = "flush-behind", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "performance.write-behind-window-size", + .voltype = "performance/write-behind", + .option = "cache-size", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "performance.strict-o-direct", + .voltype = "performance/write-behind", + .option = "strict-O_DIRECT", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "performance.strict-write-ordering", + .voltype = "performance/write-behind", + .option = "strict-write-ordering", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "performance.lazy-open", + .voltype = "performance/open-behind", + .option = "lazy-open", + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "performance.read-ahead-page-count", + .voltype = "performance/read-ahead", + .option = "page-count", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "performance.md-cache-timeout", + .voltype = "performance/md-cache", + .option = "md-cache-timeout", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + + /* Crypt xlator options */ + + { .key = "features.encryption", + .voltype = "encryption/crypt", + .option = "!feat", + .value = "off", + .op_version = 3, + .description = "enable/disable client-side encryption for " + "the volume.", + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT + }, + + { .key = "encryption.master-key", + .voltype = "encryption/crypt", + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "encryption.data-key-size", + .voltype = "encryption/crypt", + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "encryption.block-size", + .voltype = "encryption/crypt", + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT }, /* Client xlator options */ - { .key = "network.frame-timeout", - .voltype = "protocol/client", - .op_version = 1 - }, - { .key = "network.ping-timeout", - .voltype = "protocol/client", - .op_version = 1 - }, - { .key = "network.tcp-window-size", - .voltype = "protocol/client", - .op_version = 1 - }, - { .key = "features.lock-heal", - .voltype = "protocol/client", - .option = "lk-heal", - .op_version = 1 - }, - { .key = "features.grace-timeout", - .voltype = "protocol/client", - .option = "grace-timeout", - .op_version = 1 - }, - { .key = "client.ssl", - .voltype = "protocol/client", - .option = "transport.socket.ssl-enabled", - .type = NO_DOC, - .op_version = 2 - }, - { .key = "network.remote-dio", - .voltype = "protocol/client", - .option = "filter-O_DIRECT", - .op_version = 1 + { .key = "network.frame-timeout", + .voltype = "protocol/client", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "network.ping-timeout", + .voltype = "protocol/client", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "network.tcp-window-size", + .voltype = "protocol/client", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "features.lock-heal", + .voltype = "protocol/client", + .option = "lk-heal", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "features.grace-timeout", + .voltype = "protocol/client", + .option = "grace-timeout", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "client.ssl", + .voltype = "protocol/client", + .option = "transport.socket.ssl-enabled", + .type = NO_DOC, + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "network.remote-dio", + .voltype = "protocol/client", + .option = "filter-O_DIRECT", + .op_version = 2, + .flags = 
OPT_FLAG_CLIENT_OPT }, /* Server xlator options */ @@ -717,6 +836,12 @@ struct volopt_map_entry glusterd_volopt_map[] = { .option = "statedump-path", .op_version = 1 }, + { .key = "server.outstanding-rpc-limit", + .voltype = "protocol/server", + .option = "rpc.outstanding-rpc-limit", + .type = GLOBAL_DOC, + .op_version = 3 + }, { .key = "features.lock-heal", .voltype = "protocol/server", .option = "lk-heal", @@ -742,35 +867,52 @@ struct volopt_map_entry glusterd_volopt_map[] = { .option = "!perf", .value = "on", .op_version = 1, - .description = "enable/disable write-behind translator in the volume." + .description = "enable/disable write-behind translator in the " + "volume.", + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT }, { .key = "performance.read-ahead", .voltype = "performance/read-ahead", .option = "!perf", .value = "on", .op_version = 1, - .description = "enable/disable read-ahead translator in the volume." + .description = "enable/disable read-ahead translator in the volume.", + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT }, + { .key = "performance.readdir-ahead", + .voltype = "performance/readdir-ahead", + .option = "!perf", + .value = "off", + .op_version = 3, + .description = "enable/disable readdir-ahead translator in the volume.", + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT + }, + { .key = "performance.io-cache", .voltype = "performance/io-cache", .option = "!perf", .value = "on", .op_version = 1, - .description = "enable/disable io-cache translator in the volume." + .description = "enable/disable io-cache translator in the volume.", + .flags = OPT_FLAG_CLIENT_OPT }, { .key = "performance.quick-read", .voltype = "performance/quick-read", .option = "!perf", .value = "on", .op_version = 1, - .description = "enable/disable quick-read translator in the volume." + .description = "enable/disable quick-read translator in the volume.", + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT + }, { .key = "performance.open-behind", .voltype = "performance/open-behind", .option = "!perf", .value = "on", .op_version = 2, - .description = "enable/disable open-behind translator in the volume." + .description = "enable/disable open-behind translator in the volume.", + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT + }, { .key = "performance.stat-prefetch", .voltype = "performance/md-cache", @@ -778,7 +920,8 @@ struct volopt_map_entry glusterd_volopt_map[] = { .value = "on", .op_version = 1, .description = "enable/disable meta-data caching translator in the " - "volume." + "volume.", + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT }, { .key = "performance.client-io-threads", .voltype = "performance/io-threads", @@ -786,77 +929,143 @@ struct volopt_map_entry glusterd_volopt_map[] = { .value = "off", .op_version = 1, .description = "enable/disable io-threads translator in the client " - "graph of volume." 
- }, - { .key = "performance.nfs.write-behind", - .voltype = "performance/write-behind", - .option = "!nfsperf", + "graph of volume.", + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT + }, + { .key = "performance.nfs.write-behind", + .voltype = "performance/write-behind", + .option = "!nfsperf", + .value = "on", + .type = NO_DOC, + .op_version = 1, + .flags = OPT_FLAG_XLATOR_OPT + }, + { .key = "performance.nfs.read-ahead", + .voltype = "performance/read-ahead", + .option = "!nfsperf", + .value = "off", + .type = NO_DOC, + .op_version = 1, + .flags = OPT_FLAG_XLATOR_OPT + }, + { .key = "performance.nfs.io-cache", + .voltype = "performance/io-cache", + .option = "!nfsperf", + .value = "off", + .type = NO_DOC, + .op_version = 1, + .flags = OPT_FLAG_XLATOR_OPT + }, + { .key = "performance.nfs.quick-read", + .voltype = "performance/quick-read", + .option = "!nfsperf", + .value = "off", + .type = NO_DOC, + .op_version = 1, + .flags = OPT_FLAG_XLATOR_OPT + }, + { .key = "performance.nfs.stat-prefetch", + .voltype = "performance/md-cache", + .option = "!nfsperf", + .value = "off", + .type = NO_DOC, + .op_version = 1, + .flags = OPT_FLAG_XLATOR_OPT + }, + { .key = "performance.nfs.io-threads", + .voltype = "performance/io-threads", + .option = "!nfsperf", + .value = "off", + .type = NO_DOC, + .op_version = 1, + .flags = OPT_FLAG_XLATOR_OPT + }, + { .key = "performance.force-readdirp", + .voltype = "performance/md-cache", + .option = "force-readdirp", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + + /* Feature translators */ + { .key = "features.file-snapshot", + .voltype = "features/qemu-block", + .option = "!feat", + .value = "off", + .op_version = 3, + .description = "enable/disable file-snapshot feature in the " + "volume.", + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT + }, + +#ifdef HAVE_LIB_Z + /* Compressor-decompressor xlator options + * defaults used from xlator/feature/compress/src/cdc.h + */ + { .key = "features.compress", + .voltype = "features/cdc", + .option = "!compress", .value = "off", .type = NO_DOC, - .op_version = 1 + .op_version = 2, + .description = "enable/disable compression translator" }, - { .key = "performance.nfs.read-ahead", - .voltype = "performance/read-ahead", - .option = "!nfsperf", - .value = "off", + { .key = "compress.mode", + .voltype = "features/cdc", .type = NO_DOC, - .op_version = 1 + .op_version = 2 }, - { .key = "performance.nfs.io-cache", - .voltype = "performance/io-cache", - .option = "!nfsperf", - .value = "off", + { .key = "compress.window-size", + .voltype = "features/cdc", .type = NO_DOC, - .op_version = 1 + .op_version = 2 }, - { .key = "performance.nfs.quick-read", - .voltype = "performance/quick-read", - .option = "!nfsperf", - .value = "off", + { .key = "compress.mem-level", + .voltype = "features/cdc", .type = NO_DOC, - .op_version = 1 + .op_version = 2 }, - { .key = "performance.nfs.stat-prefetch", - .voltype = "performance/md-cache", - .option = "!nfsperf", - .value = "off", + { .key = "compress.min-size", + .voltype = "features/cdc", .type = NO_DOC, - .op_version = 1 + .op_version = 2 }, - { .key = "performance.nfs.io-threads", - .voltype = "performance/io-threads", - .option = "!nfsperf", - .value = "off", + { .key = "compress.compression-level", + .voltype = "features/cdc", .type = NO_DOC, - .op_version = 1 + .op_version = 2 }, - { .key = "performance.force-readdirp", - .voltype = "performance/md-cache", - .option = "force-readdirp", + { .key = "compress.debug", + .voltype = "features/cdc", + .type = NO_DOC, 
.op_version = 2 }, + #endif /* Quota xlator options */ - { .key = VKEY_FEATURES_LIMIT_USAGE, - .voltype = "features/quota", - .option = "limit-set", - .type = NO_DOC, - .op_version = 1 + { .key = VKEY_FEATURES_LIMIT_USAGE, + .voltype = "features/quota", + .option = "limit-set", + .type = NO_DOC, + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT }, { .key = "features.quota-timeout", .voltype = "features/quota", .option = "timeout", .value = "0", .op_version = 1, - .validate_fn = validate_quota + .validate_fn = validate_quota, + .flags = OPT_FLAG_CLIENT_OPT }, { .key = "features.quota-deem-statfs", .voltype = "features/quota", .option = "deem-statfs", .value = "off", .type = DOC, - .op_version = 2, - .validate_fn = validate_quota + .op_version = 3, + .validate_fn = validate_quota, + .flags = OPT_FLAG_CLIENT_OPT }, /* Marker xlator options */ @@ -876,6 +1085,22 @@ struct volopt_map_entry glusterd_volopt_map[] = { .flags = OPT_FLAG_FORCE, .op_version = 1 }, + { .key = VKEY_MARKER_XTIME_FORCE, + .voltype = "features/marker", + .option = "gsync-force-xtime", + .value = "off", + .type = NO_DOC, + .flags = OPT_FLAG_FORCE, + .op_version = 2 + }, + { .key = VKEY_MARKER_XTIME_FORCE, + .voltype = "features/marker", + .option = "!gsync-force-xtime", + .value = "off", + .type = NO_DOC, + .flags = OPT_FLAG_FORCE, + .op_version = 2 + }, { .key = VKEY_FEATURES_QUOTA, .voltype = "features/marker", .option = "quota", @@ -886,12 +1111,13 @@ struct volopt_map_entry glusterd_volopt_map[] = { }, /* Debug xlators options */ - { .key = "debug.trace", - .voltype = "debug/trace", - .option = "!debug", - .value = "off", - .type = NO_DOC, - .op_version = 1 + { .key = "debug.trace", + .voltype = "debug/trace", + .option = "!debug", + .value = "off", + .type = NO_DOC, + .op_version = 1, + .flags = OPT_FLAG_XLATOR_OPT }, { .key = "debug.log-history", .voltype = "debug/trace", @@ -917,36 +1143,37 @@ struct volopt_map_entry glusterd_volopt_map[] = { .type = NO_DOC, .op_version = 2 }, - { .key = "debug.error-gen", - .voltype = "debug/error-gen", - .option = "!debug", - .value = "off", - .type = NO_DOC, - .op_version = 1 + { .key = "debug.error-gen", + .voltype = "debug/error-gen", + .option = "!debug", + .value = "off", + .type = NO_DOC, + .op_version = 1, + .flags = OPT_FLAG_XLATOR_OPT }, { .key = "debug.error-failure", .voltype = "debug/error-gen", .option = "failure", .type = NO_DOC, - .op_version = 2 + .op_version = 3 }, { .key = "debug.error-number", .voltype = "debug/error-gen", .option = "error-no", .type = NO_DOC, - .op_version = 2 + .op_version = 3 }, { .key = "debug.random-failure", .voltype = "debug/error-gen", .option = "random-failure", .type = NO_DOC, - .op_version = 2 + .op_version = 3 }, { .key = "debug.error-fops", .voltype = "debug/error-gen", .option = "enable", .type = NO_DOC, - .op_version = 2 + .op_version = 3 }, @@ -993,6 +1220,12 @@ struct volopt_map_entry glusterd_volopt_map[] = { .type = GLOBAL_DOC, .op_version = 1 }, + { .key = "nfs.outstanding-rpc-limit", + .voltype = "nfs/server", + .option = "rpc.outstanding-rpc-limit", + .type = GLOBAL_DOC, + .op_version = 3 + }, { .key = "nfs.port", .voltype = "nfs/server", .option = "nfs.port", @@ -1063,50 +1296,115 @@ struct volopt_map_entry glusterd_volopt_map[] = { .type = GLOBAL_DOC, .op_version = 1 }, + { .key = "nfs.acl", + .voltype = "nfs/server", + .option = "nfs.acl", + .type = GLOBAL_DOC, + .op_version = 3 + }, { .key = "nfs.mount-udp", .voltype = "nfs/server", .option = "nfs.mount-udp", .type = GLOBAL_DOC, .op_version = 1 }, + { .key = 
"nfs.mount-rmtab", + .voltype = "nfs/server", + .option = "nfs.mount-rmtab", + .type = GLOBAL_DOC, + .op_version = 1 + }, { .key = "nfs.server-aux-gids", .voltype = "nfs/server", .option = "nfs.server-aux-gids", .type = NO_DOC, .op_version = 2 }, + { .key = "nfs.drc", + .voltype = "nfs/server", + .option = "nfs.drc", + .type = GLOBAL_DOC, + .op_version = 3 + }, + { .key = "nfs.drc-size", + .voltype = "nfs/server", + .option = "nfs.drc-size", + .type = GLOBAL_DOC, + .op_version = 3 + }, + { .key = "nfs.read-size", + .voltype = "nfs/server", + .option = "nfs3.read-size", + .type = GLOBAL_DOC, + .op_version = 3 + }, + { .key = "nfs.write-size", + .voltype = "nfs/server", + .option = "nfs3.write-size", + .type = GLOBAL_DOC, + .op_version = 3 + }, + { .key = "nfs.readdir-size", + .voltype = "nfs/server", + .option = "nfs3.readdir-size", + .type = GLOBAL_DOC, + .op_version = 3 + }, /* Other options which don't fit any place above */ - { .key = "features.read-only", - .voltype = "features/read-only", - .option = "!read-only", - .value = "off", - .op_version = 2 - }, - { .key = "features.worm", - .voltype = "features/worm", - .option = "!worm", - .value = "off", - .op_version = 2 + { .key = "features.read-only", + .voltype = "features/read-only", + .option = "!read-only", + .value = "off", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT + }, + { .key = "features.worm", + .voltype = "features/worm", + .option = "!worm", + .value = "off", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT }, { .key = "storage.linux-aio", .voltype = "storage/posix", - .op_version = 2 + .op_version = 1 + }, + { .key = "storage.batch-fsync-mode", + .voltype = "storage/posix", + .op_version = 3 + }, + { .key = "storage.batch-fsync-delay-usec", + .voltype = "storage/posix", + .op_version = 3 }, { .key = "storage.owner-uid", .voltype = "storage/posix", .option = "brick-uid", - .op_version = 2 + .op_version = 1 }, { .key = "storage.owner-gid", .voltype = "storage/posix", .option = "brick-gid", - .op_version = 2 + .op_version = 1 }, - { .key = "config.memory-accounting", - .voltype = "configuration", - .option = "!config", - .op_version = 2 + { .key = "storage.node-uuid-pathinfo", + .voltype = "storage/posix", + .op_version = 3 + }, + { .key = "storage.health-check-interval", + .voltype = "storage/posix", + .op_version = 3 + }, + { .key = "storage.bd-aio", + .voltype = "storage/bd", + .op_version = 3 + }, + { .key = "config.memory-accounting", + .voltype = "configuration", + .option = "!config", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT }, { .key = "config.transport", .voltype = "configuration", @@ -1123,6 +1421,32 @@ struct volopt_map_entry glusterd_volopt_map[] = { .value = "0", .op_version = 2 }, + /* changelog translator - global tunables */ + { .key = "changelog.changelog", + .voltype = "features/changelog", + .type = NO_DOC, + .op_version = 2 + }, + { .key = "changelog.changelog-dir", + .voltype = "features/changelog", + .type = NO_DOC, + .op_version = 2 + }, + { .key = "changelog.encoding", + .voltype = "features/changelog", + .type = NO_DOC, + .op_version = 2 + }, + { .key = "changelog.rollover-time", + .voltype = "features/changelog", + .type = NO_DOC, + .op_version = 2 + }, + { .key = "changelog.fsync-interval", + .voltype = "features/changelog", + .type = NO_DOC, + .op_version = 2 + }, { .key = NULL } }; diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index c2d8d70e3..59288ada0 100644 --- 
a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -1,5 +1,5 @@ /* - Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + Copyright (c) 2006-2013 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. This file is licensed to you under your choice of the GNU Lesser @@ -36,6 +36,7 @@ #include "glusterd-store.h" #include "glusterd-hooks.h" #include "glusterd-utils.h" +#include "glusterd-locks.h" #include "common-utils.h" #include "run.h" @@ -44,29 +45,43 @@ #include "glusterd-mountbroker.h" extern struct rpcsvc_program gluster_handshake_prog; +extern struct rpcsvc_program gluster_cli_getspec_prog; extern struct rpcsvc_program gluster_pmap_prog; extern glusterd_op_info_t opinfo; extern struct rpcsvc_program gd_svc_mgmt_prog; +extern struct rpcsvc_program gd_svc_mgmt_v3_prog; extern struct rpcsvc_program gd_svc_peer_prog; extern struct rpcsvc_program gd_svc_cli_prog; +extern struct rpcsvc_program gd_svc_cli_prog_ro; extern struct rpc_clnt_program gd_brick_prog; extern struct rpcsvc_program glusterd_mgmt_hndsk_prog; +extern char snap_mount_folder[PATH_MAX]; + rpcsvc_cbk_program_t glusterd_cbk_prog = { .progname = "Gluster Callback", .prognum = GLUSTER_CBK_PROGRAM, .progver = GLUSTER_CBK_VERSION, }; -struct rpcsvc_program *all_programs[] = { +struct rpcsvc_program *gd_inet_programs[] = { &gd_svc_peer_prog, - &gd_svc_cli_prog, + &gd_svc_cli_prog_ro, &gd_svc_mgmt_prog, + &gd_svc_mgmt_v3_prog, &gluster_pmap_prog, &gluster_handshake_prog, &glusterd_mgmt_hndsk_prog, }; -int rpcsvc_programs_count = (sizeof (all_programs) / sizeof (all_programs[0])); +int gd_inet_programs_count = (sizeof (gd_inet_programs) / + sizeof (gd_inet_programs[0])); + +struct rpcsvc_program *gd_uds_programs[] = { + &gd_svc_cli_prog, + &gluster_cli_getspec_prog, +}; +int gd_uds_programs_count = (sizeof (gd_uds_programs) / + sizeof (gd_uds_programs[0])); const char *gd_op_list[GD_OP_MAX + 1] = { [GD_OP_NONE] = "Invalid op", @@ -94,6 +109,10 @@ const char *gd_op_list[GD_OP_MAX + 1] = { [GD_OP_LIST_VOLUME] = "Lists", [GD_OP_CLEARLOCKS_VOLUME] = "Clear locks", [GD_OP_DEFRAG_BRICK_VOLUME] = "Rebalance", + [GD_OP_COPY_FILE] = "Copy File", + [GD_OP_SYS_EXEC] = "Execute system commands", + [GD_OP_GSYNC_CREATE] = "Geo-replication Create", + [GD_OP_SNAP] = "Snapshot", [GD_OP_MAX] = "Invalid op" }; @@ -119,12 +138,12 @@ glusterd_uuid_init () GF_ASSERT (this); priv = this->private; - ret = glusterd_retrieve_uuid (); - if (ret == 0) { - gf_log (this->name, GF_LOG_INFO, - "retrieved UUID: %s", uuid_utoa (priv->uuid)); - return 0; - } + ret = glusterd_retrieve_uuid (); + if (ret == 0) { + gf_log (this->name, GF_LOG_INFO, + "retrieved UUID: %s", uuid_utoa (priv->uuid)); + return 0; + } ret = glusterd_uuid_generate_save (); @@ -427,8 +446,8 @@ glusterd_crt_georep_folders (char *georepdir, glusterd_conf_t *conf) if (strlen (conf->workdir)+2 > PATH_MAX-strlen(GEOREP)) { ret = -1; gf_log ("glusterd", GF_LOG_CRITICAL, - "Unable to create "GEOREP" directory %s", - georepdir); + "directory path %s/"GEOREP" is longer than PATH_MAX", + conf->workdir); goto out; } @@ -444,8 +463,8 @@ glusterd_crt_georep_folders (char *georepdir, glusterd_conf_t *conf) if (strlen (DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP) >= PATH_MAX) { ret = -1; gf_log ("glusterd", GF_LOG_CRITICAL, - "Unable to create "GEOREP" directory %s", - georepdir); + "directory path "DEFAULT_LOG_FILE_DIRECTORY"/" + GEOREP" is longer than PATH_MAX"); goto out; } ret = mkdir_p (DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP, 0777, _gf_true); @@ -459,8 
+478,8 @@ glusterd_crt_georep_folders (char *georepdir, glusterd_conf_t *conf) if (strlen(DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves") >= PATH_MAX) { ret = -1; gf_log ("glusterd", GF_LOG_CRITICAL, - "Unable to create "GEOREP" directory %s", - georepdir); + "directory path "DEFAULT_LOG_FILE_DIRECTORY"/" + GEOREP"-slaves"" is longer than PATH_MAX"); goto out; } ret = mkdir_p (DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves", 0777, @@ -475,8 +494,8 @@ glusterd_crt_georep_folders (char *georepdir, glusterd_conf_t *conf) if (strlen(DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/mbr") >= PATH_MAX) { ret = -1; gf_log ("glusterd", GF_LOG_CRITICAL, - "Unable to create "GEOREP" moubtbroker directory %s", - georepdir); + "directory path "DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP + "-slaves/mbr"" is longer than PATH_MAX"); goto out; } ret = mkdir_p (DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/mbr", 0777, @@ -519,7 +538,7 @@ runinit_gsyncd_setrx (runner_t *runner, glusterd_conf_t *conf) { runinit (runner); runner_add_args (runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL); - runner_argprintf (runner, "%s/"GSYNC_CONF,conf->workdir); + runner_argprintf (runner, "%s/"GSYNC_CONF_TEMPLATE, conf->workdir); runner_add_arg (runner, "--config-set-rx"); } @@ -568,7 +587,8 @@ configure_syncdaemon (glusterd_conf_t *conf) RUN_GSYNCD_CMD; runinit_gsyncd_setrx (&runner, conf); - runner_add_args (&runner, "remote-gsyncd", GSYNCD_PREFIX"/gsyncd", ".", "^ssh:", NULL); + runner_add_args (&runner, "remote-gsyncd", "/nonexistent/gsyncd", + ".", "^ssh:", NULL); RUN_GSYNCD_CMD; /* gluster-command-dir */ @@ -580,7 +600,7 @@ configure_syncdaemon (glusterd_conf_t *conf) /* gluster-params */ runinit_gsyncd_setrx (&runner, conf); runner_add_args (&runner, "gluster-params", - "xlator-option=*-dht.assert-no-child-down=true", + "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", ".", ".", NULL); RUN_GSYNCD_CMD; @@ -597,14 +617,30 @@ configure_syncdaemon (glusterd_conf_t *conf) /* pid-file */ runinit_gsyncd_setrx (&runner, conf); runner_add_arg (&runner, "pid-file"); - runner_argprintf (&runner, "%s/${mastervol}/${eSlave}.pid", georepdir); + runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}.pid", georepdir); runner_add_args (&runner, ".", ".", NULL); RUN_GSYNCD_CMD; /* state-file */ runinit_gsyncd_setrx (&runner, conf); runner_add_arg (&runner, "state-file"); - runner_argprintf (&runner, "%s/${mastervol}/${eSlave}.status", georepdir); + runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}.status", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* state-detail-file */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_arg (&runner, "state-detail-file"); + runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}-detail.status", + georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* state-detail-file */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_arg (&runner, "state-detail-file"); + runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}-detail.status", + georepdir); runner_add_args (&runner, ".", ".", NULL); RUN_GSYNCD_CMD; @@ -632,10 +668,32 @@ configure_syncdaemon (glusterd_conf_t *conf) runinit_gsyncd_setrx (&runner, conf); runner_add_args (&runner, "gluster-log-file", - DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"/${mastervol}/${eSlave}.gluster.log", + DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"/${mastervol}/${eSlave}${local_id}.gluster.log", ".", ".", NULL); RUN_GSYNCD_CMD; + 
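/* Each runinit_gsyncd_setrx()/RUN_GSYNCD_CMD pair above and below writes
 * one default into the geo-rep template config. A rough shell equivalent
 * (sketch only; it assumes gsyncd's --config-set-rx interface shown in
 * runinit_gsyncd_setrx, and the real paths depend on the build):
 *
 *   gsyncd -c <workdir>/<GSYNC_CONF_TEMPLATE> --config-set-rx \
 *          <option> <value> . .
 *
 * The trailing ". ." arguments mirror the runner_add_args (&runner, ".",
 * ".", NULL) calls and stand for the master and slave match patterns.
 */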
/* ignore-deletes */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_args (&runner, "ignore-deletes", "true", ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* special-sync-mode */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_args (&runner, "special-sync-mode", "partial", ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* change-detector == changelog */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_args(&runner, "change-detector", "changelog", ".", ".", NULL); + RUN_GSYNCD_CMD; + + runinit_gsyncd_setrx (&runner, conf); + runner_add_arg(&runner, "working-dir"); + runner_argprintf(&runner, "%s/${mastervol}/${eSlave}", + DEFAULT_VAR_RUN_DIRECTORY); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + /************ * slave pre-configuration ************/ @@ -649,7 +707,7 @@ configure_syncdaemon (glusterd_conf_t *conf) /* gluster-params */ runinit_gsyncd_setrx (&runner, conf); runner_add_args (&runner, "gluster-params", - "xlator-option=*-dht.assert-no-child-down=true", + "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", ".", NULL); RUN_GSYNCD_CMD; @@ -868,27 +926,212 @@ _install_mount_spec (dict_t *opts, char *key, data_t *value, void *data) return -1; } + static int -glusterd_default_synctask_cbk (int ret, call_frame_t *frame, void *opaque) +gd_default_synctask_cbk (int ret, call_frame_t *frame, void *opaque) +{ + glusterd_conf_t *priv = THIS->private; + synclock_unlock (&priv->big_lock); + return ret; +} + +static void +glusterd_launch_synctask (synctask_fn_t fn, void *opaque) +{ + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + + this = THIS; + priv = this->private; + + synclock_lock (&priv->big_lock); + ret = synctask_new (this->ctx->env, fn, gd_default_synctask_cbk, NULL, + opaque); + if (ret) + gf_log (this->name, GF_LOG_CRITICAL, "Failed to spawn bricks" + " and other volume related services"); +} + +int +glusterd_uds_rpcsvc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, + void *data) +{ + /* glusterd_rpcsvc_notify() performs setup that requests arriving on + * the unix domain socket do not need. This is an empty placeholder + * function for the uds listener; it can be extended later if required. 
+ */ + return 0; +} + +/* The glusterd unix domain socket listener only listens for cli */ +rpcsvc_t * +glusterd_init_uds_listener (xlator_t *this) +{ + int ret = -1; + dict_t *options = NULL; + rpcsvc_t *rpc = NULL; + data_t *sock_data = NULL; + char sockfile[PATH_MAX+1] = {0,}; + int i = 0; + + + GF_ASSERT (this); + + sock_data = dict_get (this->options, "glusterd-sockfile"); + if (!sock_data) { + strncpy (sockfile, DEFAULT_GLUSTERD_SOCKFILE, PATH_MAX); + } else { + strncpy (sockfile, sock_data->data, PATH_MAX); + } + + options = dict_new (); + if (!options) + goto out; + + ret = rpcsvc_transport_unix_options_build (&options, sockfile); + if (ret) + goto out; + + rpc = rpcsvc_init (this, this->ctx, options, 8); + if (rpc == NULL) { + ret = -1; + goto out; + } + + ret = rpcsvc_register_notify (rpc, glusterd_uds_rpcsvc_notify, + this); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Failed to register notify function"); + goto out; + } + + ret = rpcsvc_create_listeners (rpc, options, this->name); + if (ret != 1) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to create listener"); + goto out; + } + ret = 0; + + for (i = 0; i < gd_uds_programs_count; i++) { + ret = glusterd_program_register (this, rpc, gd_uds_programs[i]); + if (ret) { + i--; + for (; i >= 0; i--) + rpcsvc_program_unregister (rpc, + gd_uds_programs[i]); + + goto out; + } + } + +out: + if (options) + dict_unref (options); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to start glusterd " + "unix domain socket listener."); + if (rpc) { + GF_FREE (rpc); + rpc = NULL; + } + } + return rpc; +} + +void +glusterd_stop_uds_listener (xlator_t *this) { - return ret; + glusterd_conf_t *conf = NULL; + rpcsvc_listener_t *listener = NULL; + rpcsvc_listener_t *next = NULL; + + GF_ASSERT (this); + conf = this->private; + + (void) rpcsvc_program_unregister (conf->uds_rpc, &gd_svc_cli_prog); + (void) rpcsvc_program_unregister (conf->uds_rpc, &gluster_cli_getspec_prog); + + list_for_each_entry_safe (listener, next, &conf->uds_rpc->listeners, + list) { + rpcsvc_listener_destroy (listener); + } + + (void) rpcsvc_unregister_notify (conf->uds_rpc, glusterd_rpcsvc_notify, + this); + + unlink (DEFAULT_GLUSTERD_SOCKFILE); + + GF_FREE (conf->uds_rpc); + conf->uds_rpc = NULL; + + return; } static int -glusterd_launch_synctask (xlator_t *this, synctask_fn_t fn) +glusterd_init_snap_folder (xlator_t *this) { - glusterd_conf_t *priv = NULL; - int ret = -1; + int ret = -1; + struct stat buf = {0,}; + + GF_ASSERT (this); + + /* Snapshot volumes are mounted under /var/run/gluster/snaps folder. + * But /var/run is normally a symbolic link to /run folder, which + * creates problems as the entry point in the mtab for the mount point + * and glusterd maintained entry point will be different. Therefore + * identify the correct run folder and use it for snap volume mounting. + */ + ret = lstat (GLUSTERD_VAR_RUN_DIR, &buf); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "stat fails on %s, exiting. (errno = %d)", + GLUSTERD_VAR_RUN_DIR, errno); + goto out; + } - priv = this->private; + /* If /var/run is symlink then use /run folder */ + if (S_ISLNK (buf.st_mode)) { + strcpy (snap_mount_folder, GLUSTERD_RUN_DIR); + } else { + strcpy (snap_mount_folder, GLUSTERD_VAR_RUN_DIR); + } - ret = synctask_new (this->ctx->env, fn, - glusterd_default_synctask_cbk, NULL, priv); + strcat (snap_mount_folder, GLUSTERD_DEFAULT_SNAPS_BRICK_DIR); - if (ret) - gf_log (this->name, GF_LOG_CRITICAL, "Failed to create synctask" - "for starting process"); - return ret; + ret = stat (snap_mount_folder, &buf); + if ((ret != 0) && (ENOENT != errno)) { + gf_log (this->name, GF_LOG_ERROR, + "stat fails on %s, exiting. (errno = %d)", + snap_mount_folder, errno); + ret = -1; + goto out; + } + + if ((!ret) && (!S_ISDIR(buf.st_mode))) { + gf_log (this->name, GF_LOG_CRITICAL, + "Provided snap path %s is not a directory, " + "exiting", snap_mount_folder); + ret = -1; + goto out; + } + + if ((-1 == ret) && (ENOENT == errno)) { + /* Create missing folders */ + ret = mkdir_p (snap_mount_folder, 0777, _gf_false); + + if (-1 == ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "Unable to create directory %s" + ", errno = %d", snap_mount_folder, errno); + goto out; + } + } + +out: + return ret; } /* @@ -902,6 +1145,7 @@ init (xlator_t *this) { int32_t ret = -1; rpcsvc_t *rpc = NULL; + rpcsvc_t *uds_rpc = NULL; glusterd_conf_t *conf = NULL; data_t *dir_data = NULL; struct stat buf = {0,}; @@ -912,10 +1156,8 @@ init (xlator_t *this) int first_time = 0; char *mountbroker_root = NULL; int i = 0; - -#ifdef DEBUG char *valgrind_str = NULL; -#endif + dir_data = dict_get (this->options, "working-directory"); if (!dir_data) { @@ -929,7 +1171,7 @@ init (xlator_t *this) if ((ret != 0) && (ENOENT != errno)) { gf_log (this->name, GF_LOG_ERROR, "stat fails on %s, exiting. 
(errno = %d)", - workdir, errno); + workdir, errno); exit (1); } @@ -954,9 +1196,17 @@ init (xlator_t *this) first_time = 1; } + setenv ("GLUSTERD_WORKING_DIR", workdir, 1); gf_log (this->name, GF_LOG_INFO, "Using %s as working directory", workdir); + ret = glusterd_init_snap_folder (this); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, "Unable to create " + "snap backend folder"); + exit (1); + } + snprintf (cmd_log_filename, PATH_MAX,"%s/.cmd_log_history", DEFAULT_LOG_FILE_DIRECTORY); ret = gf_cmd_log_init (cmd_log_filename); @@ -1054,18 +1304,29 @@ init (xlator_t *this) goto out; } - for (i = 0; i < rpcsvc_programs_count; i++) { - ret = glusterd_program_register (this, rpc, all_programs[i]); + for (i = 0; i < gd_inet_programs_count; i++) { + ret = glusterd_program_register (this, rpc, + gd_inet_programs[i]); if (ret) { i--; for (; i >= 0; i--) rpcsvc_program_unregister (rpc, - all_programs[i]); + gd_inet_programs[i]); goto out; } } + /* Start a unix domain socket listener just for cli commands + * This should prevent ports from being wasted by being in TIME_WAIT + * when cli commands are done continuously + */ + uds_rpc = glusterd_init_uds_listener (this); + if (uds_rpc == NULL) { + ret = -1; + goto out; + } + conf = GF_CALLOC (1, sizeof (glusterd_conf_t), gf_gld_mt_glusterd_conf_t); GF_VALIDATE_OR_GOTO(this->name, conf, out); @@ -1078,17 +1339,24 @@ init (xlator_t *this) INIT_LIST_HEAD (&conf->peers); INIT_LIST_HEAD (&conf->volumes); + INIT_LIST_HEAD (&conf->snapshots); + INIT_LIST_HEAD (&conf->missed_snaps_list); + pthread_mutex_init (&conf->mutex, NULL); conf->rpc = rpc; + conf->uds_rpc = uds_rpc; conf->gfs_mgmt = &gd_brick_prog; strncpy (conf->workdir, workdir, PATH_MAX); + synclock_init (&conf->big_lock); pthread_mutex_init (&conf->xprt_lock, NULL); INIT_LIST_HEAD (&conf->xprt_list); glusterd_friend_sm_init (); glusterd_op_sm_init (); glusterd_opinfo_init (); + glusterd_mgmt_v3_lock_init (); + glusterd_txn_opinfo_dict_init (); ret = glusterd_sm_tr_log_init (&conf->op_sm_log, glusterd_op_sm_state_name_get, glusterd_op_sm_event_name_get, @@ -1096,8 +1364,13 @@ init (xlator_t *this) if (ret) goto out; + conf->base_port = GF_IANA_PRIV_PORTS_START; + if (dict_get_uint32(this->options, "base-port", &conf->base_port) == 0) { + gf_log (this->name, GF_LOG_INFO, + "base-port override: %d", conf->base_port); + } + /* Set option to run bricks on valgrind if enabled in glusterd.vol */ -#ifdef DEBUG conf->valgrind = _gf_false; ret = dict_get_str (this->options, "run-with-valgrind", &valgrind_str); if (ret < 0) { @@ -1110,7 +1383,6 @@ init (xlator_t *this) "run-with-valgrind value not a boolean string"); } } -#endif this->private = conf; (void) glusterd_nodesvc_set_online_status ("glustershd", _gf_false); @@ -1147,6 +1419,13 @@ init (xlator_t *this) if (ret < 0) goto out; + /* If there are no 'friends', this would be the best time to + * spawn processes/bricks that may need (re)starting since last + * time (this) glusterd was up.*/ + + if (list_empty (&conf->peers)) { + glusterd_launch_synctask (glusterd_spawn_daemons, NULL); + } ret = glusterd_options_init (this); if (ret < 0) goto out; @@ -1155,13 +1434,6 @@ init (xlator_t *this) if (ret) goto out; - glusterd_launch_synctask (this, - (synctask_fn_t) glusterd_restart_bricks); - glusterd_launch_synctask (this, - (synctask_fn_t) glusterd_restart_gsyncds); - glusterd_launch_synctask (this, - (synctask_fn_t) glusterd_restart_rebalance); - ret = glusterd_hooks_spawn_worker (this); if (ret) goto out; @@ -1197,11 +1469,17 @@ fini (xlator_t *this) 
goto out; conf = this->private; + + glusterd_stop_uds_listener (this); + FREE (conf->pmap); if (conf->handle) - glusterd_store_handle_destroy (conf->handle); + gf_store_handle_destroy (conf->handle); glusterd_sm_tr_log_delete (&conf->op_sm_log); + glusterd_mgmt_v3_lock_fini (); + glusterd_txn_opinfo_dict_fini (); GF_FREE (conf); + this->private = NULL; out: return; @@ -1288,11 +1566,9 @@ struct volume_options options[] = { { .key = {GEOREP"-log-group"}, .type = GF_OPTION_TYPE_ANY, }, -#ifdef DEBUG { .key = {"run-with-valgrind"}, .type = GF_OPTION_TYPE_BOOL, }, -#endif { .key = {"server-quorum-type"}, .type = GF_OPTION_TYPE_STR, .value = { "none", "server"}, @@ -1304,5 +1580,18 @@ struct volume_options options[] = { .description = "Sets the quorum percentage for the trusted " "storage pool." }, + { .key = {"glusterd-sockfile"}, + .type = GF_OPTION_TYPE_PATH, + .description = "The socket file on which glusterd should listen for " + "cli requests. Default is "DEFAULT_GLUSTERD_SOCKFILE "." + }, + { .key = {"base-port"}, + .type = GF_OPTION_TYPE_INT, + .description = "Sets the base port for portmap query" + }, + { .key = {"snap-brick-path"}, + .type = GF_OPTION_TYPE_STR, + .description = "directory where the bricks for the snapshots will be created" + }, { .key = {NULL} }, }; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index c9e8d42d3..3aa395ebc 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + Copyright (c) 2006-2013 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. This file is licensed to you under your choice of the GNU Lesser @@ -37,16 +37,24 @@ #include "glusterd-pmap.h" #include "cli1-xdr.h" #include "syncop.h" +#include "store.h" #define GLUSTERD_MAX_VOLUME_NAME 1000 -#define DEFAULT_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs" #define GLUSTERD_TR_LOG_SIZE 50 #define GLUSTERD_NAME "glusterd" #define GLUSTERD_SOCKET_LISTEN_BACKLOG 128 #define GLUSTERD_QUORUM_TYPE_KEY "cluster.server-quorum-type" #define GLUSTERD_QUORUM_RATIO_KEY "cluster.server-quorum-ratio" #define GLUSTERD_GLOBAL_OPT_VERSION "global-option-version" +#define GLUSTERD_COMMON_PEM_PUB_FILE "/geo-replication/common_secret.pem.pub" +#define GEO_CONF_MAX_OPT_VALS 5 +#define GLUSTERD_CREATE_HOOK_SCRIPT "/hooks/1/gsync-create/post/" \ + "S56glusterd-geo-rep-create-post.sh" + +#define GLUSTERD_SNAPS_MAX_HARD_LIMIT 256 +#define GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT 90 +#define GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT 100 #define GLUSTERD_SERVER_QUORUM "server" #define FMTSTR_CHECK_VOL_EXISTS "Volume %s does not exist" @@ -67,6 +75,9 @@ struct glusterd_volinfo_; typedef struct glusterd_volinfo_ glusterd_volinfo_t; +struct glusterd_snap_; +typedef struct glusterd_snap_ glusterd_snap_t; + typedef enum glusterd_op_ { GD_OP_NONE = 0, GD_OP_CREATE_VOLUME, @@ -93,26 +104,22 @@ typedef enum glusterd_op_ { GD_OP_LIST_VOLUME, GD_OP_CLEARLOCKS_VOLUME, GD_OP_DEFRAG_BRICK_VOLUME, - GD_OP_BD_OP, + GD_OP_COPY_FILE, + GD_OP_SYS_EXEC, + GD_OP_GSYNC_CREATE, + GD_OP_SNAP, GD_OP_MAX, } glusterd_op_t; extern const char * gd_op_list[]; -struct glusterd_store_iter_ { - int fd; - FILE *file; - char filepath[PATH_MAX]; -}; - -typedef struct glusterd_store_iter_ glusterd_store_iter_t; struct glusterd_volgen { dict_t *dict; }; typedef struct { - struct rpc_clnt *rpc; - gf_boolean_t online; + struct rpc_clnt *rpc; + gf_boolean_t online; } nodesrv_t; typedef struct { @@ 
-122,37 +129,45 @@ typedef struct { } gd_global_opts_t; typedef struct { - struct _volfile_ctx *volfile; - pthread_mutex_t mutex; - struct list_head peers; - struct list_head xaction_peers; - gf_boolean_t verify_volfile_checksum; - gf_boolean_t trace; - uuid_t uuid; - char workdir[PATH_MAX]; - rpcsvc_t *rpc; - nodesrv_t *shd; - nodesrv_t *nfs; - struct pmap_registry *pmap; - struct list_head volumes; - pthread_mutex_t xprt_lock; - struct list_head xprt_list; - glusterd_store_handle_t *handle; - gf_timer_t *timer; - glusterd_sm_tr_log_t op_sm_log; - struct rpc_clnt_program *gfs_mgmt; - - struct list_head mount_specs; -#ifdef DEBUG - gf_boolean_t valgrind; -#endif - pthread_t brick_thread; - void *hooks_priv; + struct _volfile_ctx *volfile; + pthread_mutex_t mutex; + struct list_head peers; + struct list_head xaction_peers; + gf_boolean_t verify_volfile_checksum; + gf_boolean_t trace; + uuid_t uuid; + char workdir[PATH_MAX]; + rpcsvc_t *rpc; + nodesrv_t *shd; + nodesrv_t *nfs; + struct pmap_registry *pmap; + struct list_head volumes; + struct list_head snapshots; /*List of snap volumes */ + pthread_mutex_t xprt_lock; + struct list_head xprt_list; + gf_store_handle_t *handle; + gf_timer_t *timer; + glusterd_sm_tr_log_t op_sm_log; + struct rpc_clnt_program *gfs_mgmt; + + struct list_head mount_specs; + gf_boolean_t valgrind; + pthread_t brick_thread; + void *hooks_priv; /* need for proper handshake_t */ - int op_version; /* Starts with 1 for 3.3.0 */ - xlator_t *xl; /* Should be set to 'THIS' before creating thread */ - gf_boolean_t pending_quorum_action; - dict_t *opts; + int op_version; /* Starts with 1 for 3.3.0 */ + xlator_t *xl; /* Should be set to 'THIS' before creating thread */ + gf_boolean_t pending_quorum_action; + dict_t *opts; + synclock_t big_lock; + gf_boolean_t restart_done; + rpcsvc_t *uds_rpc; /* RPCSVC for the unix domain socket */ + uint32_t base_port; + uint64_t snap_max_hard_limit; + uint64_t snap_max_soft_limit; + char *snap_bricks_directory; + gf_store_handle_t *missed_snaps_list_shandle; + struct list_head missed_snaps_list; } glusterd_conf_t; @@ -162,18 +177,22 @@ typedef enum gf_brick_status { } gf_brick_status_t; struct glusterd_brickinfo { - char hostname[1024]; - char path[PATH_MAX]; - struct list_head brick_list; - uuid_t uuid; - int port; - int rdma_port; - char *logfile; - gf_boolean_t signed_in; - glusterd_store_handle_t *shandle; - gf_brick_status_t status; - struct rpc_clnt *rpc; - int decommissioned; + char hostname[1024]; + char path[PATH_MAX]; + char device_path[PATH_MAX]; + struct list_head brick_list; + uuid_t uuid; + int port; + int rdma_port; + char *logfile; + gf_boolean_t signed_in; + gf_store_handle_t *shandle; + gf_brick_status_t status; + struct rpc_clnt *rpc; + int decommissioned; + char vg[PATH_MAX]; /* FIXME: Use max size for length of vg */ + int caps; /* Capability */ + int32_t snap_status; }; typedef struct glusterd_brickinfo glusterd_brickinfo_t; @@ -196,7 +215,7 @@ struct glusterd_defrag_info_ { int cmd; pthread_t th; gf_defrag_status_t defrag_status; - struct rpc_clnt * rpc; + struct rpc_clnt *rpc; uint32_t connected; char mount[1024]; char databuf[131072]; @@ -229,80 +248,147 @@ struct _auth { typedef struct _auth auth_t; -typedef enum glusterd_vol_backend_ { - GD_VOL_BK_DEFAULT = 0, /* POSIX */ - GD_VOL_BK_BD = 1, -} glusterd_vol_backend_t; +/* Capabilities of xlator */ +#define CAPS_BD 0x00000001 +#define CAPS_THIN 0x00000002 +#define CAPS_OFFLOAD_COPY 0x00000004 +#define CAPS_OFFLOAD_SNAPSHOT 0x00000008 struct glusterd_rebalance_ { - 
gf_defrag_status_t defrag_status; - uint64_t rebalance_files; - uint64_t rebalance_data; - uint64_t lookedup_files; + gf_defrag_status_t defrag_status; + uint64_t rebalance_files; + uint64_t rebalance_data; + uint64_t lookedup_files; + uint64_t skipped_files; glusterd_defrag_info_t *defrag; - gf_cli_defrag_type defrag_cmd; - uint64_t rebalance_failures; - uuid_t rebalance_id; - double rebalance_time; - glusterd_op_t op; + gf_cli_defrag_type defrag_cmd; + uint64_t rebalance_failures; + uuid_t rebalance_id; + double rebalance_time; + glusterd_op_t op; + dict_t *dict; /* Dict to store misc information + * like list of bricks being removed */ }; typedef struct glusterd_rebalance_ glusterd_rebalance_t; struct glusterd_replace_brick_ { gf_rb_status_t rb_status; - glusterd_brickinfo_t *src_brick; - glusterd_brickinfo_t *dst_brick; + glusterd_brickinfo_t *src_brick; + glusterd_brickinfo_t *dst_brick; uuid_t rb_id; }; typedef struct glusterd_replace_brick_ glusterd_replace_brick_t; struct glusterd_volinfo_ { - char volname[GLUSTERD_MAX_VOLUME_NAME]; - int type; - int brick_count; - struct list_head vol_list; - struct list_head bricks; - glusterd_volume_status status; - int sub_count; /* backward compatibility */ - int stripe_count; - int replica_count; - int subvol_count; /* Number of subvolumes in a + gf_lock_t lock; + char volname[GLUSTERD_MAX_VOLUME_NAME]; + gf_boolean_t is_snap_volume; + glusterd_snap_t *snapshot; + gf_boolean_t is_volume_restored; + char parent_volname[GLUSTERD_MAX_VOLUME_NAME]; + /* In case of a snap volume + i.e (is_snap_volume == TRUE) this + field will contain the name of + the volume which is snapped. In + case of a non-snap volume, this + field will be initialized as N/A */ + int type; + int brick_count; + uint64_t snap_count; + uint64_t snap_max_hard_limit; + struct list_head vol_list; + /* In case of a snap volume + i.e (is_snap_volume == TRUE) this + is linked to glusterd_snap_t->volumes. + In case of a non-snap volume, this is + linked to glusterd_conf_t->volumes */ + struct list_head snapvol_list; + /* This is a current pointer for + glusterd_volinfo_t->snap_volumes */ + struct list_head bricks; + struct list_head snap_volumes; + /* TODO : Need to remove this, as this + * is already part of snapshot object. 
+ */ + glusterd_volume_status status; + int sub_count; /* backward compatibility */ + int stripe_count; + int replica_count; + int subvol_count; /* Number of subvolumes in a distribute volume */ - int dist_leaf_count; /* Number of bricks in one + int dist_leaf_count; /* Number of bricks in one distribute subvolume */ - int port; - glusterd_store_handle_t *shandle; - glusterd_store_handle_t *rb_shandle; - glusterd_store_handle_t *node_state_shandle; + int port; + gf_store_handle_t *shandle; + gf_store_handle_t *rb_shandle; + gf_store_handle_t *node_state_shandle; /* Defrag/rebalance related */ - glusterd_rebalance_t rebal; + glusterd_rebalance_t rebal; /* Replace brick status */ - glusterd_replace_brick_t rep_brick; + glusterd_replace_brick_t rep_brick; + + int version; + uint32_t cksum; + gf_transport_type transport_type; + gf_transport_type nfs_transport_type; - int version; - uint32_t cksum; - gf_transport_type transport_type; - gf_transport_type nfs_transport_type; + dict_t *dict; - dict_t *dict; + uuid_t volume_id; + auth_t auth; + char *logdir; - uuid_t volume_id; - auth_t auth; - char *logdir; + dict_t *gsync_slaves; - dict_t *gsync_slaves; + int decommission_in_progress; + xlator_t *xl; - int decommission_in_progress; - xlator_t *xl; + gf_boolean_t memory_accounting; + int caps; /* Capability */ + + int op_version; + int client_op_version; +}; - gf_boolean_t memory_accounting; - glusterd_vol_backend_t backend; +typedef enum gd_snap_status_ { + GD_SNAP_STATUS_NONE, + GD_SNAP_STATUS_INIT, + GD_SNAP_STATUS_IN_USE, + GD_SNAP_STATUS_DECOMMISSION, + GD_SNAP_STATUS_RESTORED, +} gd_snap_status_t; + +struct glusterd_snap_ { + gf_lock_t lock; + struct list_head volumes; + struct list_head snap_list; + char snapname[GLUSTERD_MAX_SNAP_NAME]; + uuid_t snap_id; + char *description; + time_t time_stamp; + gf_boolean_t snap_restored; + gd_snap_status_t snap_status; + gf_store_handle_t *shandle; }; +typedef struct glusterd_snap_op_ { + int32_t brick_num; + char *brick_path; + int32_t op; + int32_t status; + struct list_head snap_ops_list; +} glusterd_snap_op_t; + +typedef struct glusterd_missed_snap_ { + char *node_snap_info; + struct list_head missed_snaps; + struct list_head snap_ops; +} glusterd_missed_snap_info; + typedef enum gd_node_type_ { GD_NODE_NONE, GD_NODE_BRICK, @@ -311,6 +397,12 @@ typedef enum gd_node_type_ { GD_NODE_NFS, } gd_node_type; +typedef enum missed_snap_stat { + GD_MISSED_SNAP_NONE, + GD_MISSED_SNAP_PENDING, + GD_MISSED_SNAP_DONE, +} missed_snap_stat; + typedef struct glusterd_pending_node_ { struct list_head list; void *node; @@ -318,6 +410,13 @@ typedef struct glusterd_pending_node_ { int32_t index; } glusterd_pending_node_t; +struct gsync_config_opt_vals_ { + char *op_name; + int no_of_pos_vals; + gf_boolean_t case_sensitive; + char *values[GEO_CONF_MAX_OPT_VALS]; +}; + enum glusterd_op_ret { GLUSTERD_CONNECTION_AWAITED = 100, }; @@ -335,10 +434,18 @@ enum glusterd_vol_comp_status_ { #define GLUSTERD_VOLUME_DIR_PREFIX "vols" #define GLUSTERD_PEER_DIR_PREFIX "peers" #define GLUSTERD_VOLUME_INFO_FILE "info" +#define GLUSTERD_SNAP_INFO_FILE "info" #define GLUSTERD_VOLUME_RBSTATE_FILE "rbstate" #define GLUSTERD_BRICK_INFO_DIR "bricks" #define GLUSTERD_CKSUM_FILE "cksum" +#define GLUSTERD_TRASH "trash" #define GLUSTERD_NODE_STATE_FILE "node_state.info" +#define GLUSTERD_MISSED_SNAPS_LIST_FILE "missed_snaps_list" +#define GLUSTERD_VOL_SNAP_DIR_PREFIX "snaps" + +#define GLUSTERD_DEFAULT_SNAPS_BRICK_DIR "/gluster/snaps" +#define GLUSTERD_VAR_RUN_DIR "/var/run" +#define 
GLUSTERD_RUN_DIR "/run" /* definitions related to replace brick */ #define RB_CLIENT_MOUNTPOINT "rb_mount" @@ -351,14 +458,29 @@ enum glusterd_vol_comp_status_ { typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args); -#define GLUSTERD_GET_VOLUME_DIR(path, volinfo, priv) \ - snprintf (path, PATH_MAX, "%s/vols/%s", priv->workdir,\ - volinfo->volname); +#define GLUSTERD_GET_VOLUME_DIR(path, volinfo, priv) \ + if (volinfo->is_snap_volume) { \ + snprintf (path, PATH_MAX, "%s/snaps/%s/%s", priv->workdir, \ + volinfo->snapshot->snapname, volinfo->volname); \ + } else { \ + snprintf (path, PATH_MAX, "%s/vols/%s", priv->workdir, \ + volinfo->volname); \ + } -#define GLUSTERD_GET_BRICK_DIR(path, volinfo, priv) \ - snprintf (path, PATH_MAX, "%s/%s/%s/%s", priv->workdir,\ - GLUSTERD_VOLUME_DIR_PREFIX, volinfo->volname, \ - GLUSTERD_BRICK_INFO_DIR); +#define GLUSTERD_GET_SNAP_DIR(path, snap, priv) \ + snprintf (path, PATH_MAX, "%s/snaps/%s", priv->workdir, \ + snap->snapname); + +#define GLUSTERD_GET_BRICK_DIR(path, volinfo, priv) \ + if (volinfo->is_snap_volume) { \ + snprintf (path, PATH_MAX, "%s/snaps/%s/%s/%s", priv->workdir, \ + volinfo->snapshot->snapname, volinfo->volname, \ + GLUSTERD_BRICK_INFO_DIR); \ + } else { \ + snprintf (path, PATH_MAX, "%s/%s/%s/%s", priv->workdir, \ + GLUSTERD_VOLUME_DIR_PREFIX, volinfo->volname, \ + GLUSTERD_BRICK_INFO_DIR); \ + } #define GLUSTERD_GET_NFS_DIR(path, priv) \ snprintf (path, PATH_MAX, "%s/nfs", priv->workdir); @@ -372,12 +494,14 @@ typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args); } \ } while (0) -#define GLUSTERD_GET_BRICK_PIDFILE(pidfile,volpath,hostname,brickpath) { \ - char exp_path[PATH_MAX] = {0,}; \ - GLUSTERD_REMOVE_SLASH_FROM_PATH (brickpath, exp_path); \ - snprintf (pidfile, PATH_MAX, "%s/run/%s-%s.pid", \ - volpath, hostname, exp_path); \ - } +#define GLUSTERD_GET_BRICK_PIDFILE(pidfile,volinfo,brickinfo, priv) do { \ + char exp_path[PATH_MAX] = {0,}; \ + char volpath[PATH_MAX] = {0,}; \ + GLUSTERD_GET_VOLUME_DIR (volpath, volinfo, priv); \ + GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, exp_path); \ + snprintf (pidfile, PATH_MAX, "%s/run/%s-%s.pid", \ + volpath, brickinfo->hostname, exp_path); \ + } while (0) #define GLUSTERD_GET_NFS_PIDFILE(pidfile,nfspath) { \ snprintf (pidfile, PATH_MAX, "%s/run/nfs.pid", \ @@ -385,9 +509,9 @@ typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args); } #define GLUSTERD_STACK_DESTROY(frame) do {\ - frame->local = NULL; \ - STACK_DESTROY (frame->root);\ - } while (0) + frame->local = NULL; \ + STACK_DESTROY (frame->root); \ + } while (0) #define GLUSTERD_GET_DEFRAG_DIR(path, volinfo, priv) do { \ char vol_path[PATH_MAX]; \ @@ -409,6 +533,19 @@ typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args); uuid_utoa(MY_UUID)); \ } while (0) +#define GLUSTERD_GET_UUID_NOHYPHEN(ret_string, uuid) do { \ + char *snap_volname_ptr = ret_string; \ + char *snap_volid_ptr = uuid_utoa(uuid); \ + while (*snap_volid_ptr) { \ + if (*snap_volid_ptr == '-') { \ + snap_volid_ptr++; \ + } else { \ + (*snap_volname_ptr++) = \ + (*snap_volid_ptr++); \ + } \ + } \ + *snap_volname_ptr = '\0'; \ + } while (0) int glusterd_uuid_init(); @@ -419,22 +556,34 @@ int glusterd_uuid_generate_save (); static inline unsigned char * __glusterd_uuid() { - glusterd_conf_t *priv = THIS->private; + glusterd_conf_t *priv = THIS->private; - if (uuid_is_null (priv->uuid)) - glusterd_uuid_init(); - return &priv->uuid[0]; + if (uuid_is_null (priv->uuid)) + glusterd_uuid_init(); + return 
&priv->uuid[0]; } +int glusterd_big_locked_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, + void *data, rpc_clnt_notify_t notify_fn); + +int +glusterd_big_locked_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe, fop_cbk_fn_t fn); + +int glusterd_big_locked_handler (rpcsvc_request_t *req, rpcsvc_actor actor_fn); + int32_t glusterd_brick_from_brickinfo (glusterd_brickinfo_t *brickinfo, char **new_brick); int -glusterd_probe_begin (rpcsvc_request_t *req, const char *hoststr, int port); +glusterd_probe_begin (rpcsvc_request_t *req, const char *hoststr, int port, + dict_t *dict); int -glusterd_xfer_friend_add_resp (rpcsvc_request_t *req, char *hostname, - int port, int32_t op_ret, int32_t op_errno); +glusterd_xfer_friend_add_resp (rpcsvc_request_t *req, char *myhostname, + char *remote_hostname, int port, int32_t op_ret, + int32_t op_errno); int glusterd_friend_find (uuid_t uuid, char *hostname, @@ -459,6 +608,14 @@ int glusterd_op_unlock_send_resp (rpcsvc_request_t *req, int32_t status); int +glusterd_op_mgmt_v3_lock_send_resp (rpcsvc_request_t *req, + uuid_t *txn_id, int32_t status); + +int +glusterd_op_mgmt_v3_unlock_send_resp (rpcsvc_request_t *req, + uuid_t *txn_id, int32_t status); + +int glusterd_op_stage_send_resp (rpcsvc_request_t *req, int32_t op, int32_t status, char *op_errstr, dict_t *rsp_dict); @@ -503,7 +660,7 @@ glusterd_handle_defrag_volume_v2 (rpcsvc_request_t *req); int glusterd_xfer_cli_probe_resp (rpcsvc_request_t *req, int32_t op_ret, int32_t op_errno, char *op_errstr, char *hostname, - int port); + int port, dict_t *dict); int glusterd_op_commit_send_resp (rpcsvc_request_t *req, @@ -515,7 +672,7 @@ glusterd_xfer_friend_remove_resp (rpcsvc_request_t *req, char *hostname, int por int glusterd_deprobe_begin (rpcsvc_request_t *req, const char *hoststr, int port, - uuid_t uuid); + uuid_t uuid, dict_t *dict); int glusterd_handle_cli_deprobe (rpcsvc_request_t *req); @@ -590,6 +747,12 @@ int glusterd_handle_reset_volume (rpcsvc_request_t *req); int +glusterd_handle_copy_file (rpcsvc_request_t *req); + +int +glusterd_handle_sys_exec (rpcsvc_request_t *req); + +int glusterd_handle_gsync_set (rpcsvc_request_t *req); int @@ -601,7 +764,7 @@ glusterd_handle_fsm_log (rpcsvc_request_t *req); int glusterd_xfer_cli_deprobe_resp (rpcsvc_request_t *req, int32_t op_ret, int32_t op_errno, char *op_errstr, - char *hostname); + char *hostname, dict_t *dict); int glusterd_fetchspec_notify (xlator_t *this); @@ -668,11 +831,20 @@ int glusterd_handle_cli_heal_volume (rpcsvc_request_t *req); int glusterd_handle_cli_list_volume (rpcsvc_request_t *req); +int +glusterd_handle_snapshot (rpcsvc_request_t *req); + /* op-sm functions */ int glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr); int glusterd_op_heal_volume (dict_t *dict, char **op_errstr); int glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr); int glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict); +int glusterd_op_stage_copy_file (dict_t *dict, char **op_errstr); +int glusterd_op_copy_file (dict_t *dict, char **op_errstr); +int glusterd_op_stage_sys_exec (dict_t *dict, char **op_errstr); +int glusterd_op_sys_exec (dict_t *dict, char **op_errstr, dict_t *rsp_dict); +int glusterd_op_stage_gsync_create (dict_t *dict, char **op_errstr); +int glusterd_op_gsync_create (dict_t *dict, char **op_errstr, dict_t *rsp_dict); int glusterd_op_quota (dict_t *dict, char **op_errstr, dict_t *rsp_dict); int glusterd_op_stage_quota (dict_t *dict, char **op_errstr); int 
glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, @@ -703,7 +875,6 @@ int glusterd_op_statedump_volume (dict_t *dict, char **op_errstr); int glusterd_op_stage_clearlocks_volume (dict_t *dict, char **op_errstr); int glusterd_op_clearlocks_volume (dict_t *dict, char **op_errstr, dict_t *rsp_dict); -int glusterd_op_stage_bd (dict_t *dict, char **op_errstr); /* misc */ void glusterd_do_replace_brick (void *data); @@ -714,10 +885,68 @@ int glusterd_op_statedump_volume_args_get (dict_t *dict, char **volname, char **options, int *option_cnt); int glusterd_op_gsync_args_get (dict_t *dict, char **op_errstr, - char **master, char **slave); + char **master, char **slave, char **host_uuid); +int glusterd_stop_volume (glusterd_volinfo_t *volinfo); + /* Synctask part */ int32_t glusterd_op_begin_synctask (rpcsvc_request_t *req, glusterd_op_t op, void *dict); int32_t glusterd_defrag_event_notify_handle (dict_t *dict); + +int32_t +glusterd_txn_opinfo_dict_init (); + +void +glusterd_txn_opinfo_dict_fini (); + +void +glusterd_txn_opinfo_init (); + +/* snapshot */ +glusterd_snap_t* +glusterd_new_snap_object(); + +int32_t +glusterd_list_add_snapvol (glusterd_volinfo_t *origin_vol, + glusterd_volinfo_t *snap_vol); + +glusterd_snap_t* +glusterd_remove_snap_by_id (uuid_t snap_id); + +glusterd_snap_t* +glusterd_remove_snap_by_name (char *snap_name); + +glusterd_snap_t* +glusterd_find_snap_by_name (char *snap_name); + +glusterd_snap_t* +glusterd_find_snap_by_id (uuid_t snap_id); + +int +glusterd_snapshot_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict); +int +glusterd_snapshot_brickop (dict_t *dict, char **op_errstr, dict_t *rsp_dict); +int +glusterd_snapshot (dict_t *dict, char **op_errstr, dict_t *rsp_dict); +int +glusterd_snapshot_postvalidate (dict_t *dict, int32_t op_ret, char **op_errstr, + dict_t *rsp_dict); +char * +glusterd_build_snap_device_path (char *device, char *snapname); +int32_t +glusterd_snap_remove (dict_t *rsp_dict, glusterd_snap_t *snap, + gf_boolean_t remove_lvm, gf_boolean_t force); +int32_t +glusterd_snapshot_cleanup (dict_t *dict, char **op_errstr, dict_t *rsp_dict); + +int32_t +glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count); + +int32_t +glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num, + char *brick_path, int32_t snap_op, + int32_t snap_status); + #endif
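
Editorial sketch: the declarations added to glusterd.h above (glusterd_big_locked_handler, glusterd_big_locked_cbk, glusterd_big_locked_notify), together with the synclock_init (&conf->big_lock) call in init(), introduce one coarse lock under which every RPC actor, callback, and notify function runs. The wrapper bodies are not part of this diff, so the program below is only a standalone illustration of the serialization pattern, with a pthread mutex standing in for GlusterFS's synclock_t and a hypothetical actor name:

#include <pthread.h>
#include <stdio.h>

/* Stand-in for rpcsvc_actor; the real actor takes an rpcsvc_request_t. */
typedef int (*actor_fn_t) (void *req);

/* One coarse "big lock", analogous to glusterd_conf_t->big_lock. */
static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

/* Run an actor with the big lock held so no two handlers interleave;
 * this mirrors what the glusterd_big_locked_handler() declaration is for. */
static int
big_locked_handler (void *req, actor_fn_t actor)
{
        int ret = -1;

        pthread_mutex_lock (&big_lock);
        ret = actor (req);
        pthread_mutex_unlock (&big_lock);

        return ret;
}

/* Hypothetical CLI actor used only for this demonstration. */
static int
__handle_cli_op (void *req)
{
        printf ("handling: %s\n", (const char *) req);
        return 0;
}

int
main (void)
{
        return big_locked_handler ((void *) "cli request", __handle_cli_op);
}

The glusterd.c hunks follow the same discipline on the synctask path: glusterd_launch_synctask() takes big_lock before calling synctask_new(), and gd_default_synctask_cbk() releases it when the task completes, so the spawned daemons also start under the lock.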

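GLUSTERD_GET_UUID_NOHYPHEN, added to glusterd.h above, derives snapshot volume names by copying a canonical UUID string while skipping the hyphens. Below is a minimal standalone check of that copy loop; uuid_utoa() is stubbed with a fixed sample string here, since the real helper (which formats a uuid_t) lives in libglusterfs:

#include <stdio.h>

/* Stub for libglusterfs's uuid_utoa(): returns a fixed canonical UUID. */
static char *
uuid_utoa (void *uuid)
{
        (void) uuid;
        return "8f16258c-94bc-4b01-9f65-ae0c3fd1ef74";
}

/* Macro body copied from the glusterd.h hunk above. */
#define GLUSTERD_GET_UUID_NOHYPHEN(ret_string, uuid) do {       \
                char *snap_volname_ptr = ret_string;            \
                char *snap_volid_ptr = uuid_utoa (uuid);        \
                while (*snap_volid_ptr) {                       \
                        if (*snap_volid_ptr == '-') {           \
                                snap_volid_ptr++;               \
                        } else {                                \
                                (*snap_volname_ptr++) =         \
                                        (*snap_volid_ptr++);    \
                        }                                       \
                }                                               \
                *snap_volname_ptr = '\0';                       \
        } while (0)

int
main (void)
{
        char name[33] = {0,};   /* 32 hex digits + terminating NUL */

        GLUSTERD_GET_UUID_NOHYPHEN (name, NULL);
        printf ("%s\n", name);  /* 8f16258c94bc4b019f65ae0c3fd1ef74 */
        return 0;
}

With the stub value the program prints 8f16258c94bc4b019f65ae0c3fd1ef74; note that ret_string must provide at least 33 bytes, since a canonical UUID carries 32 hex digits once the four hyphens are dropped.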