diff options
Diffstat (limited to 'xlators')
28 files changed, 12546 insertions, 631 deletions
diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am index b109e6dff24..933c440196d 100644 --- a/xlators/mgmt/glusterd/src/Makefile.am +++ b/xlators/mgmt/glusterd/src/Makefile.am @@ -12,7 +12,8 @@ glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \ glusterd-geo-rep.c glusterd-replace-brick.c glusterd-log-ops.c \ glusterd-volume-ops.c glusterd-brick-ops.c glusterd-mountbroker.c \ glusterd-syncop.c glusterd-hooks.c glusterd-volume-set.c \ - glusterd-locks.c + glusterd-locks.c glusterd-snapshot.c glusterd-mgmt-handler.c \ + glusterd-mgmt.c glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ $(top_builddir)/rpc/xdr/src/libgfxdr.la \ @@ -22,7 +23,8 @@ glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \ glusterd-sm.h glusterd-store.h glusterd-mem-types.h \ glusterd-pmap.h glusterd-volgen.h glusterd-mountbroker.h \ - glusterd-syncop.h glusterd-hooks.h glusterd-locks.h + glusterd-syncop.h glusterd-hooks.h glusterd-locks.h \ + glusterd-mgmt.h AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ -I$(rpclibdir) -I$(CONTRIBDIR)/rbtree \ diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 12f3705b5e7..58c030ca011 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -376,6 +376,21 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + /* As of now, the snap volumes are also displayed as part of + volume info command. So this change is to display whether + the volume is original volume or the snap_volume. If + displaying of snap volumes in volume info o/p is not needed + this should be removed. + */ + snprintf (key, 256, "volume%d.snap_volume", count); + ret = dict_set_int32 (volumes, key, volinfo->is_snap_volume); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "failed to set whether " + "the volume is a snap volume or actual volume (%s)", + volinfo->volname); + goto out; + } + snprintf (key, 256, "volume%d.brick_count", count); ret = dict_set_int32 (volumes, key, volinfo->brick_count); if (ret) @@ -634,7 +649,7 @@ glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx, goto out; } - /* Based on the op_version, acquire a cluster or volume lock */ + /* Based on the op_version, acquire a cluster or mgmt_v3 lock */ if (priv->op_version < GD_OP_VERSION_4) { ret = glusterd_lock (MY_UUID); if (ret) { @@ -664,7 +679,7 @@ glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx, goto out; } - ret = glusterd_volume_lock (volname, MY_UUID); + ret = glusterd_mgmt_v3_lock (volname, MY_UUID, "vol"); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Unable to acquire lock for %s", volname); @@ -711,11 +726,12 @@ local_locking_done: out: if (locked && ret) { /* Based on the op-version, we release the - * cluster or volume lock */ + * cluster or mgmt_v3 lock */ if (priv->op_version < GD_OP_VERSION_4) glusterd_unlock (MY_UUID); else { - ret = glusterd_volume_unlock (volname, MY_UUID); + ret = glusterd_mgmt_v3_unlock (volname, MY_UUID, + "vol"); if (ret) gf_log (this->name, GF_LOG_ERROR, "Unable to release lock for %s", @@ -822,188 +838,6 @@ glusterd_handle_cluster_lock (rpcsvc_request_t *req) __glusterd_handle_cluster_lock); } -static int -glusterd_handle_volume_lock_fn (rpcsvc_request_t *req) -{ - gd1_mgmt_volume_lock_req lock_req = {{0},}; - int32_t ret = -1; - glusterd_op_lock_ctx_t *ctx = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - xlator_t *this = NULL; - glusterd_op_info_t txn_op_info = {{0},}; - - this = THIS; - GF_ASSERT (this); - GF_ASSERT (req); - - ret = xdr_to_generic (req->msg[0], &lock_req, - (xdrproc_t)xdr_gd1_mgmt_volume_lock_req); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "Failed to decode lock " - "request received from peer"); - req->rpc_err = GARBAGE_ARGS; - goto out; - } - - gf_log (this->name, GF_LOG_DEBUG, "Received volume lock req " - "from uuid: %s txn_id: %s", uuid_utoa (lock_req.uuid), - uuid_utoa (lock_req.txn_id)); - - if (glusterd_friend_find_by_uuid (lock_req.uuid, &peerinfo)) { - gf_log (this->name, GF_LOG_WARNING, "%s doesn't " - "belong to the cluster. Ignoring request.", - uuid_utoa (lock_req.uuid)); - ret = -1; - goto out; - } - - ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t); - if (!ctx) { - ret = -1; - goto out; - } - - uuid_copy (ctx->uuid, lock_req.uuid); - ctx->req = req; - - ctx->dict = dict_new (); - if (!ctx->dict) { - ret = -1; - goto out; - } - - ret = dict_unserialize (lock_req.dict.dict_val, - lock_req.dict.dict_len, &ctx->dict); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "failed to unserialize the dictionary"); - goto out; - } - - glusterd_txn_opinfo_init (&txn_op_info, NULL, &lock_req.op, - ctx->dict, req); - - ret = glusterd_set_txn_opinfo (&lock_req.txn_id, &txn_op_info); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Unable to set transaction's opinfo"); - goto out; - } - - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK, - &lock_req.txn_id, ctx); - if (ret) - gf_log (this->name, GF_LOG_ERROR, - "Failed to inject event GD_OP_EVENT_LOCK"); - -out: - if (ret) { - if (ctx) { - if (ctx->dict) - dict_destroy (ctx->dict); - GF_FREE (ctx); - } - } - - glusterd_friend_sm (); - glusterd_op_sm (); - - gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} - -int -glusterd_handle_volume_lock (rpcsvc_request_t *req) -{ - return glusterd_big_locked_handler (req, - glusterd_handle_volume_lock_fn); -} - -static int -glusterd_handle_volume_unlock_fn (rpcsvc_request_t *req) -{ - gd1_mgmt_volume_unlock_req lock_req = {{0},}; - int32_t ret = -1; - glusterd_op_lock_ctx_t *ctx = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - xlator_t *this = NULL; - - this = THIS; - GF_ASSERT (this); - GF_ASSERT (req); - - ret = xdr_to_generic (req->msg[0], &lock_req, - (xdrproc_t)xdr_gd1_mgmt_volume_unlock_req); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "Failed to decode unlock " - "request received from peer"); - req->rpc_err = GARBAGE_ARGS; - goto out; - } - - gf_log (this->name, GF_LOG_DEBUG, "Received volume unlock req " - "from uuid: %s", uuid_utoa (lock_req.uuid)); - - if (glusterd_friend_find_by_uuid (lock_req.uuid, &peerinfo)) { - gf_log (this->name, GF_LOG_WARNING, "%s doesn't " - "belong to the cluster. Ignoring request.", - uuid_utoa (lock_req.uuid)); - ret = -1; - goto out; - } - - ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t); - if (!ctx) { - ret = -1; - goto out; - } - - uuid_copy (ctx->uuid, lock_req.uuid); - ctx->req = req; - - ctx->dict = dict_new (); - if (!ctx->dict) { - ret = -1; - goto out; - } - - ret = dict_unserialize (lock_req.dict.dict_val, - lock_req.dict.dict_len, &ctx->dict); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "failed to unserialize the dictionary"); - goto out; - } - - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK, - &lock_req.txn_id, ctx); - if (ret) - gf_log (this->name, GF_LOG_ERROR, - "Failed to inject event GD_OP_EVENT_UNLOCK"); - -out: - if (ret) { - if (ctx) { - if (ctx->dict) - dict_destroy (ctx->dict); - GF_FREE (ctx); - } - } - - glusterd_friend_sm (); - glusterd_op_sm (); - - gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} - -int -glusterd_handle_volume_unlock (rpcsvc_request_t *req) -{ - return glusterd_big_locked_handler (req, - glusterd_handle_volume_unlock_fn); -} - int glusterd_req_ctx_create (rpcsvc_request_t *rpc_req, glusterd_op_t op, uuid_t uuid, @@ -2243,12 +2077,12 @@ glusterd_op_unlock_send_resp (rpcsvc_request_t *req, int32_t status) } int -glusterd_op_volume_lock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id, +glusterd_op_mgmt_v3_lock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id, int32_t status) { - gd1_mgmt_volume_lock_rsp rsp = {{0},}; - int ret = -1; + gd1_mgmt_v3_lock_rsp rsp = {{0},}; + int ret = -1; GF_ASSERT (req); GF_ASSERT (txn_id); @@ -2259,20 +2093,20 @@ glusterd_op_volume_lock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id, uuid_copy (rsp.txn_id, *txn_id); ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - (xdrproc_t)xdr_gd1_mgmt_volume_lock_rsp); + (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); - gf_log (THIS->name, GF_LOG_DEBUG, "Responded to volume lock, ret: %d", + gf_log (THIS->name, GF_LOG_DEBUG, "Responded to mgmt_v3 lock, ret: %d", ret); return ret; } int -glusterd_op_volume_unlock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id, +glusterd_op_mgmt_v3_unlock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id, int32_t status) { - gd1_mgmt_volume_unlock_rsp rsp = {{0},}; + gd1_mgmt_v3_unlock_rsp rsp = {{0},}; int ret = -1; GF_ASSERT (req); @@ -2284,9 +2118,9 @@ glusterd_op_volume_unlock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id, uuid_copy (rsp.txn_id, *txn_id); ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - (xdrproc_t)xdr_gd1_mgmt_volume_unlock_rsp); + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); - gf_log (THIS->name, GF_LOG_DEBUG, "Responded to volume unlock, ret: %d", + gf_log (THIS->name, GF_LOG_DEBUG, "Responded to mgmt_v3 unlock, ret: %d", ret); return ret; @@ -4069,8 +3903,12 @@ get_brickinfo_from_brickid (char *brickid, glusterd_brickinfo_t **brickinfo) brick++; uuid_parse (volid_str, volid); ret = glusterd_volinfo_find_by_volume_id (volid, &volinfo); - if (ret) - goto out; + if (ret) { + /* Check if it a snapshot volume */ + ret = glusterd_snap_volinfo_find_by_volume_id (volid, &volinfo); + if (ret) + goto out; + } ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, brickinfo); @@ -4284,9 +4122,10 @@ __glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, } else { list_for_each_entry (volinfo, &conf->volumes, vol_list) { - ret = glusterd_volume_unlock + ret = glusterd_mgmt_v3_unlock (volinfo->volname, - peerinfo->uuid); + peerinfo->uuid, + "vol"); if (ret) gf_log (this->name, GF_LOG_TRACE, @@ -4414,8 +4253,9 @@ rpcsvc_actor_t gd_svc_cli_actors[] = { [GLUSTER_CLI_STATEDUMP_VOLUME] = {"STATEDUMP_VOLUME", GLUSTER_CLI_STATEDUMP_VOLUME, glusterd_handle_cli_statedump_volume, NULL, 0, DRC_NA}, [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", GLUSTER_CLI_LIST_VOLUME, glusterd_handle_cli_list_volume, NULL, 0, DRC_NA}, [GLUSTER_CLI_CLRLOCKS_VOLUME] = {"CLEARLOCKS_VOLUME", GLUSTER_CLI_CLRLOCKS_VOLUME, glusterd_handle_cli_clearlocks_volume, NULL, 0, DRC_NA}, - [GLUSTER_CLI_COPY_FILE] = {"COPY_FILE", GLUSTER_CLI_COPY_FILE, glusterd_handle_copy_file, NULL, 0, DRC_NA}, - [GLUSTER_CLI_SYS_EXEC] = {"SYS_EXEC", GLUSTER_CLI_SYS_EXEC, glusterd_handle_sys_exec, NULL, 0, DRC_NA}, + [GLUSTER_CLI_COPY_FILE] = {"COPY_FILE", GLUSTER_CLI_COPY_FILE, glusterd_handle_copy_file, NULL, 0, DRC_NA}, + [GLUSTER_CLI_SYS_EXEC] = {"SYS_EXEC", GLUSTER_CLI_SYS_EXEC, glusterd_handle_sys_exec, NULL, 0, DRC_NA}, + [GLUSTER_CLI_SNAP] = {"SNAP", GLUSTER_CLI_SNAP, glusterd_handle_snapshot, NULL, 0, DRC_NA}, }; struct rpcsvc_program gd_svc_cli_prog = { @@ -4447,18 +4287,3 @@ struct rpcsvc_program gd_svc_cli_prog_ro = { .actors = gd_svc_cli_actors_ro, .synctask = _gf_true, }; - -rpcsvc_actor_t gd_svc_mgmt_v3_actors[] = { - [GLUSTERD_MGMT_V3_NULL] = { "NULL", GLUSTERD_MGMT_V3_NULL, glusterd_null, NULL, 0, DRC_NA}, - [GLUSTERD_MGMT_V3_VOLUME_LOCK] = { "VOL_LOCK", GLUSTERD_MGMT_V3_VOLUME_LOCK, glusterd_handle_volume_lock, NULL, 0, DRC_NA}, - [GLUSTERD_MGMT_V3_VOLUME_UNLOCK] = { "VOL_UNLOCK", GLUSTERD_MGMT_V3_VOLUME_UNLOCK, glusterd_handle_volume_unlock, NULL, 0, DRC_NA}, -}; - -struct rpcsvc_program gd_svc_mgmt_v3_prog = { - .progname = "GlusterD svc mgmt v3", - .prognum = GD_MGMT_PROGRAM, - .progver = GD_MGMT_V3_VERSION, - .numactors = GLUSTERD_MGMT_V3_MAXVALUE, - .actors = gd_svc_mgmt_v3_actors, - .synctask = _gf_true, -}; diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c index e0508faf62b..5078526e983 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handshake.c +++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c @@ -36,21 +36,139 @@ extern struct rpc_clnt_program gd_mgmt_v3_prog; typedef ssize_t (*gfs_serialize_t) (struct iovec outmsg, void *data); +static int +get_snap_volname_and_volinfo (const char *volpath, char **volname, + glusterd_volinfo_t **volinfo) +{ + int ret = -1; + char *save_ptr = NULL; + char *str_token = NULL; + char *snapname = NULL; + char *volname_token = NULL; + char *vol = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (volpath); + GF_ASSERT (volinfo); + + str_token = gf_strdup (volpath); + if (NULL == str_token) { + goto out; + } + + /* Input volname will have below formats: + * /snaps/<snapname>/<volname>.<hostname> + * or + * /snaps/<snapname>/<parent-volname> + * We need to extract snapname and parent_volname */ + + /*split string by "/" */ + strtok_r (str_token, "/", &save_ptr); + snapname = strtok_r(NULL, "/", &save_ptr); + if (!snapname) { + gf_log(this->name, GF_LOG_ERROR, "Invalid path: %s", volpath); + goto out; + } + + volname_token = strtok_r(NULL, "/", &save_ptr); + if (!volname_token) { + gf_log(this->name, GF_LOG_ERROR, "Invalid path: %s", volpath); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + gf_log(this->name, GF_LOG_ERROR, "Failed to " + "fetch snap %s", snapname); + goto out; + } + + /* Find if its a parent volume name or snap volume + * name. This function will succeed if volname_token + * is a parent volname + */ + ret = glusterd_volinfo_find (volname_token, volinfo); + if (ret) { + *volname = gf_strdup (volname_token); + if (NULL == *volname) { + ret = -1; + goto out; + } + + ret = glusterd_snap_volinfo_find (volname_token, snap, + volinfo); + if (ret) { + /* Split the volume name */ + vol = strtok_r (volname_token, ".", &save_ptr); + if (!vol) { + gf_log(this->name, GF_LOG_ERROR, "Invalid " + "volname (%s)", volname_token); + goto out; + } + + ret = glusterd_snap_volinfo_find (vol, snap, volinfo); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Failed to " + "fetch snap volume from volname (%s)", + vol); + goto out; + } + } + } else { + /*volname_token is parent volname*/ + ret = glusterd_snap_volinfo_find_from_parent_volname ( + volname_token, snap, volinfo); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Failed to " + "fetch snap volume from parent " + "volname (%s)", volname_token); + goto out; + } + + /* Since volname_token is a parent volname we should + * get the snap volname here*/ + *volname = gf_strdup ((*volinfo)->volname); + if (NULL == *volname) { + ret = -1; + goto out; + } + } + +out: + if (ret && NULL != *volname) { + GF_FREE (*volname); + *volname = NULL; + } + return ret; +} + static size_t build_volfile_path (const char *volname, char *path, size_t path_len, char *trusted_str) { - struct stat stbuf = {0,}; - int32_t ret = -1; - glusterd_conf_t *priv = NULL; - char *vol = NULL; - char *dup_volname = NULL; - char *free_ptr = NULL; - char *tmp = NULL; - glusterd_volinfo_t *volinfo = NULL; - char *server = NULL; - - priv = THIS->private; + struct stat stbuf = {0,}; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + char *vol = NULL; + char *dup_volname = NULL; + char *free_ptr = NULL; + char *save_ptr = NULL; + char *str_token = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *server = NULL; + const char *volname_ptr = NULL; + char path_prefix [PATH_MAX] = {0,}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (volname); + GF_ASSERT (path); if (strstr (volname, "gluster/")) { server = strchr (volname, '/') + 1; @@ -58,6 +176,22 @@ build_volfile_path (const char *volname, char *path, path, path_len); ret = 1; goto out; + } else if ((str_token = strstr (volname, "/snaps/"))) { + ret = get_snap_volname_and_volinfo (str_token, &dup_volname, + &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get snap" + " volinfo from path (%s)", volname); + ret = -1; + goto out; + } + + snprintf (path_prefix, sizeof (path_prefix), "%s/snaps/%s", + priv->workdir, volinfo->snapshot->snapname); + + free_ptr = dup_volname; + volname_ptr = dup_volname; + goto gotvolinfo; } else if (volname[0] != '/') { /* Normal behavior */ dup_volname = gf_strdup (volname); @@ -68,39 +202,53 @@ build_volfile_path (const char *volname, char *path, dup_volname = gf_strdup (&volname[1]); } + if (!dup_volname) { + gf_log(THIS->name, GF_LOG_ERROR, "strdup failed"); + ret = -1; + goto out; + } free_ptr = dup_volname; + volname_ptr = volname; + + snprintf (path_prefix, sizeof (path_prefix), "%s/vols", + priv->workdir); ret = glusterd_volinfo_find (dup_volname, &volinfo); + if (ret) { /* Split the volume name */ - vol = strtok_r (dup_volname, ".", &tmp); + vol = strtok_r (dup_volname, ".", &save_ptr); if (!vol) goto out; + ret = glusterd_volinfo_find (vol, &volinfo); if (ret) goto out; } +gotvolinfo: if (!glusterd_auth_get_username (volinfo)) trusted_str = NULL; - ret = snprintf (path, path_len, "%s/vols/%s/%s.vol", - priv->workdir, volinfo->volname, volname); + ret = snprintf (path, path_len, "%s/%s/%s.vol", path_prefix, + volinfo->volname, volname_ptr); if (ret == -1) goto out; ret = stat (path, &stbuf); if ((ret == -1) && (errno == ENOENT)) { - snprintf (path, path_len, "%s/vols/%s/%s%s-fuse.vol", - priv->workdir, volinfo->volname, - (trusted_str ? trusted_str : ""), dup_volname); + snprintf (path, path_len, "%s/%s/%s%s-fuse.vol", + path_prefix, volinfo->volname, + (trusted_str ? trusted_str : ""), + dup_volname); + ret = stat (path, &stbuf); } if ((ret == -1) && (errno == ENOENT)) { - snprintf (path, path_len, "%s/vols/%s/%s-tcp.vol", - priv->workdir, volinfo->volname, volname); + snprintf (path, path_len, "%s/%s/%s-tcp.vol", + path_prefix, volinfo->volname, volname_ptr); } ret = 1; diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.c b/xlators/mgmt/glusterd/src/glusterd-locks.c index 9e8bbc21b81..28358aa555e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-locks.c +++ b/xlators/mgmt/glusterd/src/glusterd-locks.c @@ -26,10 +26,40 @@ #include <signal.h> -/* Initialize the global vol-lock list(dict) when +#define GF_MAX_LOCKING_ENTITIES 2 + +/* Valid entities that the mgmt_v3 lock can hold locks upon * + * To add newer entities to be locked, we can just add more * + * entries to this table along with the type and default value */ +glusterd_valid_entities valid_types[] = { + { "vol", _gf_true }, + { "snap", _gf_false }, + { NULL }, +}; + +/* Checks if the lock request is for a valid entity */ +gf_boolean_t +glusterd_mgmt_v3_is_type_valid (char *type) +{ + int32_t i = 0; + gf_boolean_t ret = _gf_false; + + GF_ASSERT (type); + + for (i = 0; valid_types[i].type; i++) { + if (!strcmp (type, valid_types[i].type)) { + ret = _gf_true; + break; + } + } + + return ret; +} + +/* Initialize the global mgmt_v3 lock list(dict) when * glusterd is spawned */ int32_t -glusterd_vol_lock_init () +glusterd_mgmt_v3_lock_init () { int32_t ret = -1; xlator_t *this = NULL; @@ -40,8 +70,8 @@ glusterd_vol_lock_init () priv = this->private; GF_ASSERT (priv); - priv->vol_lock = dict_new (); - if (!priv->vol_lock) + priv->mgmt_v3_lock = dict_new (); + if (!priv->mgmt_v3_lock) goto out; ret = 0; @@ -49,10 +79,10 @@ out: return ret; } -/* Destroy the global vol-lock list(dict) when +/* Destroy the global mgmt_v3 lock list(dict) when * glusterd cleanup is performed */ void -glusterd_vol_lock_fini () +glusterd_mgmt_v3_lock_fini () { xlator_t *this = NULL; glusterd_conf_t *priv = NULL; @@ -62,31 +92,31 @@ glusterd_vol_lock_fini () priv = this->private; GF_ASSERT (priv); - if (priv->vol_lock) - dict_unref (priv->vol_lock); + if (priv->mgmt_v3_lock) + dict_unref (priv->mgmt_v3_lock); } int32_t -glusterd_get_vol_lock_owner (char *volname, uuid_t *uuid) +glusterd_get_mgmt_v3_lock_owner (char *key, uuid_t *uuid) { - int32_t ret = -1; - glusterd_vol_lock_obj *lock_obj = NULL; - glusterd_conf_t *priv = NULL; - uuid_t no_owner = {0,}; - xlator_t *this = NULL; + int32_t ret = -1; + glusterd_mgmt_v3_lock_obj *lock_obj = NULL; + glusterd_conf_t *priv = NULL; + uuid_t no_owner = {0,}; + xlator_t *this = NULL; this = THIS; GF_ASSERT (this); priv = this->private; GF_ASSERT (priv); - if (!volname || !uuid) { - gf_log ("", GF_LOG_ERROR, "volname or uuid is null."); + if (!key || !uuid) { + gf_log (this->name, GF_LOG_ERROR, "key or uuid is null."); ret = -1; goto out; } - ret = dict_get_bin (priv->vol_lock, volname, (void **) &lock_obj); + ret = dict_get_bin (priv->mgmt_v3_lock, key, (void **) &lock_obj); if (!ret) uuid_copy (*uuid, lock_obj->lock_owner); else @@ -94,49 +124,433 @@ glusterd_get_vol_lock_owner (char *volname, uuid_t *uuid) ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* This function is called with the locked_count and type, to * + * release all the acquired locks. */ +static int32_t +glusterd_release_multiple_locks_per_entity (dict_t *dict, uuid_t uuid, + int32_t locked_count, + char *type) +{ + char name_buf[PATH_MAX] = ""; + char *name = NULL; + int32_t i = -1; + int32_t op_ret = 0; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT (dict); + GF_ASSERT (type); + + if (locked_count == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "No %s locked as part of this transaction", + type); + goto out; + } + + /* Release all the locks held */ + for (i = 0; i < locked_count; i++) { + snprintf (name_buf, sizeof(name_buf), + "%sname%d", type, i+1); + + /* Looking for volname1, volname2 or snapname1, * + * as key in the dict snapname2 */ + ret = dict_get_str (dict, name_buf, &name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get %s locked_count = %d", + name_buf, locked_count); + op_ret = ret; + continue; + } + + ret = glusterd_mgmt_v3_unlock (name, uuid, type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release lock for %s.", + name); + op_ret = ret; + } + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", op_ret); + return op_ret; +} + +/* Given the count and type of the entity this function acquires * + * locks on multiple elements of the same entity. For example: * + * If type is "vol" this function tries to acquire locks on multiple * + * volumes */ +static int32_t +glusterd_acquire_multiple_locks_per_entity (dict_t *dict, uuid_t uuid, + int32_t count, char *type) +{ + char name_buf[PATH_MAX] = ""; + char *name = NULL; + int32_t i = -1; + int32_t ret = -1; + int32_t locked_count = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT (dict); + GF_ASSERT (type); + + /* Locking one element after other */ + for (i = 0; i < count; i++) { + snprintf (name_buf, sizeof(name_buf), + "%sname%d", type, i+1); + + /* Looking for volname1, volname2 or snapname1, * + * as key in the dict snapname2 */ + ret = dict_get_str (dict, name_buf, &name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get %s count = %d", + name_buf, count); + break; + } + + ret = glusterd_mgmt_v3_lock (name, uuid, type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire lock for %s %s " + "on behalf of %s. Reversing " + "this transaction", type, name, + uuid_utoa(uuid)); + break; + } + locked_count++; + } + + if (count == locked_count) { + /* If all locking ops went successfuly, return as success */ + ret = 0; + goto out; + } + + /* If we failed to lock one element, unlock others and return failure */ + ret = glusterd_release_multiple_locks_per_entity (dict, uuid, + locked_count, + type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release multiple %s locks", + type); + } + ret = -1; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Given the type of entity, this function figures out if it should unlock a * + * single element of multiple elements of the said entity. For example: * + * if the type is "vol", this function will accordingly unlock a single volume * + * or multiple volumes */ +static int32_t +glusterd_mgmt_v3_unlock_entity (dict_t *dict, uuid_t uuid, char *type, + gf_boolean_t default_value) +{ + char name_buf[PATH_MAX] = ""; + char *name = NULL; + int32_t count = -1; + int32_t ret = -1; + gf_boolean_t hold_locks = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT (dict); + GF_ASSERT (type); + + snprintf (name_buf, sizeof(name_buf), "hold_%s_locks", type); + hold_locks = dict_get_str_boolean (dict, name_buf, default_value); + + if (hold_locks == _gf_false) { + /* Locks were not held for this particular entity * + * Hence nothing to release */ + ret = 0; + goto out; + } + + /* Looking for volcount or snapcount in the dict */ + snprintf (name_buf, sizeof(name_buf), "%scount", type); + ret = dict_get_int32 (dict, name_buf, &count); + if (ret) { + /* count is not present. Only one * + * element name needs to be unlocked */ + snprintf (name_buf, sizeof(name_buf), "%sname", + type); + ret = dict_get_str (dict, name_buf, &name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch %sname", type); + goto out; + } + + ret = glusterd_mgmt_v3_unlock (name, uuid, type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release lock for %s %s " + "on behalf of %s.", type, name, + uuid_utoa(uuid)); + goto out; + } + } else { + /* Unlocking one element name after another */ + ret = glusterd_release_multiple_locks_per_entity (dict, + uuid, + count, + type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release all %s locks", type); + goto out; + } + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); return ret; } +/* Given the type of entity, this function figures out if it should lock a * + * single element or multiple elements of the said entity. For example: * + * if the type is "vol", this function will accordingly lock a single volume * + * or multiple volumes */ +static int32_t +glusterd_mgmt_v3_lock_entity (dict_t *dict, uuid_t uuid, char *type, + gf_boolean_t default_value) +{ + char name_buf[PATH_MAX] = ""; + char *name = NULL; + int32_t count = -1; + int32_t ret = -1; + gf_boolean_t hold_locks = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT (dict); + GF_ASSERT (type); + + snprintf (name_buf, sizeof(name_buf), "hold_%s_locks", type); + hold_locks = dict_get_str_boolean (dict, name_buf, default_value); + + if (hold_locks == _gf_false) { + /* Not holding locks for this particular entity */ + ret = 0; + goto out; + } + + /* Looking for volcount or snapcount in the dict */ + snprintf (name_buf, sizeof(name_buf), "%scount", type); + ret = dict_get_int32 (dict, name_buf, &count); + if (ret) { + /* count is not present. Only one * + * element name needs to be locked */ + snprintf (name_buf, sizeof(name_buf), "%sname", + type); + ret = dict_get_str (dict, name_buf, &name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch %sname", type); + goto out; + } + + ret = glusterd_mgmt_v3_lock (name, uuid, type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire lock for %s %s " + "on behalf of %s.", type, name, + uuid_utoa(uuid)); + goto out; + } + } else { + /* Locking one element name after another */ + ret = glusterd_acquire_multiple_locks_per_entity (dict, + uuid, + count, + type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire all %s locks", type); + goto out; + } + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Try to release locks of multiple entities like * + * volume, snaps etc. */ +int32_t +glusterd_multiple_mgmt_v3_unlock (dict_t *dict, uuid_t uuid) +{ + int32_t i = -1; + int32_t ret = -1; + int32_t op_ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, "dict is null."); + ret = -1; + goto out; + } + + for (i = 0; valid_types[i].type; i++) { + ret = glusterd_mgmt_v3_unlock_entity + (dict, uuid, + valid_types[i].type, + valid_types[i].default_value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to unlock all %s", + valid_types[i].type); + op_ret = ret; + } + } + + ret = op_ret; +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +/* Try to acquire locks on multiple entities like * + * volume, snaps etc. */ int32_t -glusterd_volume_lock (char *volname, uuid_t uuid) +glusterd_multiple_mgmt_v3_lock (dict_t *dict, uuid_t uuid) { - int32_t ret = -1; - glusterd_vol_lock_obj *lock_obj = NULL; - glusterd_conf_t *priv = NULL; - uuid_t owner = {0}; - xlator_t *this = NULL; + int32_t i = -1; + int32_t ret = -1; + int32_t locked_count = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, "dict is null."); + ret = -1; + goto out; + } + + /* Locking one entity after other */ + for (i = 0; valid_types[i].type; i++) { + ret = glusterd_mgmt_v3_lock_entity + (dict, uuid, + valid_types[i].type, + valid_types[i].default_value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to lock all %s", + valid_types[i].type); + break; + } + locked_count++; + } + + if (locked_count == GF_MAX_LOCKING_ENTITIES) { + /* If all locking ops went successfuly, return as success */ + ret = 0; + goto out; + } + + /* If we failed to lock one entity, unlock others and return failure */ + for (i = 0; i < locked_count; i++) { + ret = glusterd_mgmt_v3_unlock_entity + (dict, uuid, + valid_types[i].type, + valid_types[i].default_value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to unlock all %s", + valid_types[i].type); + } + } + ret = -1; +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_mgmt_v3_lock (const char *name, uuid_t uuid, char *type) +{ + char key[PATH_MAX] = ""; + int32_t ret = -1; + glusterd_mgmt_v3_lock_obj *lock_obj = NULL; + glusterd_conf_t *priv = NULL; + gf_boolean_t is_valid = _gf_true; + uuid_t owner = {0}; + xlator_t *this = NULL; this = THIS; GF_ASSERT (this); priv = this->private; GF_ASSERT (priv); - if (!volname) { - gf_log ("", GF_LOG_ERROR, "volname is null."); + if (!name || !type) { + gf_log (this->name, GF_LOG_ERROR, "name or type is null."); ret = -1; goto out; } - ret = glusterd_get_vol_lock_owner (volname, &owner); + is_valid = glusterd_mgmt_v3_is_type_valid (type); + if (is_valid != _gf_true) { + gf_log (this->name, GF_LOG_ERROR, + "Invalid entity. Cannot perform locking " + "operation on %s types", type); + ret = -1; + goto out; + } + + ret = snprintf (key, sizeof(key), "%s_%s", name, type); + if (ret != strlen(name) + 1 + strlen(type)) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Unable to create key"); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "Trying to acquire lock of %s %s for %s as %s", + type, name, uuid_utoa (uuid), key); + + ret = glusterd_get_mgmt_v3_lock_owner (key, &owner); if (ret) { - gf_log ("", GF_LOG_WARNING, - "Unable to get volume lock owner"); + gf_log (this->name, GF_LOG_DEBUG, + "Unable to get mgmt_v3 lock owner"); goto out; } /* If the lock has already been held for the given volume * we fail */ if (!uuid_is_null (owner)) { - gf_log ("", GF_LOG_WARNING, "Unable to acquire lock. " - "Lock for %s held by %s", volname, - uuid_utoa (owner)); + gf_log (this->name, GF_LOG_WARNING, "Lock for %s held by %s", + name, uuid_utoa (owner)); ret = -1; goto out; } - lock_obj = GF_CALLOC (1, sizeof(glusterd_vol_lock_obj), - gf_common_mt_vol_lock_obj_t); + lock_obj = GF_CALLOC (1, sizeof(glusterd_mgmt_v3_lock_obj), + gf_common_mt_mgmt_v3_lock_obj_t); if (!lock_obj) { ret = -1; goto out; @@ -144,70 +558,99 @@ glusterd_volume_lock (char *volname, uuid_t uuid) uuid_copy (lock_obj->lock_owner, uuid); - ret = dict_set_bin (priv->vol_lock, volname, lock_obj, - sizeof(glusterd_vol_lock_obj)); + ret = dict_set_bin (priv->mgmt_v3_lock, key, lock_obj, + sizeof(glusterd_mgmt_v3_lock_obj)); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to set lock owner " - "in volume lock"); + gf_log (this->name, GF_LOG_ERROR, + "Unable to set lock owner in mgmt_v3 lock"); if (lock_obj) GF_FREE (lock_obj); goto out; } - gf_log ("", GF_LOG_DEBUG, "Lock for %s successfully held by %s", - volname, uuid_utoa (uuid)); + gf_log (this->name, GF_LOG_DEBUG, + "Lock for %s %s successfully held by %s", + type, name, uuid_utoa (uuid)); ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); return ret; } int32_t -glusterd_volume_unlock (char *volname, uuid_t uuid) +glusterd_mgmt_v3_unlock (const char *name, uuid_t uuid, char *type) { - int32_t ret = -1; - glusterd_conf_t *priv = NULL; - uuid_t owner = {0}; - xlator_t *this = NULL; + char key[PATH_MAX] = ""; + int32_t ret = -1; + gf_boolean_t is_valid = _gf_true; + glusterd_conf_t *priv = NULL; + uuid_t owner = {0}; + xlator_t *this = NULL; this = THIS; GF_ASSERT (this); priv = this->private; GF_ASSERT (priv); - if (!volname) { - gf_log ("", GF_LOG_ERROR, "volname is null."); + if (!name || !type) { + gf_log (this->name, GF_LOG_ERROR, "name is null."); + ret = -1; + goto out; + } + + is_valid = glusterd_mgmt_v3_is_type_valid (type); + if (is_valid != _gf_true) { + gf_log (this->name, GF_LOG_ERROR, + "Invalid entity. Cannot perform unlocking " + "operation on %s types", type); ret = -1; goto out; } - ret = glusterd_get_vol_lock_owner (volname, &owner); - if (ret) + ret = snprintf (key, sizeof(key), "%s_%s", + name, type); + if (ret != strlen(name) + 1 + strlen(type)) { + gf_log (this->name, GF_LOG_ERROR, "Unable to create key"); + ret = -1; goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "Trying to release lock of %s %s for %s as %s", + type, name, uuid_utoa (uuid), key); + + ret = glusterd_get_mgmt_v3_lock_owner (key, &owner); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Unable to get mgmt_v3 lock owner"); + goto out; + } if (uuid_is_null (owner)) { - gf_log ("", GF_LOG_WARNING, "Lock for %s not held", volname); + gf_log (this->name, GF_LOG_WARNING, + "Lock for %s %s not held", type, name); ret = -1; goto out; } ret = uuid_compare (uuid, owner); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, "Lock owner mismatch. " - "Lock for %s held by %s", - volname, uuid_utoa (owner)); + gf_log (this->name, GF_LOG_WARNING, "Lock owner mismatch. " + "Lock for %s %s held by %s", + type, name, uuid_utoa (owner)); goto out; } - /* Removing the volume lock from the global list */ - dict_del (priv->vol_lock, volname); + /* Removing the mgmt_v3 lock from the global list */ + dict_del (priv->mgmt_v3_lock, key); - gf_log ("", GF_LOG_DEBUG, "Lock for %s successfully released", - volname); + gf_log (this->name, GF_LOG_DEBUG, + "Lock for %s %s successfully released", + type, name); ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.h b/xlators/mgmt/glusterd/src/glusterd-locks.h index 6e3f56f9cf9..b9cc8c0d1e4 100644 --- a/xlators/mgmt/glusterd/src/glusterd-locks.h +++ b/xlators/mgmt/glusterd/src/glusterd-locks.h @@ -15,23 +15,37 @@ #include "config.h" #endif -typedef struct glusterd_volume_lock_object_ { +typedef struct glusterd_mgmt_v3_lock_object_ { uuid_t lock_owner; -} glusterd_vol_lock_obj; +} glusterd_mgmt_v3_lock_obj; + +typedef struct glusterd_mgmt_v3_lock_valid_entities { + char *type; /* Entity type like vol, snap */ + gf_boolean_t default_value; /* The default value that * + * determines if the locks * + * should be held for that * + * entity */ +} glusterd_valid_entities; int32_t -glusterd_vol_lock_init (); +glusterd_mgmt_v3_lock_init (); void -glusterd_vol_lock_fini (); +glusterd_mgmt_v3_lock_fini (); + +int32_t +glusterd_get_mgmt_v3_lock_owner (char *volname, uuid_t *uuid); + +int32_t +glusterd_mgmt_v3_lock (const char *key, uuid_t uuid, char *type); int32_t -glusterd_get_vol_lock_owner (char *volname, uuid_t *uuid); +glusterd_mgmt_v3_unlock (const char *key, uuid_t uuid, char *type); int32_t -glusterd_volume_lock (char *volname, uuid_t uuid); +glusterd_multiple_mgmt_v3_lock (dict_t *dict, uuid_t uuid); int32_t -glusterd_volume_unlock (char *volname, uuid_t uuid); +glusterd_multiple_mgmt_v3_unlock (dict_t *dict, uuid_t uuid); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h index 51057c27472..e6f6a0333a1 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h +++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h @@ -67,7 +67,9 @@ typedef enum gf_gld_mem_types_ { gf_gld_mt_hooks_priv_t = gf_common_mt_end + 51, gf_gld_mt_mop_commit_req_t = gf_common_mt_end + 52, gf_gld_mt_int = gf_common_mt_end + 53, - gf_gld_mt_end = gf_common_mt_end + 54, + gf_gld_mt_snap_t = gf_common_mt_end + 54, + gf_gld_mt_missed_snapinfo_t = gf_common_mt_end + 55, + gf_gld_mt_end = gf_common_mt_end + 56, } gf_gld_mem_types_t; #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c new file mode 100644 index 00000000000..81c5aa57958 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c @@ -0,0 +1,936 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +/* rpc related syncops */ +#include "rpc-clnt.h" +#include "protocol-common.h" +#include "xdr-generic.h" +#include "glusterd1-xdr.h" +#include "glusterd-syncop.h" + +#include "glusterd.h" +#include "glusterd-utils.h" +#include "glusterd-locks.h" +#include "glusterd-mgmt.h" +#include "glusterd-op-sm.h" + +static int +glusterd_mgmt_v3_null (rpcsvc_request_t *req) +{ + return 0; +} + +static int +glusterd_mgmt_v3_lock_send_resp (rpcsvc_request_t *req, int32_t status) +{ + + gd1_mgmt_v3_lock_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + if (rsp.op_ret) + rsp.op_errno = errno; + + glusterd_get_uuid (&rsp.uuid); + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); + + gf_log (this->name, GF_LOG_DEBUG, + "Responded to mgmt_v3 lock, ret: %d", ret); + + return ret; +} + +static int +glusterd_synctasked_mgmt_v3_lock (rpcsvc_request_t *req, + gd1_mgmt_v3_lock_req *lock_req, + glusterd_op_lock_ctx_t *ctx) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (ctx); + GF_ASSERT (ctx->dict); + + /* Trying to acquire multiple mgmt_v3 locks */ + ret = glusterd_multiple_mgmt_v3_lock (ctx->dict, ctx->uuid); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire mgmt_v3 locks for %s", + uuid_utoa (ctx->uuid)); + + ret = glusterd_mgmt_v3_lock_send_resp (req, ret); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_op_state_machine_mgmt_v3_lock (rpcsvc_request_t *req, + gd1_mgmt_v3_lock_req *lock_req, + glusterd_op_lock_ctx_t *ctx) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_op_info_t txn_op_info = {{0},}; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + glusterd_txn_opinfo_init (&txn_op_info, NULL, &lock_req->op, + ctx->dict, req); + + ret = glusterd_set_txn_opinfo (&lock_req->txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + goto out; + } + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK, + &lock_req->txn_id, ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_LOCK"); + +out: + glusterd_friend_sm (); + glusterd_op_sm (); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_handle_mgmt_v3_lock_fn (rpcsvc_request_t *req) +{ + gd1_mgmt_v3_lock_req lock_req = {{0},}; + int32_t ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_op_lock_ctx_t *ctx = NULL; + xlator_t *this = NULL; + gf_boolean_t is_synctasked = _gf_false; + gf_boolean_t free_ctx = _gf_false; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &lock_req, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode lock " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "Received mgmt_v3 lock req " + "from uuid: %s", uuid_utoa (lock_req.uuid)); + + if (glusterd_friend_find_by_uuid (lock_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa (lock_req.uuid)); + ret = -1; + goto out; + } + + ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t); + if (!ctx) { + ret = -1; + goto out; + } + + uuid_copy (ctx->uuid, lock_req.uuid); + ctx->req = req; + + ctx->dict = dict_new (); + if (!ctx->dict) { + ret = -1; + goto out; + } + + ret = dict_unserialize (lock_req.dict.dict_val, + lock_req.dict.dict_len, &ctx->dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + is_synctasked = dict_get_str_boolean (ctx->dict, + "is_synctasked", _gf_false); + if (is_synctasked) { + ret = glusterd_synctasked_mgmt_v3_lock (req, &lock_req, ctx); + /* The above function does not take ownership of ctx. + * Therefore we need to free the ctx explicitly. */ + free_ctx = _gf_true; + } + else { + ret = glusterd_op_state_machine_mgmt_v3_lock (req, &lock_req, + ctx); + } + +out: + + if (ret || free_ctx) { + if (ctx->dict) + dict_unref (ctx->dict); + if (ctx) + GF_FREE (ctx); + } + + free (lock_req.dict.dict_val); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_mgmt_v3_pre_validate_send_resp (rpcsvc_request_t *req, + int32_t op, int32_t status, + char *op_errstr, dict_t *rsp_dict) +{ + gd1_mgmt_v3_pre_val_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + glusterd_get_uuid (&rsp.uuid); + rsp.op = op; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get serialized length of dict"); + goto out; + } + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_rsp); + + GF_FREE (rsp.dict.dict_val); +out: + gf_log (this->name, GF_LOG_DEBUG, + "Responded to pre validation, ret: %d", ret); + return ret; +} + +static int +glusterd_handle_pre_validate_fn (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_v3_pre_val_req op_req = {{0},}; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to decode pre validation " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa (op_req.uuid)); + ret = -1; + goto out; + } + + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_unserialize (op_req.dict.dict_val, + op_req.dict.dict_len, &dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get new dictionary"); + return -1; + } + + ret = gd_mgmt_v3_pre_validate_fn (op_req.op, dict, &op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Pre Validation failed on operation %s", + gd_op_list[op_req.op]); + } + + ret = glusterd_mgmt_v3_pre_validate_send_resp (req, op_req.op, + ret, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to send Pre Validation " + "response for operation %s", + gd_op_list[op_req.op]); + goto out; + } + +out: + if (op_errstr && (strcmp (op_errstr, ""))) + GF_FREE (op_errstr); + + free (op_req.dict.dict_val); + + if (dict) + dict_unref (dict); + + if (rsp_dict) + dict_unref (rsp_dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_mgmt_v3_brick_op_send_resp (rpcsvc_request_t *req, + int32_t op, int32_t status, + char *op_errstr, dict_t *rsp_dict) +{ + gd1_mgmt_v3_brick_op_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + glusterd_get_uuid (&rsp.uuid); + rsp.op = op; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get serialized length of dict"); + goto out; + } + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_rsp); + + GF_FREE (rsp.dict.dict_val); +out: + gf_log (this->name, GF_LOG_DEBUG, + "Responded to brick op, ret: %d", ret); + return ret; +} + +static int +glusterd_handle_brick_op_fn (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_v3_brick_op_req op_req = {{0},}; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode brick op " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa (op_req.uuid)); + ret = -1; + goto out; + } + + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_unserialize (op_req.dict.dict_val, + op_req.dict.dict_len, &dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get new dictionary"); + return -1; + } + + ret = gd_mgmt_v3_brick_op_fn (op_req.op, dict, &op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Brick Op failed on operation %s", + gd_op_list[op_req.op]); + } + + ret = glusterd_mgmt_v3_brick_op_send_resp (req, op_req.op, + ret, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to send brick op " + "response for operation %s", + gd_op_list[op_req.op]); + goto out; + } + +out: + if (op_errstr && (strcmp (op_errstr, ""))) + GF_FREE (op_errstr); + + free (op_req.dict.dict_val); + + if (dict) + dict_unref (dict); + + if (rsp_dict) + dict_unref (rsp_dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_mgmt_v3_commit_send_resp (rpcsvc_request_t *req, + int32_t op, int32_t status, + char *op_errstr, dict_t *rsp_dict) +{ + gd1_mgmt_v3_commit_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + glusterd_get_uuid (&rsp.uuid); + rsp.op = op; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get serialized length of dict"); + goto out; + } + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_commit_rsp); + + GF_FREE (rsp.dict.dict_val); +out: + gf_log (this->name, GF_LOG_DEBUG, "Responded to commit, ret: %d", ret); + return ret; +} + +static int +glusterd_handle_commit_fn (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_v3_commit_req op_req = {{0},}; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_v3_commit_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode commit " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa (op_req.uuid)); + ret = -1; + goto out; + } + + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_unserialize (op_req.dict.dict_val, + op_req.dict.dict_len, &dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get new dictionary"); + return -1; + } + + ret = gd_mgmt_v3_commit_fn (op_req.op, dict, &op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "commit failed on operation %s", + gd_op_list[op_req.op]); + } + + ret = glusterd_mgmt_v3_commit_send_resp (req, op_req.op, + ret, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to send commit " + "response for operation %s", + gd_op_list[op_req.op]); + goto out; + } + +out: + if (op_errstr && (strcmp (op_errstr, ""))) + GF_FREE (op_errstr); + + free (op_req.dict.dict_val); + + if (dict) + dict_unref (dict); + + if (rsp_dict) + dict_unref (rsp_dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_mgmt_v3_post_validate_send_resp (rpcsvc_request_t *req, + int32_t op, int32_t status, + char *op_errstr, dict_t *rsp_dict) +{ + gd1_mgmt_v3_post_val_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + glusterd_get_uuid (&rsp.uuid); + rsp.op = op; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get serialized length of dict"); + goto out; + } + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_post_val_rsp); + + GF_FREE (rsp.dict.dict_val); +out: + gf_log (this->name, GF_LOG_DEBUG, + "Responded to post validation, ret: %d", ret); + return ret; +} + +static int +glusterd_handle_post_validate_fn (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_v3_post_val_req op_req = {{0},}; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_v3_post_val_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to decode post validation " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa (op_req.uuid)); + ret = -1; + goto out; + } + + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_unserialize (op_req.dict.dict_val, + op_req.dict.dict_len, &dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get new dictionary"); + return -1; + } + + ret = gd_mgmt_v3_post_validate_fn (op_req.op, op_req.op_ret, dict, + &op_errstr, rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Post Validation failed on operation %s", + gd_op_list[op_req.op]); + } + + ret = glusterd_mgmt_v3_post_validate_send_resp (req, op_req.op, + ret, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to send Post Validation " + "response for operation %s", + gd_op_list[op_req.op]); + goto out; + } + +out: + if (op_errstr && (strcmp (op_errstr, ""))) + GF_FREE (op_errstr); + + free (op_req.dict.dict_val); + + if (dict) + dict_unref (dict); + + if (rsp_dict) + dict_unref (rsp_dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_mgmt_v3_unlock_send_resp (rpcsvc_request_t *req, int32_t status) +{ + + gd1_mgmt_v3_unlock_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + if (rsp.op_ret) + rsp.op_errno = errno; + + glusterd_get_uuid (&rsp.uuid); + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); + + gf_log (this->name, GF_LOG_DEBUG, + "Responded to mgmt_v3 unlock, ret: %d", ret); + + return ret; +} + +static int +glusterd_syctasked_mgmt_v3_unlock (rpcsvc_request_t *req, + gd1_mgmt_v3_unlock_req *unlock_req, + glusterd_op_lock_ctx_t *ctx) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (ctx); + + /* Trying to release multiple mgmt_v3 locks */ + ret = glusterd_multiple_mgmt_v3_unlock (ctx->dict, ctx->uuid); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release mgmt_v3 locks for %s", + uuid_utoa(ctx->uuid)); + } + + ret = glusterd_mgmt_v3_unlock_send_resp (req, ret); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + + +static int +glusterd_op_state_machine_mgmt_v3_unlock (rpcsvc_request_t *req, + gd1_mgmt_v3_unlock_req *lock_req, + glusterd_op_lock_ctx_t *ctx) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK, + &lock_req->txn_id, ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_UNLOCK"); + + glusterd_friend_sm (); + glusterd_op_sm (); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_handle_mgmt_v3_unlock_fn (rpcsvc_request_t *req) +{ + gd1_mgmt_v3_unlock_req lock_req = {{0},}; + int32_t ret = -1; + glusterd_op_lock_ctx_t *ctx = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + gf_boolean_t is_synctasked = _gf_false; + gf_boolean_t free_ctx = _gf_false; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &lock_req, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode unlock " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "Received volume unlock req " + "from uuid: %s", uuid_utoa (lock_req.uuid)); + + if (glusterd_friend_find_by_uuid (lock_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa (lock_req.uuid)); + ret = -1; + goto out; + } + + ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t); + if (!ctx) { + ret = -1; + goto out; + } + + uuid_copy (ctx->uuid, lock_req.uuid); + ctx->req = req; + + ctx->dict = dict_new (); + if (!ctx->dict) { + ret = -1; + goto out; + } + + ret = dict_unserialize (lock_req.dict.dict_val, + lock_req.dict.dict_len, &ctx->dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + is_synctasked = dict_get_str_boolean (ctx->dict, + "is_synctasked", _gf_false); + if (is_synctasked) { + ret = glusterd_syctasked_mgmt_v3_unlock (req, &lock_req, ctx); + /* The above function does not take ownership of ctx. + * Therefore we need to free the ctx explicitly. */ + free_ctx = _gf_true; + } + else { + ret = glusterd_op_state_machine_mgmt_v3_unlock (req, &lock_req, + ctx); + } + +out: + + if (ret || free_ctx) { + if (ctx->dict) + dict_unref (ctx->dict); + if (ctx) + GF_FREE (ctx); + } + + free (lock_req.dict.dict_val); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_handle_mgmt_v3_lock (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_mgmt_v3_lock_fn); +} + +static int +glusterd_handle_pre_validate (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_pre_validate_fn); +} + +static int +glusterd_handle_brick_op (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_brick_op_fn); +} + +static int +glusterd_handle_commit (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_commit_fn); +} + +static int +glusterd_handle_post_validate (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_post_validate_fn); +} + +int +glusterd_handle_mgmt_v3_unlock (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_mgmt_v3_unlock_fn); +} + +rpcsvc_actor_t gd_svc_mgmt_v3_actors[] = { + [GLUSTERD_MGMT_V3_NULL] = { "NULL", GLUSTERD_MGMT_V3_NULL, glusterd_mgmt_v3_null, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_LOCK] = { "MGMT_V3_LOCK", GLUSTERD_MGMT_V3_LOCK, glusterd_handle_mgmt_v3_lock, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_PRE_VALIDATE] = { "PRE_VAL", GLUSTERD_MGMT_V3_PRE_VALIDATE, glusterd_handle_pre_validate, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_BRICK_OP] = { "BRCK_OP", GLUSTERD_MGMT_V3_BRICK_OP, glusterd_handle_brick_op, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_COMMIT] = { "COMMIT", GLUSTERD_MGMT_V3_COMMIT, glusterd_handle_commit, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_POST_VALIDATE] = { "POST_VAL", GLUSTERD_MGMT_V3_POST_VALIDATE, glusterd_handle_post_validate, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_UNLOCK] = { "MGMT_V3_UNLOCK", GLUSTERD_MGMT_V3_UNLOCK, glusterd_handle_mgmt_v3_unlock, NULL, 0, DRC_NA}, +}; + +struct rpcsvc_program gd_svc_mgmt_v3_prog = { + .progname = "GlusterD svc mgmt v3", + .prognum = GD_MGMT_PROGRAM, + .progver = GD_MGMT_V3_VERSION, + .numactors = GLUSTERD_MGMT_V3_MAXVALUE, + .actors = gd_svc_mgmt_v3_actors, + .synctask = _gf_true, +}; diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c new file mode 100644 index 00000000000..5295f889eaa --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c @@ -0,0 +1,1899 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +/* rpc related syncops */ +#include "rpc-clnt.h" +#include "protocol-common.h" +#include "xdr-generic.h" +#include "glusterd1-xdr.h" +#include "glusterd-syncop.h" + +#include "glusterd.h" +#include "glusterd-utils.h" +#include "glusterd-locks.h" +#include "glusterd-mgmt.h" +#include "glusterd-op-sm.h" + +extern struct rpc_clnt_program gd_mgmt_v3_prog; + + +static void +gd_mgmt_v3_collate_errors (struct syncargs *args, int op_ret, int op_errno, + char *op_errstr, int op_code, + glusterd_peerinfo_t *peerinfo, u_char *uuid) +{ + char *peer_str = NULL; + char err_str[PATH_MAX] = "Please check log file for details."; + char op_err[PATH_MAX] = ""; + int32_t len = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (args); + GF_ASSERT (uuid); + + if (op_ret) { + args->op_ret = op_ret; + args->op_errno = op_errno; + + if (peerinfo) + peer_str = peerinfo->hostname; + else + peer_str = uuid_utoa (uuid); + + if (op_errstr && strcmp (op_errstr, "")) { + len = snprintf (err_str, sizeof(err_str) - 1, + "Error: %s", op_errstr); + err_str[len] = '\0'; + } + + switch (op_code) { + case GLUSTERD_MGMT_V3_LOCK: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Locking failed " + "on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_PRE_VALIDATE: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Pre Validation failed " + "on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_BRICK_OP: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Brick ops failed " + "on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_COMMIT: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Commit failed on %s. %s", + peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_POST_VALIDATE: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Post Validation failed " + "on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_UNLOCK: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Unlocking failed " + "on %s. %s", peer_str, err_str); + break; + } + } + op_err[len] = '\0'; + + if (args->errstr) { + len = snprintf (err_str, sizeof(err_str) - 1, + "%s\n%s", args->errstr, + op_err); + GF_FREE (args->errstr); + args->errstr = NULL; + } else + len = snprintf (err_str, sizeof(err_str) - 1, + "%s", op_err); + err_str[len] = '\0'; + + gf_log (this->name, GF_LOG_ERROR, "%s", op_err); + args->errstr = gf_strdup (err_str); + } + + return; +} + +int32_t +gd_mgmt_v3_pre_validate_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + switch (op) { + case GD_OP_SNAP: + ret = glusterd_snapshot_prevalidate (dict, op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Snapshot Prevalidate Failed"); + goto out; + } + + break; + + default: + break; + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_DEBUG, "OP = %d. Returning %d", op, ret); + return ret; +} + +int32_t +gd_mgmt_v3_brick_op_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + switch (op) { + case GD_OP_SNAP: + { + ret = glusterd_snapshot_brickop (dict, op_errstr, rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "snapshot brickop " + "failed"); + goto out; + } + break; + } + default: + break; + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "OP = %d. Returning %d", op, ret); + return ret; +} + +int32_t +gd_mgmt_v3_commit_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + switch (op) { + case GD_OP_SNAP: + { + ret = glusterd_snapshot (dict, op_errstr, rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Snapshot Commit Failed"); + goto out; + } + break; + } + default: + break; + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_DEBUG, "OP = %d. Returning %d", op, ret); + return ret; +} + +int32_t +gd_mgmt_v3_post_validate_fn (glusterd_op_t op, int32_t op_ret, dict_t *dict, + char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + switch (op) { + case GD_OP_SNAP: + { + ret = glusterd_snapshot_postvalidate (dict, op_ret, + op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "postvalidate operation failed"); + goto out; + } + break; + } + default: + break; + } + + ret = 0; + +out: + gf_log (this->name, GF_LOG_TRACE, "OP = %d. Returning %d", op, ret); + return ret; +} + +int32_t +gd_mgmt_v3_lock_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_lock_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + /* Even though the lock command has failed, while collating the errors + (gd_mgmt_v3_collate_errors), args->op_ret and args->op_errno will be + used. @args is obtained from frame->local. So before checking the + status of the request and going out if its a failure, args should be + set to frame->local. Otherwise, while collating args will be NULL. + This applies to other phases such as prevalidate, brickop, commit and + postvalidate also. + */ + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_LOCK, + peerinfo, rsp.uuid); + if (rsp.dict.dict_val) + free (rsp.dict.dict_val); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_lock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_lock_cbk_fn); +} + +int +gd_mgmt_v3_lock (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + gd1_mgmt_v3_lock_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_LOCK, + gd_mgmt_v3_lock_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_lock_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_initiate_lockdown (glusterd_conf_t *conf, glusterd_op_t op, + dict_t *dict, char **op_errstr, int npeers, + gf_boolean_t *is_acquired) +{ + char *volname = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + int32_t ret = -1; + int32_t peer_cnt = 0; + struct syncargs args = {0}; + struct list_head *peers = NULL; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (is_acquired); + + peers = &conf->xaction_peers; + + /* Trying to acquire multiple mgmt_v3 locks on local node */ + ret = glusterd_multiple_mgmt_v3_lock (dict, MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire mgmt_v3 locks on localhost"); + goto out; + } + + *is_acquired = _gf_true; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending mgmt_v3 lock req to other nodes in the cluster */ + gd_syncargs_init (&args, NULL); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_lock (op, dict, peerinfo, &args, + MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent lock op req for %s " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); +out: + if (ret) { + if (*op_errstr) + gf_log (this->name, GF_LOG_ERROR, "%s", + *op_errstr); + + if (volname) + ret = gf_asprintf (op_errstr, + "Another transaction is in progress " + "for %s. Please try again after " + "sometime.", volname); + else + ret = gf_asprintf (op_errstr, + "Another transaction is in progress " + "Please try again after sometime."); + + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + + return ret; +} + +int +glusterd_pre_validate_aggr_rsp_dict (glusterd_op_t op, + dict_t *aggr, dict_t *rsp) +{ + int32_t ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (aggr); + GF_ASSERT (rsp); + + switch (op) { + case GD_OP_SNAP: + ret = glusterd_snap_pre_validate_use_rsp_dict (aggr, rsp); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to aggregate prevalidate " + "response dictionaries."); + goto out; + } + break; + default: + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Invalid op (%s)", + gd_op_list[op]); + + break; + } +out: + return ret; +} + +int32_t +gd_mgmt_v3_pre_validate_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_pre_val_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + dict_t *rsp_dict = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_rsp); + if (ret < 0) + goto out; + + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + rsp_dict = dict_new (); + + ret = dict_unserialize (rsp.dict.dict_val, + rsp.dict.dict_len, + &rsp_dict); + if (ret < 0) { + free (rsp.dict.dict_val); + goto out; + } else { + rsp_dict->extra_stdfree = rsp.dict.dict_val; + } + } + + uuid_copy (args->uuid, rsp.uuid); + pthread_mutex_lock (&args->lock_dict); + { + ret = glusterd_pre_validate_aggr_rsp_dict (rsp.op, args->dict, + rsp_dict); + } + pthread_mutex_unlock (&args->lock_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", + "Failed to aggregate response from " + " node/brick"); + if (!rsp.op_ret) + op_ret = ret; + else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + } else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + +out: + if (rsp_dict) + dict_unref (rsp_dict); + + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_PRE_VALIDATE, + peerinfo, rsp.uuid); + + if (rsp.op_errstr) + free (rsp.op_errstr); + + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_pre_validate_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_pre_validate_cbk_fn); +} + +int +gd_mgmt_v3_pre_validate_req (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_pre_val_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_PRE_VALIDATE, + gd_mgmt_v3_pre_validate_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_pre_val_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_pre_validate (glusterd_conf_t *conf, glusterd_op_t op, + dict_t *req_dict, char **op_errstr, int npeers) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + struct list_head *peers = NULL; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (req_dict); + GF_ASSERT (op_errstr); + + peers = &conf->xaction_peers; + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create response dictionary"); + goto out; + } + + /* Pre Validation on local node */ + ret = gd_mgmt_v3_pre_validate_fn (op, req_dict, op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Pre Validation failed for " + "operation %s on local node", + gd_op_list[op]); + + if (*op_errstr == NULL) { + ret = gf_asprintf (op_errstr, + "Pre-validation failed " + "on localhost. Please " + "check log file for details"); + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + goto out; + } + + ret = glusterd_pre_validate_aggr_rsp_dict (op, req_dict, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", + "Failed to aggregate response from " + " node/brick"); + goto out; + } + + dict_unref (rsp_dict); + rsp_dict = NULL; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending Pre Validation req to other nodes in the cluster */ + gd_syncargs_init (&args, req_dict); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_pre_validate_req (op, req_dict, peerinfo, &args, + MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "Pre Validation failed on peers"); + + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + } + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent pre valaidation req for %s " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); +out: + return ret; +} + +int +glusterd_mgmt_v3_build_payload (dict_t **req, char **op_errstr, dict_t *dict, + glusterd_op_t op) +{ + int32_t ret = -1; + dict_t *req_dict = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (op_errstr); + GF_ASSERT (dict); + + req_dict = dict_new (); + if (!req_dict) + goto out; + + switch (op) { + case GD_OP_SNAP: + dict_copy (dict, req_dict); + break; + default: + break; + } + + *req = req_dict; + ret = 0; +out: + return ret; +} + +int32_t +gd_mgmt_v3_brick_op_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_brick_op_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + /* If the operation failed, then iov can be NULL. So better check the + status of the operation and then worry about iov (if the status of + the command is success) + */ + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_BRICK_OP, + peerinfo, rsp.uuid); + + if (rsp.op_errstr) + free (rsp.op_errstr); + + if (rsp.dict.dict_val) + free (rsp.dict.dict_val); + + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_brick_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_brick_op_cbk_fn); +} + +int +gd_mgmt_v3_brick_op_req (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_brick_op_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_BRICK_OP, + gd_mgmt_v3_brick_op_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_brick_op_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_brick_op (glusterd_conf_t *conf, glusterd_op_t op, + dict_t *req_dict, char **op_errstr, int npeers) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + struct list_head *peers = NULL; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (req_dict); + GF_ASSERT (op_errstr); + + peers = &conf->xaction_peers; + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create response dictionary"); + goto out; + } + + /* Perform brick op on local node */ + ret = gd_mgmt_v3_brick_op_fn (op, req_dict, op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Brick ops failed for " + "operation %s on local node", + gd_op_list[op]); + + if (*op_errstr == NULL) { + ret = gf_asprintf (op_errstr, + "Brick ops failed " + "on localhost. Please " + "check log file for details"); + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + goto out; + } + + dict_unref (rsp_dict); + rsp_dict = NULL; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending brick op req to other nodes in the cluster */ + gd_syncargs_init (&args, NULL); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_brick_op_req (op, req_dict, peerinfo, &args, + MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "Brick ops failed on peers"); + + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + } + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent brick op req for %s " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); +out: + return ret; +} + +int32_t +gd_mgmt_v3_commit_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_commit_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + dict_t *rsp_dict = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_commit_rsp); + if (ret < 0) + goto out; + + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + rsp_dict = dict_new (); + + ret = dict_unserialize (rsp.dict.dict_val, + rsp.dict.dict_len, + &rsp_dict); + if (ret < 0) { + free (rsp.dict.dict_val); + goto out; + } else { + rsp_dict->extra_stdfree = rsp.dict.dict_val; + } + } + + uuid_copy (args->uuid, rsp.uuid); + pthread_mutex_lock (&args->lock_dict); + { + ret = glusterd_syncop_aggr_rsp_dict (rsp.op, args->dict, + rsp_dict); + } + pthread_mutex_unlock (&args->lock_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", + "Failed to aggregate response from " + " node/brick"); + if (!rsp.op_ret) + op_ret = ret; + else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + } else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + +out: + if (rsp_dict) + dict_unref (rsp_dict); + + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_COMMIT, + peerinfo, rsp.uuid); + + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_commit_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_commit_cbk_fn); +} + +int +gd_mgmt_v3_commit_req (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_commit_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_COMMIT, + gd_mgmt_v3_commit_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_commit_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_commit (glusterd_conf_t *conf, glusterd_op_t op, + dict_t *op_ctx, dict_t *req_dict, + char **op_errstr, int npeers) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + struct list_head *peers = NULL; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (op_ctx); + GF_ASSERT (req_dict); + GF_ASSERT (op_errstr); + + peers = &conf->xaction_peers; + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create response dictionary"); + goto out; + } + + /* Commit on local node */ + ret = gd_mgmt_v3_commit_fn (op, req_dict, op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Commit failed for " + "operation %s on local node", + gd_op_list[op]); + + if (*op_errstr == NULL) { + ret = gf_asprintf (op_errstr, + "Commit failed " + "on localhost. Please " + "check log file for details."); + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + goto out; + } + + ret = glusterd_syncop_aggr_rsp_dict (op, op_ctx, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", + "Failed to aggregate response from " + " node/brick"); + goto out; + } + + dict_unref (rsp_dict); + rsp_dict = NULL; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending commit req to other nodes in the cluster */ + gd_syncargs_init (&args, op_ctx); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_commit_req (op, req_dict, peerinfo, &args, + MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "Commit failed on peers"); + + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + } + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent commit req for %s to %d " + "peers. Returning %d", gd_op_list[op], peer_cnt, ret); +out: + return ret; +} + +int32_t +gd_mgmt_v3_post_validate_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_post_val_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_post_val_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_POST_VALIDATE, + peerinfo, rsp.uuid); + if (rsp.op_errstr) + free (rsp.op_errstr); + + if (rsp.dict.dict_val) + free (rsp.dict.dict_val); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_post_validate_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_post_validate_cbk_fn); +} + +int +gd_mgmt_v3_post_validate_req (glusterd_op_t op, int32_t op_ret, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_post_val_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + req.op_ret = op_ret; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_POST_VALIDATE, + gd_mgmt_v3_post_validate_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_post_val_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_post_validate (glusterd_conf_t *conf, glusterd_op_t op, + int32_t op_ret, dict_t *dict, dict_t *req_dict, + char **op_errstr, int npeers) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + struct list_head *peers = NULL; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (dict); + GF_ASSERT (req_dict); + GF_ASSERT (op_errstr); + + peers = &conf->xaction_peers; + GF_ASSERT (peers); + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create response dictionary"); + goto out; + } + + /* Copy the contents of dict like missed snaps info to req_dict */ + dict_copy (dict, req_dict); + + /* Post Validation on local node */ + ret = gd_mgmt_v3_post_validate_fn (op, op_ret, req_dict, op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Post Validation failed for " + "operation %s on local node", + gd_op_list[op]); + + if (*op_errstr == NULL) { + ret = gf_asprintf (op_errstr, + "Post-validation failed " + "on localhost. Please check " + "log file for details"); + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + goto out; + } + + dict_unref (rsp_dict); + rsp_dict = NULL; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending Post Validation req to other nodes in the cluster */ + gd_syncargs_init (&args, req_dict); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_post_validate_req (op, op_ret, req_dict, peerinfo, + &args, MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "Post Validation failed on peers"); + + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + } + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent post valaidation req for %s " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); +out: + return ret; +} + +int32_t +gd_mgmt_v3_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_unlock_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_UNLOCK, + peerinfo, rsp.uuid); + if (rsp.dict.dict_val) + free (rsp.dict.dict_val); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_unlock_cbk_fn); +} + +int +gd_mgmt_v3_unlock (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_unlock_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_UNLOCK, + gd_mgmt_v3_unlock_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_unlock_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_release_peer_locks (glusterd_conf_t *conf, glusterd_op_t op, + dict_t *dict, int32_t op_ret, + char **op_errstr, int npeers, + gf_boolean_t is_acquired) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + struct list_head *peers = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + peers = &conf->xaction_peers; + + /* If the lock has not been held during this + * transaction, do not send unlock requests */ + if (!is_acquired) + goto out; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending mgmt_v3 unlock req to other nodes in the cluster */ + gd_syncargs_init (&args, NULL); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_unlock (op, dict, peerinfo, &args, + MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unlock failed on peers"); + + if (!op_ret && args.errstr) + *op_errstr = gf_strdup (args.errstr); + } + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent unlock op req for %s " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); + +out: + return ret; +} + +int32_t +glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict) +{ + int32_t ret = -1; + int32_t op_ret = -1; + int32_t npeers = 0; + dict_t *req_dict = NULL; + dict_t *tmp_dict = NULL; + glusterd_conf_t *conf = NULL; + char *op_errstr = NULL; + xlator_t *this = NULL; + gf_boolean_t is_acquired = _gf_false; + uuid_t *originator_uuid = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (dict); + conf = this->private; + GF_ASSERT (conf); + + /* Save the MY_UUID as the originator_uuid. This originator_uuid + * will be used by is_origin_glusterd() to determine if a node + * is the originator node for a command. */ + originator_uuid = GF_CALLOC (1, sizeof(uuid_t), + gf_common_mt_uuid_t); + if (!originator_uuid) { + ret = -1; + goto out; + } + + uuid_copy (*originator_uuid, MY_UUID); + ret = dict_set_bin (dict, "originator_uuid", + originator_uuid, sizeof (uuid_t)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set originator_uuid."); + goto out; + } + + /* Marking the operation as complete synctasked */ + ret = dict_set_int32 (dict, "is_synctasked", _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set synctasked flag."); + goto out; + } + + /* Use a copy at local unlock as cli response will be sent before + * the unlock and the volname in the dict might be removed */ + tmp_dict = dict_new(); + if (!tmp_dict) { + gf_log (this->name, GF_LOG_ERROR, "Unable to create dict"); + goto out; + } + dict_copy (dict, tmp_dict); + + /* BUILD PEERS LIST */ + INIT_LIST_HEAD (&conf->xaction_peers); + npeers = gd_build_peers_list (&conf->peers, &conf->xaction_peers, op); + + /* LOCKDOWN PHASE - Acquire mgmt_v3 locks */ + ret = glusterd_mgmt_v3_initiate_lockdown (conf, op, dict, &op_errstr, + npeers, &is_acquired); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "mgmt_v3 lockdown failed."); + goto out; + } + + /* BUILD PAYLOAD */ + ret = glusterd_mgmt_v3_build_payload (&req_dict, &op_errstr, dict, op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, LOGSTR_BUILD_PAYLOAD, + gd_op_list[op]); + if (op_errstr == NULL) + gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD); + goto out; + } + + /* PRE-COMMIT VALIDATE PHASE */ + ret = glusterd_mgmt_v3_pre_validate (conf, op, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Pre Validation Failed"); + goto out; + } + + /* COMMIT OP PHASE */ + ret = glusterd_mgmt_v3_commit (conf, op, dict, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Commit Op Failed"); + goto out; + } + + /* POST-COMMIT VALIDATE PHASE */ + /* As of now, post_validate is not handling any other + commands other than snapshot. So as of now, I am + sending 0 (op_ret as 0). + */ + ret = glusterd_mgmt_v3_post_validate (conf, op, 0, dict, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Post Validation Failed"); + goto out; + } + + ret = 0; +out: + op_ret = ret; + /* UNLOCK PHASE FOR PEERS*/ + (void) glusterd_mgmt_v3_release_peer_locks (conf, op, dict, + op_ret, &op_errstr, + npeers, is_acquired); + + /* LOCAL VOLUME(S) UNLOCK */ + if (is_acquired) { + /* Trying to release multiple mgmt_v3 locks */ + ret = glusterd_multiple_mgmt_v3_unlock (tmp_dict, MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release mgmt_v3 locks on localhost"); + op_ret = ret; + } + } + + /* SEND CLI RESPONSE */ + glusterd_op_send_cli_response (op, op_ret, 0, req, dict, op_errstr); + + if (req_dict) + dict_unref (req_dict); + + if (tmp_dict) + dict_unref (tmp_dict); + + if (op_errstr) { + GF_FREE (op_errstr); + op_errstr = NULL; + } + + return 0; +} + +int32_t +glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict) +{ + int32_t ret = -1; + int32_t op_ret = -1; + int32_t npeers = 0; + dict_t *req_dict = NULL; + dict_t *tmp_dict = NULL; + glusterd_conf_t *conf = NULL; + char *op_errstr = NULL; + xlator_t *this = NULL; + gf_boolean_t is_acquired = _gf_false; + uuid_t *originator_uuid = NULL; + gf_boolean_t success = _gf_false; + char *tmp_errstr = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (dict); + conf = this->private; + GF_ASSERT (conf); + + /* Save the MY_UUID as the originator_uuid. This originator_uuid + * will be used by is_origin_glusterd() to determine if a node + * is the originator node for a command. */ + originator_uuid = GF_CALLOC (1, sizeof(uuid_t), + gf_common_mt_uuid_t); + if (!originator_uuid) { + ret = -1; + goto out; + } + + uuid_copy (*originator_uuid, MY_UUID); + ret = dict_set_bin (dict, "originator_uuid", + originator_uuid, sizeof (uuid_t)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set originator_uuid."); + goto out; + } + + /* Marking the operation as complete synctasked */ + ret = dict_set_int32 (dict, "is_synctasked", _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set synctasked flag."); + goto out; + } + + /* Use a copy at local unlock as cli response will be sent before + * the unlock and the volname in the dict might be removed */ + tmp_dict = dict_new(); + if (!tmp_dict) { + gf_log (this->name, GF_LOG_ERROR, "Unable to create dict"); + goto out; + } + dict_copy (dict, tmp_dict); + + /* BUILD PEERS LIST */ + INIT_LIST_HEAD (&conf->xaction_peers); + npeers = gd_build_peers_list (&conf->peers, &conf->xaction_peers, op); + + /* LOCKDOWN PHASE - Acquire mgmt_v3 locks */ + ret = glusterd_mgmt_v3_initiate_lockdown (conf, op, dict, &op_errstr, + npeers, &is_acquired); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "mgmt_v3 lockdown failed."); + goto out; + } + + /* BUILD PAYLOAD */ + ret = glusterd_mgmt_v3_build_payload (&req_dict, &op_errstr, dict, op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, LOGSTR_BUILD_PAYLOAD, + gd_op_list[op]); + if (op_errstr == NULL) + gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD); + goto out; + } + + /* PRE-COMMIT VALIDATE PHASE */ + ret = glusterd_mgmt_v3_pre_validate (conf, op, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Pre Validation Failed"); + goto out; + } + + /* BRICK OP PHASE for initiating barrier*/ + ret = dict_set_int32 (req_dict, "barrier", 1); + if (ret) + goto out; + ret = glusterd_mgmt_v3_brick_op (conf, op, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Brick Ops Failed"); + goto unbarrier; + } + + /* COMMIT OP PHASE */ + /* TODO: As of now, the plan is to do quorum check before sending the + commit fop and if the quorum succeeds, then commit is sent to all + the other glusterds. + snap create functionality now creates the in memory and on disk + objects for the snapshot (marking them as incomplete), takes the lvm + snapshot and then updates the status of the in memory and on disk + snap objects as complete. Suppose one of the glusterds goes down + after taking the lvm snapshot, but before updating the snap object, + then treat it as a snapshot create failure and trigger cleanup. + i.e the number of commit responses received by the originator + glusterd shold be the same as the number of peers it has sent the + request to (i.e npeers variable). If not, then originator glusterd + will initiate cleanup in post-validate fop. + Question: What if one of the other glusterds goes down as explained + above and along with it the originator glusterd also goes down? + Who will initiate the cleanup? + */ + ret = glusterd_mgmt_v3_commit (conf, op, dict, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Commit Op Failed"); + /* If the main op fails, we should save the error string. + Because, op_errstr will be used for unbarrier and + unlock ops also. We might lose the actual error that + caused the failure. + */ + tmp_errstr = op_errstr; + op_errstr = NULL; + goto unbarrier; + } + + success = _gf_true; +unbarrier: + /* BRICK OP PHASE for removing the barrier*/ + ret = dict_set_int32 (req_dict, "barrier", 0); + if (ret) + goto out; + ret = glusterd_mgmt_v3_brick_op (conf, op, req_dict, + &op_errstr, npeers); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Brick Ops Failed"); + goto out; + } + + ret = 0; + +out: + op_ret = ret; + + if (success == _gf_false) + op_ret = -1; + + /* POST-COMMIT VALIDATE PHASE */ + ret = glusterd_mgmt_v3_post_validate (conf, op, op_ret, dict, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Post Validation Failed"); + op_ret = -1; + } + + /* UNLOCK PHASE FOR PEERS*/ + (void) glusterd_mgmt_v3_release_peer_locks (conf, op, dict, + op_ret, &op_errstr, + npeers, is_acquired); + + /* If the commit op (snapshot taking) failed, then the error is stored + in tmp_errstr and unbarrier is called. Suppose, if unbarrier also + fails, then the error happened in unbarrier is logged and freed. + The error happened in commit op, which is stored in tmp_errstr + is sent to cli. + */ + if (tmp_errstr) { + if (op_errstr) { + gf_log (this->name, GF_LOG_ERROR, "unbarrier brick op" + "failed with the error %s", op_errstr); + GF_FREE (op_errstr); + op_errstr = NULL; + } + op_errstr = tmp_errstr; + } + + /* LOCAL VOLUME(S) UNLOCK */ + if (is_acquired) { + /* Trying to release multiple mgmt_v3 locks */ + ret = glusterd_multiple_mgmt_v3_unlock (tmp_dict, MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release mgmt_v3 locks on localhost"); + op_ret = ret; + } + } + + /* SEND CLI RESPONSE */ + glusterd_op_send_cli_response (op, op_ret, 0, req, dict, op_errstr); + + if (req_dict) + dict_unref (req_dict); + + if (tmp_dict) + dict_unref (tmp_dict); + + if (op_errstr) { + GF_FREE (op_errstr); + op_errstr = NULL; + } + + return 0; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-mgmt.h new file mode 100644 index 00000000000..b185a9bec26 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.h @@ -0,0 +1,45 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _GLUSTERD_MGMT_H_ +#define _GLUSTERD_MGMT_H_ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +int32_t +gd_mgmt_v3_pre_validate_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict); + +int32_t +gd_mgmt_v3_brick_op_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict); + +int32_t +gd_mgmt_v3_commit_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict); + +int32_t +gd_mgmt_v3_post_validate_fn (glusterd_op_t op, int32_t op_ret, dict_t *dict, + char **op_errstr, dict_t *rsp_dict); + +int32_t +glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict); + +int32_t +glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict); + +int +glusterd_snap_pre_validate_use_rsp_dict (dict_t *dst, dict_t *src); + +#endif /* _GLUSTERD_MGMT_H_ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index e3ae369e442..fb5e097d9c1 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -158,7 +158,7 @@ glusterd_generate_txn_id (dict_t *dict, uuid_t **txn_id) uuid_generate (**txn_id); ret = dict_set_bin (dict, "transaction_id", - *txn_id, sizeof (uuid_t)); + *txn_id, sizeof (**txn_id)); if (ret) { gf_log ("", GF_LOG_ERROR, "Failed to set transaction id."); @@ -517,7 +517,20 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin brick_req->name = gf_strdup (name); break; + case GD_OP_SNAP: + brick_req = GF_CALLOC (1, sizeof (*brick_req), + gf_gld_mt_mop_brick_req_t); + if (!brick_req) + goto out; + + brick_req->op = GLUSTERD_VOLUME_BARRIER_OP; + ret = dict_get_str (dict, "volname", &volname); + if (ret) + goto out; + snprintf (name, 1024, "%s-server",volname); + brick_req->name = gf_strdup (name); + break; default: goto out; break; @@ -1659,14 +1672,25 @@ glusterd_stop_bricks (glusterd_volinfo_t *volinfo) int glusterd_start_bricks (glusterd_volinfo_t *volinfo) { - glusterd_brickinfo_t *brickinfo = NULL; + int ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; + + GF_ASSERT (volinfo); list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - if (glusterd_brick_start (volinfo, brickinfo, _gf_false)) - return -1; + ret = glusterd_brick_start (volinfo, brickinfo, _gf_false); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Failed to start %s:%s for %s", + brickinfo->hostname, brickinfo->path, + volinfo->volname); + goto out; + } } - return 0; + ret = 0; +out: + return ret; } static int @@ -2687,7 +2711,7 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) continue; - /* Based on the op_version, acquire a cluster or volume lock */ + /* Based on the op_version, acquire a cluster or mgmt_v3 lock */ if (priv->op_version < GD_OP_VERSION_4) { proc = &peerinfo->mgmt->proctable [GLUSTERD_MGMT_CLUSTER_LOCK]; @@ -2709,7 +2733,7 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) dict_ref (dict); proc = &peerinfo->mgmt_v3->proctable - [GLUSTERD_MGMT_V3_VOLUME_LOCK]; + [GLUSTERD_MGMT_V3_LOCK]; if (proc->fn) { ret = dict_set_static_ptr (dict, "peerinfo", peerinfo); @@ -2723,7 +2747,7 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) ret = proc->fn (NULL, this, dict); if (ret) { gf_log (this->name, GF_LOG_WARNING, - "Failed to send volume lock " + "Failed to send mgmt_v3 lock " "request for operation " "'Volume %s' to peer %s", gd_op_list[opinfo.op], @@ -2770,7 +2794,7 @@ glusterd_op_ac_send_unlock (glusterd_op_sm_event_t *event, void *ctx) continue; /* Based on the op_version, - * release the cluster or volume lock */ + * release the cluster or mgmt_v3 lock */ if (priv->op_version < GD_OP_VERSION_4) { proc = &peerinfo->mgmt->proctable [GLUSTERD_MGMT_CLUSTER_UNLOCK]; @@ -2792,7 +2816,7 @@ glusterd_op_ac_send_unlock (glusterd_op_sm_event_t *event, void *ctx) dict_ref (dict); proc = &peerinfo->mgmt_v3->proctable - [GLUSTERD_MGMT_V3_VOLUME_UNLOCK]; + [GLUSTERD_MGMT_V3_UNLOCK]; if (proc->fn) { ret = dict_set_static_ptr (dict, "peerinfo", peerinfo); @@ -2870,7 +2894,7 @@ glusterd_op_ac_lock (glusterd_op_sm_event_t *event, void *ctx) /* If the req came from a node running on older op_version * the dict won't be present. Based on it acquiring a cluster - * or volume lock */ + * or mgmt_v3 lock */ if (lock_ctx->dict == NULL) { ret = glusterd_lock (lock_ctx->uuid); glusterd_op_lock_send_resp (lock_ctx->req, ret); @@ -2880,14 +2904,15 @@ glusterd_op_ac_lock (glusterd_op_sm_event_t *event, void *ctx) gf_log (this->name, GF_LOG_ERROR, "Unable to acquire volname"); else { - ret = glusterd_volume_lock (volname, lock_ctx->uuid); + ret = glusterd_mgmt_v3_lock (volname, lock_ctx->uuid, + "vol"); if (ret) gf_log (this->name, GF_LOG_ERROR, "Unable to acquire lock for %s", volname); } - glusterd_op_volume_lock_send_resp (lock_ctx->req, + glusterd_op_mgmt_v3_lock_send_resp (lock_ctx->req, &event->txn_id, ret); dict_unref (lock_ctx->dict); @@ -2917,7 +2942,7 @@ glusterd_op_ac_unlock (glusterd_op_sm_event_t *event, void *ctx) /* If the req came from a node running on older op_version * the dict won't be present. Based on it releasing the cluster - * or volume lock */ + * or mgmt_v3 lock */ if (lock_ctx->dict == NULL) { ret = glusterd_unlock (lock_ctx->uuid); glusterd_op_unlock_send_resp (lock_ctx->req, ret); @@ -2927,14 +2952,15 @@ glusterd_op_ac_unlock (glusterd_op_sm_event_t *event, void *ctx) gf_log (this->name, GF_LOG_ERROR, "Unable to acquire volname"); else { - ret = glusterd_volume_unlock (volname, lock_ctx->uuid); + ret = glusterd_mgmt_v3_unlock (volname, lock_ctx->uuid, + "vol"); if (ret) gf_log (this->name, GF_LOG_ERROR, "Unable to release lock for %s", volname); } - glusterd_op_volume_unlock_send_resp (lock_ctx->req, + glusterd_op_mgmt_v3_unlock_send_resp (lock_ctx->req, &event->txn_id, ret); dict_unref (lock_ctx->dict); @@ -3466,7 +3492,7 @@ glusterd_op_start_rb_timer (dict_t *dict, uuid_t *txn_id) } ret = dict_set_bin (rb_ctx, "transaction_id", - txn_id, sizeof (uuid_t)); + txn_id, sizeof (*txn_id)); if (ret) { gf_log ("", GF_LOG_ERROR, "Failed to set transaction id."); @@ -4243,7 +4269,7 @@ glusterd_op_txn_complete (uuid_t *txn_id) glusterd_op_reset_ctx (); glusterd_op_clear_errstr (); - /* Based on the op-version, we release the cluster or volume lock */ + /* Based on the op-version, we release the cluster or mgmt_v3 lock */ if (priv->op_version < GD_OP_VERSION_4) { ret = glusterd_unlock (MY_UUID); /* unlock cant/shouldnt fail here!! */ @@ -4260,7 +4286,8 @@ glusterd_op_txn_complete (uuid_t *txn_id) "Locks have not been held."); if (volname) { - ret = glusterd_volume_unlock (volname, MY_UUID); + ret = glusterd_mgmt_v3_unlock (volname, MY_UUID, + "vol"); if (ret) gf_log (this->name, GF_LOG_ERROR, "Unable to release lock for %s", @@ -4355,7 +4382,7 @@ glusterd_op_ac_stage_op (glusterd_op_sm_event_t *event, void *ctx) } ret = dict_set_bin (rsp_dict, "transaction_id", - txn_id, sizeof(uuid_t)); + txn_id, sizeof(*txn_id)); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to set transaction id."); @@ -4467,7 +4494,7 @@ glusterd_op_ac_commit_op (glusterd_op_sm_event_t *event, void *ctx) } ret = dict_set_bin (rsp_dict, "transaction_id", - txn_id, sizeof(uuid_t)); + txn_id, sizeof(*txn_id)); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to set transaction id."); @@ -5225,6 +5252,61 @@ _select_rxlators_for_full_self_heal (xlator_t *this, static int +glusterd_bricks_select_snap (dict_t *dict, char **op_errstr, + struct list_head *selected) +{ + int ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + glusterd_pending_node_t *pending_node = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + int brick_index = -1; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get" + " volname"); + goto out; + } + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) + goto out; + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + brick_index++; + if (uuid_compare (brickinfo->uuid, MY_UUID) || + !glusterd_is_brick_started (brickinfo)) { + continue; + } + pending_node = GF_CALLOC (1, sizeof (*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } + pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; + pending_node->index = brick_index; + list_add_tail (&pending_node->list, + selected); + pending_node = NULL; + } + + ret = 0; + +out: + gf_log (THIS->name, GF_LOG_DEBUG, "Returning ret %d", ret); + return ret; +} + +static int fill_shd_status_for_local_bricks (dict_t *dict, glusterd_volinfo_t *volinfo, cli_cmd_type type, dict_t *req_dict) { @@ -5795,7 +5877,9 @@ glusterd_op_bricks_select (glusterd_op_t op, dict_t *dict, char **op_errstr, ret = glusterd_bricks_select_rebalance_volume (dict, op_errstr, selected); break; - + case GD_OP_SNAP: + ret = glusterd_bricks_select_snap (dict, op_errstr, selected); + break; default: break; } diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c index e78eff44de5..ed6d7fd57d5 100644 --- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c @@ -688,7 +688,7 @@ rb_src_brick_restart (glusterd_volinfo_t *volinfo, sleep (2); ret = glusterd_volume_start_glusterfs (volinfo, src_brickinfo, - _gf_false); + _gf_false); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to start " "glusterfs, ret: %d", ret); diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c index 18f37c190fe..27910d13206 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c @@ -137,6 +137,7 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret, case GD_OP_CLEARLOCKS_VOLUME: case GD_OP_HEAL_VOLUME: case GD_OP_QUOTA: + case GD_OP_SNAP: { /*nothing specific to be done*/ break; @@ -645,10 +646,10 @@ glusterd_cluster_lock_cbk (struct rpc_req *req, struct iovec *iov, } static int32_t -glusterd_vol_lock_cbk_fn (struct rpc_req *req, struct iovec *iov, +glusterd_mgmt_v3_lock_peers_cbk_fn (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { - gd1_mgmt_volume_lock_rsp rsp = {{0},}; + gd1_mgmt_v3_lock_rsp rsp = {{0},}; int ret = -1; int32_t op_ret = -1; glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; @@ -667,10 +668,10 @@ glusterd_vol_lock_cbk_fn (struct rpc_req *req, struct iovec *iov, } ret = xdr_to_generic (*iov, &rsp, - (xdrproc_t)xdr_gd1_mgmt_volume_lock_rsp); + (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); if (ret < 0) { gf_log (this->name, GF_LOG_ERROR, - "Failed to decode volume lock " + "Failed to decode mgmt_v3 lock " "response received from peer"); rsp.op_ret = -1; rsp.op_errno = EINVAL; @@ -682,13 +683,13 @@ glusterd_vol_lock_cbk_fn (struct rpc_req *req, struct iovec *iov, txn_id = &rsp.txn_id; gf_log (this->name, (op_ret) ? GF_LOG_ERROR : GF_LOG_DEBUG, - "Received volume lock %s from uuid: %s", + "Received mgmt_v3 lock %s from uuid: %s", (op_ret) ? "RJT" : "ACC", uuid_utoa (rsp.uuid)); ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); if (ret) { gf_log (this->name, GF_LOG_CRITICAL, - "Volume lock response received " + "mgmt_v3 lock response received " "from unknown peer: %s. Ignoring response", uuid_utoa (rsp.uuid)); goto out; @@ -717,18 +718,18 @@ out: } int32_t -glusterd_vol_lock_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +glusterd_mgmt_v3_lock_peers_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) { return glusterd_big_locked_cbk (req, iov, count, myframe, - glusterd_vol_lock_cbk_fn); + glusterd_mgmt_v3_lock_peers_cbk_fn); } static int32_t -glusterd_vol_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +glusterd_mgmt_v3_unlock_peers_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) { - gd1_mgmt_volume_unlock_rsp rsp = {{0},}; + gd1_mgmt_v3_unlock_rsp rsp = {{0},}; int ret = -1; int32_t op_ret = -1; glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; @@ -747,10 +748,10 @@ glusterd_vol_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov, } ret = xdr_to_generic (*iov, &rsp, - (xdrproc_t)xdr_gd1_mgmt_volume_unlock_rsp); + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); if (ret < 0) { gf_log (this->name, GF_LOG_ERROR, - "Failed to decode volume unlock " + "Failed to decode mgmt_v3 unlock " "response received from peer"); rsp.op_ret = -1; rsp.op_errno = EINVAL; @@ -762,7 +763,7 @@ glusterd_vol_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov, txn_id = &rsp.txn_id; gf_log (this->name, (op_ret) ? GF_LOG_ERROR : GF_LOG_DEBUG, - "Received volume unlock %s from uuid: %s", + "Received mgmt_v3 unlock %s from uuid: %s", (op_ret) ? "RJT" : "ACC", uuid_utoa (rsp.uuid)); @@ -770,7 +771,7 @@ glusterd_vol_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov, if (ret) { gf_log (this->name, GF_LOG_CRITICAL, - "Volume unlock response received " + "mgmt_v3 unlock response received " "from unknown peer: %s. Ignoring response", uuid_utoa (rsp.uuid)); goto out; @@ -799,11 +800,11 @@ out: } int32_t -glusterd_vol_unlock_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +glusterd_mgmt_v3_unlock_peers_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) { return glusterd_big_locked_cbk (req, iov, count, myframe, - glusterd_vol_unlock_cbk_fn); + glusterd_mgmt_v3_unlock_peers_cbk_fn); } int32_t @@ -1404,10 +1405,10 @@ out: } int32_t -glusterd_vol_lock (call_frame_t *frame, xlator_t *this, - void *data) +glusterd_mgmt_v3_lock_peers (call_frame_t *frame, xlator_t *this, + void *data) { - gd1_mgmt_volume_lock_req req = {{0},}; + gd1_mgmt_v3_lock_req req = {{0},}; int ret = -1; glusterd_peerinfo_t *peerinfo = NULL; glusterd_conf_t *priv = NULL; @@ -1440,6 +1441,7 @@ glusterd_vol_lock (call_frame_t *frame, xlator_t *this, goto out; } + /* Sending valid transaction ID to peers */ ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id); if (ret) { @@ -1460,19 +1462,19 @@ glusterd_vol_lock (call_frame_t *frame, xlator_t *this, ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, peerinfo->mgmt_v3, - GLUSTERD_MGMT_V3_VOLUME_LOCK, NULL, - this, glusterd_vol_lock_cbk, - (xdrproc_t)xdr_gd1_mgmt_volume_lock_req); + GLUSTERD_MGMT_V3_LOCK, NULL, + this, glusterd_mgmt_v3_lock_peers_cbk, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_req); out: gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } int32_t -glusterd_vol_unlock (call_frame_t *frame, xlator_t *this, +glusterd_mgmt_v3_unlock_peers (call_frame_t *frame, xlator_t *this, void *data) { - gd1_mgmt_volume_unlock_req req = {{0},}; + gd1_mgmt_v3_unlock_req req = {{0},}; int ret = -1; glusterd_peerinfo_t *peerinfo = NULL; glusterd_conf_t *priv = NULL; @@ -1505,6 +1507,7 @@ glusterd_vol_unlock (call_frame_t *frame, xlator_t *this, goto out; } + /* Sending valid transaction ID to peers */ ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id); if (ret) { @@ -1525,10 +1528,10 @@ glusterd_vol_unlock (call_frame_t *frame, xlator_t *this, ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, peerinfo->mgmt_v3, - GLUSTERD_MGMT_V3_VOLUME_UNLOCK, NULL, - this, glusterd_vol_unlock_cbk, + GLUSTERD_MGMT_V3_UNLOCK, NULL, + this, glusterd_mgmt_v3_unlock_peers_cbk, (xdrproc_t) - xdr_gd1_mgmt_volume_unlock_req); + xdr_gd1_mgmt_v3_unlock_req); out: gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; @@ -1955,9 +1958,9 @@ struct rpc_clnt_procedure gd_mgmt_actors[GLUSTERD_MGMT_MAXVALUE] = { }; struct rpc_clnt_procedure gd_mgmt_v3_actors[GLUSTERD_MGMT_V3_MAXVALUE] = { - [GLUSTERD_MGMT_V3_NULL] = {"NULL", NULL }, - [GLUSTERD_MGMT_V3_VOLUME_LOCK] = {"VOLUME_LOCK", glusterd_vol_lock}, - [GLUSTERD_MGMT_V3_VOLUME_UNLOCK] = {"VOLUME_UNLOCK", glusterd_vol_unlock}, + [GLUSTERD_MGMT_V3_NULL] = {"NULL", NULL }, + [GLUSTERD_MGMT_V3_LOCK] = {"MGMT_V3_LOCK", glusterd_mgmt_v3_lock_peers}, + [GLUSTERD_MGMT_V3_UNLOCK] = {"MGMT_V3_UNLOCK", glusterd_mgmt_v3_unlock_peers}, }; struct rpc_clnt_program gd_mgmt_prog = { diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c new file mode 100644 index 00000000000..e19ee78ec0f --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c @@ -0,0 +1,5595 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <inttypes.h> +#include <sys/types.h> +#include <unistd.h> +#include <sys/resource.h> +#include <sys/statvfs.h> +#include <sys/mount.h> + +#include "globals.h" +#include "compat.h" +#include "protocol-common.h" +#include "xlator.h" +#include "logging.h" +#include "timer.h" +#include "glusterd-mem-types.h" +#include "glusterd.h" +#include "glusterd-sm.h" +#include "glusterd-op-sm.h" +#include "glusterd-utils.h" +#include "glusterd-store.h" +#include "run.h" +#include "glusterd-volgen.h" +#include "glusterd-mgmt.h" +#include "glusterd-syncop.h" + +#include "syscall.h" +#include "cli1-xdr.h" +#include "xdr-generic.h" + +#ifdef GF_LINUX_HOST_OS +#include <mntent.h> +#endif + +char snap_mount_folder[PATH_MAX]; + +static int32_t +glusterd_find_missed_snap (dict_t *rsp_dict, glusterd_volinfo_t *vol, + char *snap_uuid, struct list_head *peers, + int32_t op); + +/* This function will restore a snapshot volumes + * + * @param dict dictionary containing snapshot restore request + * @param op_errstr In case of any failure error message will be returned + * in this variable + * @return Negative value on Failure and 0 in success + */ +int +glusterd_snapshot_restore (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int ret = -1; + char *volname = NULL; + char *snapname = NULL; + xlator_t *this = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "snap name"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (NULL == snap) { + ret = gf_asprintf (op_errstr, "Snap (%s) not found", + snapname); + if (ret < 0) { + goto out; + } + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); + ret = -1; + goto out; + } + + /* TODO : As of now there is only volume in snapshot. + * Change this when multiple volume snapshot is introduced + */ + snap_volinfo = list_entry (snap->volumes.next, glusterd_volinfo_t, + vol_list); + + ret = glusterd_volinfo_find (snap_volinfo->parent_volname, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not get volinfo of " + "%s", snap_volinfo->parent_volname); + goto out; + } + + if (is_origin_glusterd (dict) == _gf_true) { + /* From origin glusterd check if * + * any peers with snap bricks is down */ + ret = glusterd_find_missed_snap (rsp_dict, snap_volinfo, + snap_volinfo->volname, + &priv->peers, + GF_SNAP_OPTION_TYPE_RESTORE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to find missed snap restores"); + goto out; + } + } + + ret = gd_restore_snap_volume (rsp_dict, volinfo, snap_volinfo); + if (ret) { + /* No need to update op_errstr because it is assumed + * that the called function will do that in case of + * failure. + */ + gf_log (this->name, GF_LOG_ERROR, "Failed to restore " + "snap for %s volume", volname); + goto out; + } + + ret = 0; + + /* TODO: Need to check if we need to delete the snap after the + * operation is successful or not. Also need to persist the state + * of restore operation in the store. + */ +out: + return ret; +} + +/* This function is called before actual restore is taken place. This function + * will validate whether the snapshot volumes are ready to be restored or not. + * + * @param dict dictionary containing snapshot restore request + * @param op_errstr In case of any failure error message will be returned + * in this variable + * @param rsp_dict response dictionary + * @return Negative value on Failure and 0 in success + */ +int +glusterd_snapshot_restore_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int ret = -1; + int32_t i = 0; + int32_t volcount = 0; + gf_boolean_t snap_restored = _gf_false; + char key[PATH_MAX] = {0, }; + char *volname = NULL; + char *snapname = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "snap name"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (NULL == snap) { + ret = gf_asprintf (op_errstr, "Snap (%s) not found", + snapname); + if (ret < 0) { + goto out; + } + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); + ret = -1; + goto out; + } + + snap_restored = snap->snap_restored; + + if (snap_restored) { + ret = gf_asprintf (op_errstr, "Snap (%s) is already " + "restored", snapname); + if (ret < 0) { + goto out; + } + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); + ret = -1; + goto out; + } + + ret = dict_set_str (rsp_dict, "snapname", snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "snap name(%s)", snapname); + goto out; + } + + ret = dict_get_int32 (dict, "volcount", &volcount); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get volume count"); + goto out; + } + + /* Snapshot restore will only work if all the volumes, + that are part of the snapshot, are stopped. */ + for (i = 1; i <= volcount; ++i) { + snprintf (key, sizeof (key), "volname%d", i); + ret = dict_get_str (dict, key, &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + ret = gf_asprintf (op_errstr, "Volume (%s) not found", + volname); + if (ret < 0) { + goto out; + } + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); + ret = -1; + goto out; + } + + if (glusterd_is_volume_started (volinfo)) { + ret = gf_asprintf (op_errstr, "Volume (%s) has been " + "started. Volume needs to be stopped before restoring " + "a snapshot.", volname); + if (ret < 0) { + goto out; + } + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); + ret = -1; + goto out; + } + } + + ret = 0; +out: + return ret; +} + +int +snap_max_hard_limits_validate (dict_t *dict, char *volname, + uint64_t value, char **op_errstr) +{ + char err_str[PATH_MAX] = ""; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = -1; + uint64_t max_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + conf = this->private; + + GF_ASSERT (conf); + + if (volname) { + ret = glusterd_volinfo_find (volname, &volinfo); + if (!ret) { + if (volinfo->is_snap_volume) { + ret = -1; + snprintf (err_str, PATH_MAX, + "%s is a snap volume. Configuring " + "snap-max-hard-limit for a snap " + "volume is prohibited.", volname); + goto out; + } + } + } + + if (value) { + /* Max limit for the system is GLUSTERD_SNAPS_MAX_HARD_LIMIT + * but max limit for a volume is conf->snap_max_hard_limit. + */ + if (volname) { + max_limit = conf->snap_max_hard_limit; + } else { + max_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + } + } + + if ((value < 0) || (value > max_limit)) { + ret = -1; + snprintf (err_str, PATH_MAX, "Invalid snap-max-hard-limit" + "%"PRIu64 ". Expected range 0 - %"PRIu64, + value, max_limit); + goto out; + } + + ret = 0; +out: + if (ret) { + *op_errstr = gf_strdup (err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + } + return ret; +} + +int +glusterd_snapshot_config_prevalidate (dict_t *dict, char **op_errstr) +{ + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + int ret = -1; + int config_command = 0; + char err_str[PATH_MAX] = {0,}; + glusterd_conf_t *conf = NULL; + uint64_t value = 0; + uint64_t hard_limit = 0; + uint64_t soft_limit = 0; + gf_loglevel_t loglevel = GF_LOG_ERROR; + uint64_t max_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + conf = this->private; + + GF_ASSERT (conf); + + ret = dict_get_int32 (dict, "config-command", &config_command); + if (ret) { + snprintf (err_str, sizeof (err_str), + "failed to get config-command type"); + goto out; + } + + ret = dict_get_uint64 (dict, "snap-max-hard-limit", &hard_limit); + + ret = dict_get_uint64 (dict, "snap-max-soft-limit", &soft_limit); + + ret = dict_get_str (dict, "volname", &volname); + + if (volname) { + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, sizeof (err_str), + "Volume %s does not exist.", volname); + goto out; + } + } + + switch (config_command) { + case GF_SNAP_CONFIG_TYPE_SET: + if (hard_limit) { + /* Validations for snap-max-hard-limits */ + ret = snap_max_hard_limits_validate (dict, volname, + hard_limit, op_errstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "snap-max-hard-limit validation " + "failed."); + goto out; + } + } + + if (soft_limit) { + max_limit = GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT; + if ((soft_limit < 0) || (soft_limit > max_limit)) { + ret = -1; + snprintf (err_str, PATH_MAX, "Invalid " + "snap-max-soft-limit ""%" + PRIu64 ". Expected range 0 - %"PRIu64, + value, max_limit); + goto out; + } + break; + } + default: + break; + } + + ret = 0; +out: + + if (ret && err_str[0] != '\0') { + gf_log (this->name, loglevel, "%s", err_str); + *op_errstr = gf_strdup (err_str); + } + + return ret; +} + +int +glusterd_snap_create_pre_val_use_rsp_dict (dict_t *dst, dict_t *src) +{ + char *snap_brick_dir = NULL; + char *snap_device = NULL; + char *tmpstr = NULL; + char key[PATH_MAX] = ""; + char snapbrckcnt[PATH_MAX] = ""; + char snapbrckord[PATH_MAX] = ""; + int ret = -1; + int64_t i = -1; + int64_t j = -1; + int64_t volume_count = 0; + int64_t brick_count = 0; + int64_t brick_order = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dst); + GF_ASSERT (src); + + ret = dict_get_int64 (src, "volcount", &volume_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "get the volume count"); + goto out; + } + + for (i = 0; i < volume_count; i++) { + memset (snapbrckcnt, '\0', sizeof(snapbrckcnt)); + ret = snprintf (snapbrckcnt, sizeof(snapbrckcnt) - 1, + "vol%ld_brickcount", i+1); + ret = dict_get_int64 (src, snapbrckcnt, &brick_count); + if (ret) { + gf_log (this->name, GF_LOG_TRACE, + "No bricks for this volume in this dict"); + continue; + } + + for (j = 0; j < brick_count; j++) { + /* Fetching data from source dict */ + snprintf (key, sizeof(key) - 1, + "vol%ld.brickdir%ld", i+1, j); + + ret = dict_get_ptr (src, key, + (void **)&snap_brick_dir); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Unable to fetch %s", key); + continue; + } + + snprintf (key, sizeof(key) - 1, + "vol%ld.brick_snapdevice%ld", i+1, j); + + ret = dict_get_ptr (src, key, + (void **)&snap_device); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch snap_device"); + goto out; + } + + snprintf (snapbrckord, sizeof(snapbrckord) - 1, + "vol%ld.brick%ld.order", i+1, j); + + ret = dict_get_int64 (src, snapbrckord, &brick_order); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get brick order"); + goto out; + } + + /* Adding the data in the dst dict */ + snprintf (key, sizeof(key) - 1, + "vol%ld.brickdir%ld", i+1, brick_order); + + tmpstr = gf_strdup (snap_brick_dir); + if (!tmpstr) { + gf_log (this->name, GF_LOG_ERROR, + "Out Of Memory"); + ret = -1; + goto out; + } + ret = dict_set_dynstr (dst, key, tmpstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set %s", key); + GF_FREE (tmpstr); + goto out; + } + + snprintf (key, sizeof(key) - 1, + "vol%ld.brick_snapdevice%ld", + i+1, brick_order); + + tmpstr = gf_strdup (snap_device); + if (!tmpstr) { + ret = -1; + goto out; + } + ret = dict_set_dynstr (dst, key, tmpstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set %s", key); + GF_FREE (tmpstr); + goto out; + } + } + } + + ret = 0; +out: + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_snap_pre_validate_use_rsp_dict (dict_t *dst, dict_t *src) +{ + int ret = -1; + int32_t snap_command = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + if (!dst || !src) { + gf_log (this->name, GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (dst, "type", &snap_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + ret = glusterd_snap_create_pre_val_use_rsp_dict (dst, src); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to use " + "rsp dict"); + goto out; + } + break; + default: + break; + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_snapshot_create_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + char *volname = NULL; + char *snapname = NULL; + char *device = NULL; + char *tmpstr = NULL; + char *brick_dir = NULL; + char snap_brick_dir[PATH_MAX] = ""; + char *mnt_pt = NULL; + char key[PATH_MAX] = ""; + char snap_mount[PATH_MAX] = ""; + char snap_volname[64] = ""; + char err_str[PATH_MAX] = ""; + int ret = -1; + int64_t i = 0; + int64_t volcount = 0; + int64_t brick_count = 0; + int64_t brick_order = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + uuid_t *snap_volid = NULL; + gf_loglevel_t loglevel = GF_LOG_ERROR; + glusterd_conf_t *conf = NULL; + int64_t effective_max_limit = 0; + + this = THIS; + GF_ASSERT (op_errstr); + conf = this->private; + GF_ASSERT (conf); + + ret = dict_get_int64 (dict, "volcount", &volcount); + if (ret) { + snprintf (err_str, sizeof (err_str), "Failed to " + "get the volume count"); + goto out; + } + if (volcount <= 0) { + snprintf (err_str, sizeof (err_str), "Invalid volume count %ld " + "supplied", volcount); + ret = -1; + goto out; + } + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + snprintf (err_str, sizeof (err_str), "Failed to get snapname"); + goto out; + } + + if (glusterd_find_snap_by_name (snapname)) { + ret = -1; + snprintf (err_str, sizeof (err_str), "Snap %s already exists", + snapname); + goto out; + } + + for (i = 1; i <= volcount; i++) { + snprintf (key, sizeof (key), "volname%ld", i); + ret = dict_get_str (dict, key, &volname); + if (ret) { + snprintf (err_str, sizeof (err_str), + "failed to get volume name"); + goto out; + } + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, sizeof (err_str), + "Volume (%s) does not exist ", volname); + goto out; + } + + ret = -1; + if (!glusterd_is_volume_started (volinfo)) { + snprintf (err_str, sizeof (err_str), "volume %s is " + "not started", volinfo->volname); + loglevel = GF_LOG_WARNING; + goto out; + } + if (glusterd_is_defrag_on (volinfo)) { + snprintf (err_str, sizeof (err_str), + "rebalance process is running for the " + "volume %s", volname); + loglevel = GF_LOG_WARNING; + goto out; + } + /* TODO: Also check whether geo replication is running */ + + if (volinfo->is_snap_volume == _gf_true) { + snprintf (err_str, sizeof (err_str), + "Volume %s is a snap volume", volname); + loglevel = GF_LOG_WARNING; + goto out; + } + + if (volinfo->snap_max_hard_limit < conf->snap_max_hard_limit) + effective_max_limit = volinfo->snap_max_hard_limit; + else + effective_max_limit = conf->snap_max_hard_limit; + + if (volinfo->snap_count >= effective_max_limit) { + snprintf (err_str, sizeof (err_str), + "The number of existing snaps has reached " + "the effective maximum limit of %"PRIu64" ," + "for the volume %s", effective_max_limit, + volname); + loglevel = GF_LOG_WARNING; + goto out; + } + + snprintf (key, sizeof(key) - 1, "vol%ld_volid", i); + ret = dict_get_bin (dict, key, (void **)&snap_volid); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch snap_volid"); + goto out; + } + + /* snap volume uuid is used as lvm snapshot name. + This will avoid restrictions on snapshot names + provided by user */ + GLUSTERD_GET_UUID_NOHYPHEN (snap_volname, *snap_volid); + + brick_count = 0; + brick_order = 0; + /* Adding snap bricks mount paths to the dict */ + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) { + brick_order++; + continue; + } + + if (!glusterd_is_brick_started (brickinfo)) { + gf_log (this->name, GF_LOG_WARNING, + "brick %s:%s is not started", + brickinfo->hostname, + brickinfo->path); + brick_order++; + brick_count++; + continue; + } + + device = glusterd_get_brick_mount_details (brickinfo); + if (!device) { + snprintf (err_str, sizeof (err_str), + "getting device name for the brick " + "%s:%s failed", brickinfo->hostname, + brickinfo->path); + ret = -1; + goto out; + } + + device = glusterd_build_snap_device_path (device, + snap_volname); + if (!device) { + snprintf (err_str, sizeof (err_str), + "cannot copy the snapshot device " + "name (volname: %s, snapname: %s)", + volinfo->volname, snapname); + loglevel = GF_LOG_WARNING; + ret = -1; + goto out; + } + + snprintf (key, sizeof(key), + "vol%ld.brick_snapdevice%ld", i, + brick_count); + ret = dict_set_dynstr (rsp_dict, key, device); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set %s", key); + GF_FREE (device); + goto out; + } + + ret = glusterd_get_brick_root (brickinfo->path, + &mnt_pt); + if (ret) { + snprintf (err_str, sizeof (err_str), + "could not get the root of the brick path %s", + brickinfo->path); + loglevel = GF_LOG_WARNING; + goto out; + } + if (strncmp (brickinfo->path, mnt_pt, strlen(mnt_pt))) { + snprintf (err_str, sizeof (err_str), + "brick: %s brick mount: %s", + brickinfo->path, mnt_pt); + loglevel = GF_LOG_WARNING; + goto out; + } + + brick_dir = &brickinfo->path[strlen (mnt_pt)]; + brick_dir++; + + snprintf (snap_brick_dir, sizeof (snap_brick_dir), + "/%s", brick_dir); + + tmpstr = gf_strdup (snap_brick_dir); + if (!tmpstr) { + ret = -1; + goto out; + } + snprintf (key, sizeof(key), "vol%ld.brickdir%ld", i, + brick_count); + ret = dict_set_dynstr (rsp_dict, key, tmpstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set %s", snap_mount); + goto out; + } + tmpstr = NULL; + + snprintf (key, sizeof(key) - 1, "vol%ld.brick%ld.order", + i, brick_count); + ret = dict_set_int64 (rsp_dict, key, brick_order); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set %s", key); + goto out; + } + + brick_count++; + brick_order++; + } + snprintf (key, sizeof(key) - 1, "vol%ld_brickcount", i); + ret = dict_set_int64 (rsp_dict, key, brick_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set %s", + key); + goto out; + } + } + + ret = dict_set_int64 (rsp_dict, "volcount", volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set volcount"); + goto out; + } + + ret = 0; +out: + if (ret) + GF_FREE (tmpstr); + + if (ret && err_str[0] != '\0') { + gf_log (this->name, loglevel, "%s", err_str); + *op_errstr = gf_strdup (err_str); + } + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +glusterd_snap_t* +glusterd_new_snap_object() +{ + glusterd_snap_t *snap = NULL; + + snap = GF_CALLOC (1, sizeof (*snap), gf_gld_mt_snap_t); + + if (snap) { + if (LOCK_INIT (&snap->lock)) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed initiating" + " snap lock"); + GF_FREE (snap); + return NULL; + } + + INIT_LIST_HEAD (&snap->snap_list); + INIT_LIST_HEAD (&snap->volumes); + snap->snapname[0] = 0; + snap->snap_status = GD_SNAP_STATUS_INIT; + } + + return snap; + +}; + +/* Function glusterd_list_add_snapvol adds the volinfo object (snapshot volume) + to the snapshot object list and to the parent volume list */ +int32_t +glusterd_list_add_snapvol (glusterd_volinfo_t *origin_vol, + glusterd_volinfo_t *snap_vol) +{ + int ret = -1; + glusterd_snap_t *snap = NULL; + + GF_VALIDATE_OR_GOTO ("glusterd", origin_vol, out); + GF_VALIDATE_OR_GOTO ("glusterd", snap_vol, out); + + snap = snap_vol->snapshot; + GF_ASSERT (snap); + + list_add_tail (&snap_vol->vol_list, &snap->volumes); + LOCK (&origin_vol->lock); + { + list_add_order (&snap_vol->snapvol_list, + &origin_vol->snap_volumes, + glusterd_compare_snap_vol_time); + origin_vol->snap_count++; + } + UNLOCK (&origin_vol->lock); + + gf_log (THIS->name, GF_LOG_DEBUG, "Snap %s added to the list", + snap->snapname); + ret = 0; + out: + return ret; +} + +glusterd_snap_t* +glusterd_find_snap_by_name (char *snapname) +{ + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (snapname); + + list_for_each_entry (snap, &priv->snapshots, snap_list) { + if (!strcmp (snap->snapname, snapname)) { + gf_log (THIS->name, GF_LOG_DEBUG, "Found " + "snap %s (%s)", snap->snapname, + uuid_utoa (snap->snap_id)); + goto out; + } + } + snap = NULL; +out: + return snap; +} + +glusterd_snap_t* +glusterd_find_snap_by_id (uuid_t snap_id) +{ + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + + if (uuid_is_null(snap_id)) + goto out; + + list_for_each_entry (snap, &priv->snapshots, snap_list) { + if (!uuid_compare (snap->snap_id, snap_id)) { + gf_log (THIS->name, GF_LOG_DEBUG, "Found " + "snap %s (%s)", snap->snapname, + uuid_utoa (snap->snap_id)); + goto out; + } + } + snap = NULL; +out: + return snap; +} + +int +glusterd_do_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol, + glusterd_brickinfo_t *brickinfo, + const char *mount_pt, const char *snap_device) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + runner_t runner = {0,}; + char msg[1024] = {0, }; + char pidfile[PATH_MAX] = {0, }; + pid_t pid = -1; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + if (!brickinfo) { + gf_log (this->name, GF_LOG_ERROR, "brickinfo NULL"); + goto out; + } + GF_ASSERT (snap_vol); + GF_ASSERT (mount_pt); + GF_ASSERT (snap_device); + + GLUSTERD_GET_BRICK_PIDFILE (pidfile, snap_vol, brickinfo, priv); + if (gf_is_service_running (pidfile, &pid)) { + ret = kill (pid, SIGKILL); + if (ret && errno != ESRCH) { + gf_log (this->name, GF_LOG_ERROR, "Unable to kill pid " + "%d reason : %s", pid, strerror(errno)); + goto out; + } + } + + runinit (&runner); + snprintf (msg, sizeof (msg), "umount the snapshot mounted path %s", + mount_pt); + runner_add_args (&runner, "umount", mount_pt, NULL); + runner_log (&runner, "", GF_LOG_DEBUG, msg); + + /* We need not do synclock_unlock => runner_run => synclock_lock here. + Because it is needed if we are running a glusterfs process in + runner_run, so that when the glusterfs process started wants to + communicate to glusterd, glusterd wont be able to respond if it + has held the big lock. So we do unlock, run glusterfs process + (thus communicate to glusterd), lock. But since this is not a + glusterfs command that is being run, unlocking and then relocking + is not needed. + */ + ret = runner_run (&runner); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "unmounting the " + "path %s (brick: %s) failed (%s)", mount_pt, + brickinfo->path, strerror (errno)); + goto out; + } + + runinit (&runner); + snprintf (msg, sizeof(msg), "remove snapshot of the brick %s:%s, " + "device: %s", brickinfo->hostname, brickinfo->path, + snap_device); + runner_add_args (&runner, "/sbin/lvremove", "-f", snap_device, NULL); + runner_log (&runner, "", GF_LOG_DEBUG, msg); + + ret = runner_run (&runner); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "removing snapshot of the " + "brick (%s:%s) of device %s failed", + brickinfo->hostname, brickinfo->path, snap_device); + goto out; + } + +out: + return ret; +} + +int32_t +glusterd_lvm_snapshot_remove (dict_t *rsp_dict, glusterd_volinfo_t *snap_vol) +{ + char *mnt_pt = NULL; + struct mntent *entry = NULL; + int32_t brick_count = -1; + int32_t ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = NULL; + FILE *mtab = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap_vol); + + if (!snap_vol) { + gf_log (this->name, GF_LOG_ERROR, "snap volinfo is NULL"); + goto out; + } + + brick_count = -1; + list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) { + brick_count++; + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + if (brickinfo->snap_status == -1) { + gf_log (this->name, GF_LOG_INFO, + "snapshot was pending. lvm not present " + "for brick %s:%s of the snap %s.", + brickinfo->hostname, brickinfo->path, + snap_vol->snapshot->snapname); + + /* Adding missed delete to the dict */ + ret = glusterd_add_missed_snaps_to_dict + (rsp_dict, + snap_vol->volname, + brickinfo, + brick_count + 1, + GF_SNAP_OPTION_TYPE_DELETE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snapshot info " + "for %s:%s in the rsp_dict", + brickinfo->hostname, + brickinfo->path); + goto out; + } + + continue; + } + + ret = glusterd_get_brick_root (brickinfo->path, &mnt_pt); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "getting the root " + "of the brick for volume %s (snap %s) failed ", + snap_vol->volname, snap_vol->snapshot->snapname); + goto out; + } + + entry = glusterd_get_mnt_entry_info (mnt_pt, mtab); + if (!entry) { + gf_log (this->name, GF_LOG_WARNING, "getting the mount" + " entry for the brick %s:%s of the snap %s " + "(volume: %s) failed", brickinfo->hostname, + brickinfo->path, snap_vol->snapshot->snapname, + snap_vol->volname); + ret = -1; + goto out; + } + ret = glusterd_do_lvm_snapshot_remove (snap_vol, brickinfo, + mnt_pt, + entry->mnt_fsname); + if (mtab) + endmntent (mtab); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "remove the snapshot %s (%s)", + brickinfo->path, entry->mnt_fsname); + goto out; + } + + } + + ret = 0; +out: + return ret; +} + + +int32_t +glusterd_snap_volume_remove (dict_t *rsp_dict, + glusterd_volinfo_t *snap_vol, + gf_boolean_t remove_lvm, + gf_boolean_t force) +{ + int ret = -1; + int save_ret = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *origin_vol = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap_vol); + + if (!snap_vol) { + gf_log(this->name, GF_LOG_WARNING, "snap_vol in NULL"); + ret = -1; + goto out; + } + + list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + ret = glusterd_brick_stop (snap_vol, brickinfo, _gf_false); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "Failed to stop " + "brick for volume %s", snap_vol->volname); + save_ret = ret; + + /* Continue to cleaning up the snap in case of error + if force flag is enabled */ + if (!force) + goto out; + } + } + + /* Only remove the backend lvm when required */ + if (remove_lvm) { + ret = glusterd_lvm_snapshot_remove (rsp_dict, snap_vol); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "Failed to remove " + "lvm snapshot volume %s", snap_vol->volname); + save_ret = ret; + if (!force) + goto out; + } + } + + ret = glusterd_store_delete_volume (snap_vol); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "Failed to remove volume %s " + "from store", snap_vol->volname); + save_ret = ret; + if (!force) + goto out; + } + + if (!list_empty(&snap_vol->snapvol_list)) { + ret = glusterd_volinfo_find (snap_vol->parent_volname, + &origin_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "parent volinfo %s for volume %s", + snap_vol->parent_volname, snap_vol->volname); + save_ret = ret; + if (!force) + goto out; + } + origin_vol->snap_count--; + } + + ret = glusterd_volinfo_delete (snap_vol); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "Failed to remove volinfo " + "%s ", snap_vol->volname); + save_ret = ret; + if (!force) + goto out; + } + + if (save_ret) + ret = save_ret; +out: + gf_log (this->name, GF_LOG_TRACE, "returning %d", ret); + return ret; +} + +int32_t +glusterd_snapobject_delete (glusterd_snap_t *snap) +{ + if (snap == NULL) { + gf_log(THIS->name, GF_LOG_WARNING, "snap is NULL"); + return -1; + } + + list_del_init (&snap->snap_list); + list_del_init (&snap->volumes); + if (LOCK_DESTROY(&snap->lock)) + gf_log (THIS->name, GF_LOG_WARNING, "Failed destroying lock" + "of snap %s", snap->snapname); + + GF_FREE (snap->description); + GF_FREE (snap); + + return 0; +} + +int32_t +glusterd_snap_remove (dict_t *rsp_dict, + glusterd_snap_t *snap, + gf_boolean_t remove_lvm, + gf_boolean_t force) +{ + int ret = -1; + int save_ret = 0; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap); + + if (!snap) { + gf_log(this->name, GF_LOG_WARNING, "snap is NULL"); + ret = -1; + goto out; + } + + list_for_each_entry_safe (snap_vol, tmp, &snap->volumes, vol_list) { + ret = glusterd_snap_volume_remove (rsp_dict, snap_vol, + remove_lvm, force); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "Failed to remove " + "volinfo %s for snap %s", snap_vol->volname, + snap->snapname); + save_ret = ret; + + /* Continue to cleaning up the snap in case of error + if force flag is enabled */ + if (!force) + goto out; + } + } + + ret = glusterd_store_delete_snap (snap); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "Failed to remove snap %s " + "from store", snap->snapname); + save_ret = ret; + if (!force) + goto out; + } + + ret = glusterd_snapobject_delete (snap); + if (ret) + gf_log (this->name, GF_LOG_WARNING, "Failed to delete " + "snap object %s", snap->snapname); + + if (save_ret) + ret = save_ret; +out: + gf_log (THIS->name, GF_LOG_TRACE, "returning %d", ret); + return ret; +} + +static int +glusterd_snapshot_get_snapvol_detail (dict_t *dict, + glusterd_volinfo_t *snap_vol, + char *keyprefix, int detail) +{ + int ret = -1; + int snap_limit = 0; + char key[PATH_MAX] = {0,}; + char *value = NULL; + glusterd_volinfo_t *origin_vol = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + + this = THIS; + conf = this->private; + GF_ASSERT (conf); + + GF_ASSERT (dict); + GF_ASSERT (snap_vol); + GF_ASSERT (keyprefix); + + /* Volume Name */ + value = gf_strdup (snap_vol->volname); + if (!value) + goto out; + + snprintf (key, sizeof (key), "%s.volname", keyprefix); + ret = dict_set_dynstr (dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "volume name in dictionary: %s", key); + goto out; + } + + /* Volume ID */ + value = gf_strdup (uuid_utoa (snap_vol->volume_id)); + if (NULL == value) { + ret = -1; + goto out; + } + + snprintf (key, sizeof (key), "%s.vol-id", keyprefix); + ret = dict_set_dynstr (dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "volume id in dictionary: %s", key); + goto out; + } + value = NULL; + + /* volume status */ + snprintf (key, sizeof (key), "%s.vol-status", keyprefix); + switch (snap_vol->status) { + case GLUSTERD_STATUS_STARTED: + ret = dict_set_str (dict, key, "Started"); + break; + case GLUSTERD_STATUS_STOPPED: + ret = dict_set_str (dict, key, "Stopped"); + break; + case GD_SNAP_STATUS_NONE: + ret = dict_set_str (dict, key, "None"); + break; + default: + gf_log (this->name, GF_LOG_ERROR, "Invalid volume status"); + ret = -1; + goto out; + } + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set volume status" + " in dictionary: %s", key); + goto out; + } + + + ret = glusterd_volinfo_find (snap_vol->parent_volname, &origin_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to get the parent " + "volinfo for the volume %s", snap_vol->volname); + goto out; + } + + /* Snaps available */ + if (conf->snap_max_hard_limit < origin_vol->snap_max_hard_limit) { + snap_limit = conf->snap_max_hard_limit; + gf_log(this->name, GF_LOG_DEBUG, "system snap-max-hard-limit is" + " lesser than volume snap-max-hard-limit, " + "snap-max-hard-limit value is set to %d", snap_limit); + } else { + snap_limit = origin_vol->snap_max_hard_limit; + gf_log(this->name, GF_LOG_DEBUG, "volume snap-max-hard-limit is" + " lesser than system snap-max-hard-limit, " + "snap-max-hard-limit value is set to %d", snap_limit); + } + + snprintf (key, sizeof (key), "%s.snaps-available", keyprefix); + if (snap_limit > origin_vol->snap_count) + ret = dict_set_int32 (dict, key, + snap_limit - origin_vol->snap_count); + else + ret = dict_set_int32 (dict, key, 0); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set available snaps"); + goto out; + } + + snprintf (key, sizeof (key), "%s.snapcount", keyprefix); + ret = dict_set_int32 (dict, key, origin_vol->snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not save snapcount"); + goto out; + } + + if (!detail) + goto out; + + /* Parent volume name */ + value = gf_strdup (snap_vol->parent_volname); + if (!value) + goto out; + + snprintf (key, sizeof (key), "%s.origin-volname", keyprefix); + ret = dict_set_dynstr (dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set parent " + "volume name in dictionary: %s", key); + goto out; + } + value = NULL; + + ret = 0; +out: + if (value) + GF_FREE (value); + + return ret; +} + +static int +glusterd_snapshot_get_snap_detail (dict_t *dict, glusterd_snap_t *snap, + char *keyprefix, glusterd_volinfo_t *volinfo) +{ + int ret = -1; + int volcount = 0; + char key[PATH_MAX] = {0,}; + char *value = NULL; + char *timestr = NULL; + struct tm *tmptr = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp_vol = NULL; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (dict); + GF_ASSERT (snap); + GF_ASSERT (keyprefix); + + /* Snap Name */ + value = gf_strdup (snap->snapname); + if (!value) + goto out; + + snprintf (key, sizeof (key), "%s.snapname", keyprefix); + ret = dict_set_dynstr (dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "snap name in dictionary"); + goto out; + } + + /* Snap ID */ + value = gf_strdup (uuid_utoa (snap->snap_id)); + if (NULL == value) { + ret = -1; + goto out; + } + + snprintf (key, sizeof (key), "%s.snap-id", keyprefix); + ret = dict_set_dynstr (dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "snap id in dictionary"); + goto out; + } + value = NULL; + + tmptr = localtime (&(snap->time_stamp)); + if (NULL == tmptr) { + gf_log (this->name, GF_LOG_ERROR, "Failed to convert " + "time_t to *tm"); + ret = -1; + goto out; + } + + timestr = GF_CALLOC (1, PATH_MAX, gf_gld_mt_char); + if (NULL == timestr) { + ret = -1; + goto out; + } + + ret = strftime (timestr, PATH_MAX, "%Y-%m-%d %H:%M:%S", tmptr); + if (0 == ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to convert time_t " + "to string"); + ret = -1; + goto out; + } + + snprintf (key, sizeof (key), "%s.snap-time", keyprefix); + ret = dict_set_dynstr (dict, key, timestr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "snap time stamp in dictionary"); + goto out; + } + timestr = NULL; + + /* If snap description is provided then add that into dictionary */ + if (NULL != snap->description) { + value = gf_strdup (snap->description); + if (NULL == value) { + ret = -1; + goto out; + } + + snprintf (key, sizeof (key), "%s.snap-desc", keyprefix); + ret = dict_set_dynstr (dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "snap description in dictionary"); + goto out; + } + value = NULL; + } + + snprintf (key, sizeof (key), "%s.snap-status", keyprefix); + switch (snap->snap_status) { + case GD_SNAP_STATUS_INIT: + ret = dict_set_str (dict, key, "Init"); + break; + case GD_SNAP_STATUS_IN_USE: + ret = dict_set_str (dict, key, "In-use"); + break; + case GD_SNAP_STATUS_DECOMMISSION: + ret = dict_set_str (dict, key, "Decommisioned"); + break; + case GD_SNAP_STATUS_RESTORED: + ret = dict_set_str (dict, key, "Restored"); + break; + case GD_SNAP_STATUS_NONE: + ret = dict_set_str (dict, key, "None"); + break; + default: + gf_log (this->name, GF_LOG_ERROR, "Invalid snap status"); + ret = -1; + goto out; + } + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snap status " + "in dictionary"); + goto out; + } + + if (volinfo) { + volcount = 1; + snprintf (key, sizeof (key), "%s.vol%d", keyprefix, volcount); + ret = glusterd_snapshot_get_snapvol_detail (dict, + volinfo, key, 0); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "get volume detail %s for snap %s", + snap_vol->volname, snap->snapname); + goto out; + } + goto done; + } + + list_for_each_entry_safe (snap_vol, tmp_vol, &snap->volumes, vol_list) { + volcount++; + snprintf (key, sizeof (key), "%s.vol%d", keyprefix, volcount); + ret = glusterd_snapshot_get_snapvol_detail (dict, + snap_vol, key, 1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "get volume detail %s for snap %s", + snap_vol->volname, snap->snapname); + goto out; + } + } + +done: + snprintf (key, sizeof (key), "%s.vol-count", keyprefix); + ret = dict_set_int32 (dict, key, volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set %s", + key); + goto out; + } + + ret = 0; +out: + if (value) + GF_FREE (value); + + if (timestr) + GF_FREE(timestr); + + return ret; +} + +static int +glusterd_snapshot_get_all_snap_info (dict_t *dict) +{ + int ret = -1; + int snapcount = 0; + char key[PATH_MAX] = {0,}; + glusterd_snap_t *snap = NULL; + glusterd_snap_t *tmp_snap = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + + /* General parameter validation */ + GF_ASSERT (dict); + + list_for_each_entry_safe (snap, tmp_snap, &priv->snapshots, snap_list) { + snapcount++; + snprintf (key, sizeof (key), "snap%d", snapcount); + ret = glusterd_snapshot_get_snap_detail (dict, snap, key, NULL); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "snapdetail for snap %s", snap->snapname); + goto out; + } + } + + ret = dict_set_int32 (dict, "snap-count", snapcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snapcount"); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_snapshot_get_info_by_volume (dict_t *dict, char *volname, + char *err_str, size_t len) +{ + int ret = -1; + int snapcount = 0; + int snap_limit = 0; + char *value = NULL; + char key[PATH_MAX] = ""; + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp_vol = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + + this = THIS; + conf = this->private; + GF_ASSERT (conf); + + GF_ASSERT (dict); + GF_ASSERT (volname); + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, len, "Volume (%s) does not exist", volname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + /* Snaps available */ + if (conf->snap_max_hard_limit < volinfo->snap_max_hard_limit) { + snap_limit = conf->snap_max_hard_limit; + gf_log(this->name, GF_LOG_DEBUG, "system snap-max-hard-limit is" + " lesser than volume snap-max-hard-limit, " + "snap-max-hard-limit value is set to %d", snap_limit); + } else { + snap_limit = volinfo->snap_max_hard_limit; + gf_log(this->name, GF_LOG_DEBUG, "volume snap-max-hard-limit is" + " lesser than system snap-max-hard-limit, " + "snap-max-hard-limit value is set to %d", snap_limit); + } + + if (snap_limit > volinfo->snap_count) + ret = dict_set_int32 (dict, "snaps-available", + snap_limit - volinfo->snap_count); + else + ret = dict_set_int32 (dict, "snaps-available", 0); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set available snaps"); + goto out; + } + + /* Origin volume name */ + value = gf_strdup (volinfo->volname); + if (!value) + goto out; + + ret = dict_set_dynstr (dict, "origin-volname", value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set parent " + "volume name in dictionary: %s", key); + goto out; + } + value = NULL; + + list_for_each_entry_safe (snap_vol, tmp_vol, &volinfo->snap_volumes, + snapvol_list) { + snapcount++; + snprintf (key, sizeof (key), "snap%d", snapcount); + ret = glusterd_snapshot_get_snap_detail (dict, + snap_vol->snapshot, + key, snap_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "snapdetail for snap %s", + snap_vol->snapshot->snapname); + goto out; + } + } + ret = dict_set_int32 (dict, "snap-count", snapcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snapcount"); + goto out; + } + + ret = 0; +out: + if (value) + GF_FREE (value); + + return ret; +} + +/* This function will be called from RPC handler routine. + * This function is responsible for getting the requested + * snapshot info into the dictionary. + * + * @param req RPC request object. Required for sending a response back. + * @param op glusterd operation. Required for sending a response back. + * @param dict pointer to dictionary which will contain both + * request and response key-pair values. + * @return -1 on error and 0 on success + */ +int +glusterd_handle_snapshot_info (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + int8_t snap_driven = 1; + char *volname = NULL; + char *snapname = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + int32_t cmd = GF_SNAP_INFO_TYPE_ALL; + + this = THIS; + GF_ASSERT (this); + + GF_VALIDATE_OR_GOTO (this->name, req, out); + GF_VALIDATE_OR_GOTO (this->name, dict, out); + + + ret = dict_get_int32 (dict, "cmd", &cmd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get type " + "of snapshot info"); + goto out; + } + + switch (cmd) { + case GF_SNAP_INFO_TYPE_ALL: + { + ret = glusterd_snapshot_get_all_snap_info (dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get info of all snaps"); + goto out; + } + break; + } + + case GF_SNAP_INFO_TYPE_SNAP: + { + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get snap name"); + goto out; + } + + ret = dict_set_int32 (dict, "snap-count", 1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set snapcount"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + snprintf (err_str, len, + "Snap (%s) does not exist", snapname); + gf_log (this->name, GF_LOG_ERROR, + "%s", err_str); + ret = -1; + goto out; + } + ret = glusterd_snapshot_get_snap_detail (dict, snap, + "snap1", NULL); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get snap detail of snap " + "%s", snap->snapname); + goto out; + } + break; + } + + case GF_SNAP_INFO_TYPE_VOL: + { + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get volname"); + goto out; + } + ret = glusterd_snapshot_get_info_by_volume (dict, + volname, err_str, len); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get volume info of volume " + "%s", volname); + goto out; + } + snap_driven = 0; + break; + } + } + + ret = dict_set_int8 (dict, "snap-driven", snap_driven); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snap-driven"); + goto out; + } + + /* If everything is successful then send the response back to cli. + * In case of failure the caller of this function will take care + of the response */ + ret = glusterd_op_send_cli_response (op, 0, 0, req, dict, err_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to send cli " + "response"); + goto out; + } + + ret = 0; + +out: + return ret; +} + +/* This function sets all the snapshot names in the dictionary */ +int +glusterd_snapshot_get_all_snapnames (dict_t *dict) +{ + int ret = -1; + int snapcount = 0; + char *snapname = NULL; + char key[PATH_MAX] = {0,}; + glusterd_snap_t *snap = NULL; + glusterd_snap_t *tmp_snap = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (dict); + + list_for_each_entry_safe (snap, tmp_snap, &priv->snapshots, snap_list) { + snapcount++; + snapname = gf_strdup (snap->snapname); + if (!snapname) { + gf_log (this->name, GF_LOG_ERROR, "strdup failed"); + ret = -1; + goto out; + } + snprintf (key, sizeof (key), "snapname%d", snapcount); + ret = dict_set_dynstr (dict, key, snapname); + if (ret) { + GF_FREE (snapname); + gf_log (this->name, GF_LOG_ERROR, "Failed to set %s", + key); + goto out; + } + } + + ret = dict_set_int32 (dict, "snap-count", snapcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snapcount"); + goto out; + } + + ret = 0; +out: + + return ret; +} + +/* This function sets all the snapshot names + under a given volume in the dictionary */ +int +glusterd_snapshot_get_vol_snapnames (dict_t *dict, glusterd_volinfo_t *volinfo) +{ + int ret = -1; + int snapcount = 0; + char *snapname = NULL; + char key[PATH_MAX] = {0,}; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp_vol = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (dict); + GF_ASSERT (volinfo); + + list_for_each_entry_safe (snap_vol, tmp_vol, + &volinfo->snap_volumes, snapvol_list) { + snapcount++; + snapname = gf_strdup (snap_vol->snapshot->snapname); + if (!snapname) { + gf_log (this->name, GF_LOG_ERROR, + "strdup failed"); + ret = -1; + goto out; + } + snprintf (key, sizeof (key), "snapname%d", snapcount); + ret = dict_set_dynstr (dict, key, snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "set %s", key); + GF_FREE (snapname); + goto out; + } + } + + ret = dict_set_int32 (dict, "snap-count", snapcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snapcount"); + goto out; + } + + ret = 0; +out: + + return ret; +} + +int +glusterd_handle_snapshot_list (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + + GF_VALIDATE_OR_GOTO (this->name, req, out); + GF_VALIDATE_OR_GOTO (this->name, dict, out); + + /* Ignore error for getting volname as it is optional */ + ret = dict_get_str (dict, "volname", &volname); + + if (NULL == volname) { + ret = glusterd_snapshot_get_all_snapnames (dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get snapshot list"); + goto out; + } + } else { + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, len, + "Volume (%s) does not exist", volname); + gf_log (this->name, GF_LOG_ERROR, + "%s", err_str); + goto out; + } + + ret = glusterd_snapshot_get_vol_snapnames (dict, volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get snapshot list for volume %s", + volname); + goto out; + } + } + + /* If everything is successful then send the response back to cli. + In case of failure the caller of this function will take of response.*/ + ret = glusterd_op_send_cli_response (op, 0, 0, req, dict, err_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to send cli " + "response"); + goto out; + } + + ret = 0; + +out: + return ret; +} + +/* This is a snapshot create handler function. This function will be + * executed in the originator node. This function is responsible for + * calling mgmt_v3 framework to do the actual snap creation on all the bricks + * + * @param req RPC request object + * @param op gluster operation + * @param dict dictionary containing snapshot restore request + * @param err_str In case of an err this string should be populated + * @param len length of err_str buffer + * + * @return Negative value on Failure and 0 in success + */ +int +glusterd_handle_snapshot_create (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + char *volname = NULL; + char *snapname = NULL; + int64_t volcount = 0; + xlator_t *this = NULL; + char key[PATH_MAX] = ""; + char *username = NULL; + char *password = NULL; + uuid_t *uuid_ptr = NULL; + uuid_t tmp_uuid = {0}; + int i = 0; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (dict); + GF_ASSERT (err_str); + + ret = dict_get_int64 (dict, "volcount", &volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "get the volume count"); + goto out; + } + if (volcount <= 0) { + gf_log (this->name, GF_LOG_ERROR, "Invalid volume count %ld " + "supplied", volcount); + ret = -1; + goto out; + } + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to get the snapname"); + goto out; + } + + if (strlen(snapname) >= GLUSTERD_MAX_SNAP_NAME) { + snprintf (err_str, len, "snapname cannot exceed 255 " + "characters"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + ret = -1; + goto out; + } + + uuid_ptr = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); + if (!uuid_ptr) { + gf_log (this->name, GF_LOG_ERROR, "Out Of Memory"); + ret = -1; + goto out; + } + + uuid_generate (*uuid_ptr); + ret = dict_set_bin (dict, "snap-id", uuid_ptr, sizeof(uuid_t)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to set snap-id"); + GF_FREE (uuid_ptr); + goto out; + } + uuid_ptr = NULL; + + ret = dict_set_int64 (dict, "snap-time", (int64_t)time(NULL)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to set snap-time"); + goto out; + } + + for (i = 1; i <= volcount; i++) { + snprintf (key, sizeof (key), "volname%d", i); + ret = dict_get_str (dict, key, &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get volume name"); + goto out; + } + + /* generate internal username and password for the snap*/ + uuid_generate (tmp_uuid); + username = gf_strdup (uuid_utoa (tmp_uuid)); + snprintf (key, sizeof(key), "volume%d_username", i); + ret = dict_set_dynstr (dict, key, username); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snap " + "username for volume %s", volname); + GF_FREE (username); + goto out; + } + + uuid_generate (tmp_uuid); + password = gf_strdup (uuid_utoa (tmp_uuid)); + snprintf (key, sizeof(key), "volume%d_password", i); + ret = dict_set_dynstr (dict, key, password); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snap " + "password for volume %s", volname); + GF_FREE (password); + goto out; + } + + uuid_ptr = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); + if (!uuid_ptr) { + gf_log (this->name, GF_LOG_ERROR, "Out Of Memory"); + ret = -1; + goto out; + } + + snprintf (key, sizeof(key) - 1, "vol%d_volid", i); + uuid_generate (*uuid_ptr); + ret = dict_set_bin (dict, key, uuid_ptr, sizeof(uuid_t)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set snap_volid"); + GF_FREE (uuid_ptr); + goto out; + } + } + + ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to initiate snap " + "phases"); + } + +out: + return ret; +} + +/* This is a snapshot status handler function. This function will be + * executed in a originator node. This function is responsible for + * calling mgmt v3 framework to get the actual snapshot status from + * all the bricks + * + * @param req RPC request object + * @param op gluster operation + * @param dict dictionary containing snapshot status request + * @param err_str In case of an err this string should be populated + * @param len length of err_str buffer + * + * return : 0 in case of success. + * -1 in case of failure. + * + */ +int +glusterd_handle_snapshot_status (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + char *volname = NULL; + char *snapname = NULL; + char *buf = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + int32_t cmd = -1; + int i = 0; + dict_t *voldict = NULL; + char key[PATH_MAX] = ""; + glusterd_volinfo_t *volinfo = NULL; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + + GF_ASSERT (conf); + GF_ASSERT (req); + GF_ASSERT (dict); + GF_ASSERT (err_str); + + ret = dict_get_int32 (dict, "cmd", &cmd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not get status type"); + goto out; + } + switch (cmd) { + case GF_SNAP_STATUS_TYPE_ALL: + { + /* IF we give "gluster snapshot status" + * then lock is held on all snaps. + * This is the place where necessary information + * (snapname and snapcount)is populated in dictionary + * for locking. + */ + ++i; + list_for_each_entry (snap, &conf->snapshots, snap_list) + { + snprintf (key, sizeof (key), "snapname%d", i); + buf = gf_strdup (snap->snapname); + if (!buf) { + ret = -1; + goto out; + } + ret = dict_set_dynstr (dict, key, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save snapname (%s) " + "in the dictionary", + snap->snapname); + GF_FREE (buf); + goto out; + } + + buf = NULL; + i++; + } + + ret = dict_set_int32 (dict, "snapcount", i - 1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not " + "save snapcount in the dictionary"); + goto out; + } + break; + } + + case GF_SNAP_STATUS_TYPE_SNAP: + { + /* IF we give "gluster snapshot status <snapname>" + * then lock is held on single snap. + * This is the place where necessary information + * (snapname)is populated in dictionary + * for locking. + */ + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to fetch snap name"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + snprintf (err_str, len, "Snap (%s)" + "does not exist", snapname); + gf_log(this->name, GF_LOG_ERROR, + "%s", err_str); + ret = -1; + goto out; + } + break; + } + case GF_SNAP_STATUS_TYPE_VOL: + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to fetch volname"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, len, "Volume (%s) " + "does not exist", volname); + gf_log (this->name, GF_LOG_ERROR, + "%s", err_str); + goto out; + } + + i = 1; + list_for_each_entry (snap_volinfo, + &volinfo->snap_volumes, snapvol_list) { + snprintf (key, sizeof (key), "snapname%d", i); + + buf = gf_strdup + (snap_volinfo->snapshot->snapname); + if (!buf) { + ret = -1; + goto out; + } + + ret = dict_set_dynstr (dict, key, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save snapname"); + GF_FREE (buf); + goto out; + } + + buf = NULL; + i++; + } + + ret = dict_set_int32 (dict, "snapcount", i-1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save snapcount"); + goto out; + } + break; + default: + { + gf_log (this->name, GF_LOG_ERROR, "Unknown type"); + ret = -1; + goto out; + } + } + + /* Volume lock is not necessary for snapshot status, hence + * turning it off + */ + ret = dict_set_int8 (dict, "hold_vol_locks", 0); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Setting volume lock " + "flag failed"); + goto out; + } + + ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to initiate " + "snap phases"); + goto out; + } + + ret = 0; + +out: + if (voldict) { + dict_unref (voldict); + } + return ret; +} + + +/* This is a snapshot restore handler function. This function will be + * executed in the originator node. This function is responsible for + * calling mgmt_v3 framework to do the actual restore on all the bricks + * + * @param req RPC request object + * @param op gluster operation + * @param dict dictionary containing snapshot restore request + * @param err_str In case of an err this string should be populated + * @param len length of err_str buffer + * + * @return Negative value on Failure and 0 in success + */ +int +glusterd_handle_snapshot_restore (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + char *snapname = NULL; + char *buf = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + int32_t i = 0; + char key[PATH_MAX] = ""; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + + GF_ASSERT (conf); + GF_ASSERT (req); + GF_ASSERT (dict); + GF_ASSERT (err_str); + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "get snapname"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + snprintf (err_str, len, "Snap (%s) does not exist", snapname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + ret = -1; + goto out; + } + + list_for_each_entry (snap_volinfo, &snap->volumes, vol_list) { + i++; + snprintf (key, sizeof (key), "volname%d", i); + buf = gf_strdup (snap_volinfo->parent_volname); + if (!buf) { + ret = -1; + goto out; + } + ret = dict_set_dynstr (dict, key, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not set " + "parent volume name %s in the dict", + snap_volinfo->parent_volname); + GF_FREE (buf); + goto out; + } + buf = NULL; + } + + ret = dict_set_int32 (dict, "volcount", i); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save volume count"); + goto out; + } + + ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to initiate snap " + "phases"); + goto out; + } + + ret = 0; + +out: + return ret; +} + +glusterd_snap_t* +glusterd_create_snap_object (dict_t *dict, dict_t *rsp_dict) +{ + char *snapname = NULL; + uuid_t *snap_id = NULL; + char *description = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + int64_t time_stamp = 0; + + this = THIS; + priv = this->private; + + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + /* Fetch snapname, description, id and time from dict */ + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to fetch snapname"); + goto out; + } + + /* Ignore ret value for description*/ + ret = dict_get_str (dict, "description", &description); + + ret = dict_get_bin (dict, "snap-id", (void **)&snap_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to fetch snap_id"); + goto out; + } + + ret = dict_get_int64 (dict, "snap-time", &time_stamp); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to fetch snap-time"); + goto out; + } + if (time_stamp <= 0) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Invalid time-stamp: %ld", + time_stamp); + goto out; + } + + list_for_each_entry (snap, &priv->snapshots, snap_list) { + if (!strcmp (snap->snapname, snapname) || + !uuid_compare (snap->snap_id, *snap_id)) { + gf_log (THIS->name, GF_LOG_ERROR, + "Found duplicate snap %s (%s)", + snap->snapname, uuid_utoa (snap->snap_id)); + ret = -1; + break; + } + } + if (ret) { + snap = NULL; + goto out; + } + + snap = glusterd_new_snap_object (); + if (!snap) { + gf_log (this->name, GF_LOG_ERROR, "Could not create " + "the snap object for snap %s", snapname); + goto out; + } + + strcpy (snap->snapname, snapname); + uuid_copy (snap->snap_id, *snap_id); + snap->time_stamp = (time_t)time_stamp; + /* Set the status as GD_SNAP_STATUS_INIT and once the backend snapshot + is taken and snap is really ready to use, set the status to + GD_SNAP_STATUS_IN_USE. This helps in identifying the incomplete + snapshots and cleaning them up. + */ + snap->snap_status = GD_SNAP_STATUS_INIT; + if (description) { + snap->description = gf_strdup (description); + if (snap->description == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "Saving the Snap Description Failed"); + ret = -1; + goto out; + } + } + + ret = glusterd_store_snap (snap); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Could not store snap" + "object %s", snap->snapname); + goto out; + } + + list_add_order (&snap->snap_list, &priv->snapshots, + glusterd_compare_snap_time); + + gf_log (this->name, GF_LOG_TRACE, "Snap %s added to the list", + snap->snapname); + + ret = 0; + +out: + if (ret) { + if (snap) + glusterd_snap_remove (rsp_dict, snap, + _gf_true, _gf_true); + snap = NULL; + } + + return snap; +} + +/* Added missed_snap_entry to rsp_dict */ +int32_t +glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, char *snap_uuid, + glusterd_brickinfo_t *brickinfo, + int32_t brick_number, int32_t op) +{ + char *buf = NULL; + char missed_snap_entry[PATH_MAX] = ""; + char name_buf[PATH_MAX] = ""; + int32_t missed_snap_count = -1; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap_uuid); + GF_ASSERT (brickinfo); + + snprintf (missed_snap_entry, sizeof(missed_snap_entry), + "%s:%s=%d:%s:%d:%d", uuid_utoa(brickinfo->uuid), + snap_uuid, brick_number, brickinfo->path, op, + GD_MISSED_SNAP_PENDING); + + buf = gf_strdup (missed_snap_entry); + if (!buf) { + ret = -1; + goto out; + } + + /* Fetch the missed_snap_count from the dict */ + ret = dict_get_int32 (rsp_dict, "missed_snap_count", + &missed_snap_count); + if (ret) { + /* Initialize the missed_snap_count for the first time */ + missed_snap_count = 0; + } + + /* Setting the missed_snap_entry in the rsp_dict */ + snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", + missed_snap_count); + ret = dict_set_dynstr (rsp_dict, name_buf, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set missed_snap_entry (%s) " + "in the rsp_dict.", buf); + GF_FREE (buf); + goto out; + } + missed_snap_count++; + + /* Setting the new missed_snap_count in the dict */ + ret = dict_set_int32 (rsp_dict, "missed_snap_count", + missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set missed_snap_count for %s " + "in the rsp_dict.", missed_snap_entry); + goto out; + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* This function is called to get the device path of the snap lvm. Usually + if /dev/mapper/<group-name>-<lvm-name> is the device for the lvm, + then the snap device will be /dev/<group-name>/<snapname>. + This function takes care of building the path for the snap device. +*/ +char * +glusterd_build_snap_device_path (char *device, char *snapname) +{ + char snap[PATH_MAX] = ""; + char msg[1024] = ""; + char volgroup[PATH_MAX] = ""; + char *snap_device = NULL; + xlator_t *this = NULL; + runner_t runner = {0,}; + char *ptr = NULL; + int ret = -1; + + this = THIS; + GF_ASSERT (this); + if (!device) { + gf_log (this->name, GF_LOG_ERROR, "device is NULL"); + goto out; + } + if (!snapname) { + gf_log (this->name, GF_LOG_ERROR, "snapname is NULL"); + goto out; + } + + runinit (&runner); + runner_add_args (&runner, "/sbin/lvs", "--noheadings", "-o", "vg_name", + device, NULL); + runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); + snprintf (msg, sizeof (msg), "Get volume group for device %s", device); + runner_log (&runner, this->name, GF_LOG_DEBUG, msg); + ret = runner_start (&runner); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get volume group " + "for device %s", device); + runner_end (&runner); + goto out; + } + ptr = fgets(volgroup, sizeof(volgroup), + runner_chio (&runner, STDOUT_FILENO)); + if (!ptr || !strlen(volgroup)) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get volume group " + "for snap %s", snapname); + runner_end (&runner); + ret = -1; + goto out; + } + runner_end (&runner); + + snprintf (snap, sizeof(snap), "/dev/%s/%s", gf_trim(volgroup), + snapname); + snap_device = gf_strdup (snap); + if (!snap_device) { + gf_log (this->name, GF_LOG_WARNING, "Cannot copy the " + "snapshot device name for snapname: %s)", snapname); + } + +out: + return snap_device; +} + +/* This function actually calls the command (or the API) for taking the + snapshot of the backend brick filesystem. If this is successful, + then call the glusterd_snap_create function to create the snap object + for glusterd +*/ +char * +glusterd_take_lvm_snapshot (glusterd_volinfo_t *snap_vol, + glusterd_brickinfo_t *brickinfo) +{ + char msg[NAME_MAX] = ""; + char buf[PATH_MAX] = ""; + char *snap_device = NULL; + char *ptr = NULL; + char *device = NULL; + int ret = -1; + gf_boolean_t match = _gf_false; + runner_t runner = {0,}; + xlator_t *this = NULL; + + this = THIS; + + if (!brickinfo) { + gf_log (this->name, GF_LOG_ERROR, "brickinfo NULL"); + goto out; + } + + device = glusterd_get_brick_mount_details (brickinfo); + if (!device) { + gf_log (this->name, GF_LOG_ERROR, "getting device name for " + "the brick %s:%s failed", brickinfo->hostname, + brickinfo->path); + goto out; + } + + /* Figuring out if setactivationskip flag is supported or not */ + runinit (&runner); + snprintf (msg, sizeof (msg), "running lvcreate help"); + runner_add_args (&runner, "/sbin/lvcreate", "--help", NULL); + runner_log (&runner, "", GF_LOG_DEBUG, msg); + runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); + ret = runner_start (&runner); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to run lvcreate help"); + runner_end (&runner); + goto out; + } + + /* Looking for setactivationskip in lvcreate --help */ + do { + ptr = fgets(buf, sizeof(buf), + runner_chio (&runner, STDOUT_FILENO)); + if (ptr) { + if (strstr(buf, "setactivationskip")) { + match = _gf_true; + break; + } + } + } while (ptr != NULL); + runner_end (&runner); + + /* Takng the actual snapshot */ + runinit (&runner); + snprintf (msg, sizeof (msg), "taking snapshot of the brick %s:%s", + brickinfo->hostname, brickinfo->path); + if (match == _gf_true) + runner_add_args (&runner, "/sbin/lvcreate", "-s", device, + "--setactivationskip", "n", "--name", + snap_vol->volname, NULL); + else + runner_add_args (&runner, "/sbin/lvcreate", "-s", device, + "--name", snap_vol->volname, NULL); + runner_log (&runner, "", GF_LOG_DEBUG, msg); + ret = runner_start (&runner); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "taking snapshot of the " + "brick (%s:%s) of device %s failed", + brickinfo->hostname, brickinfo->path, device); + runner_end (&runner); + goto out; + } + runner_end (&runner); + + snap_device = glusterd_build_snap_device_path (device, + snap_vol->volname); + if (!snap_device) { + gf_log (this->name, GF_LOG_WARNING, "Cannot copy the snapshot " + "device name for snap %s (volume id: %s)", + snap_vol->snapshot->snapname, snap_vol->volname); + ret = -1; + goto out; + } + +out: + return snap_device; +} + +int32_t +glusterd_snap_brick_create (char *device, glusterd_volinfo_t *snap_volinfo, + glusterd_brickinfo_t *original_brickinfo, + int32_t brick_count, char *snap_brick_dir) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char snap_brick_mount_path[PATH_MAX] = ""; + char snap_brick_path[PATH_MAX] = ""; + char msg[1024] = ""; + struct stat statbuf = {0, }; + runner_t runner = {0, }; + + this = THIS; + priv = this->private; + + GF_ASSERT (device); + GF_ASSERT (snap_volinfo); + GF_ASSERT (original_brickinfo); + GF_ASSERT (snap_brick_dir); + + snprintf (snap_brick_mount_path, sizeof (snap_brick_mount_path), + "%s/%s/brick%d", snap_mount_folder, snap_volinfo->volname, + brick_count+1); + + snprintf (snap_brick_path, sizeof (snap_brick_path), "%s%s", + snap_brick_mount_path, snap_brick_dir); + + ret = mkdir_p (snap_brick_mount_path, 0777, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "creating the brick directory" + " %s for the snapshot %s(device: %s) failed", + snap_brick_mount_path, snap_volinfo->volname, device); + goto out; + } + /* mount the snap logical device on the directory inside + /run/gluster/snaps/<snapname>/@snap_brick_mount_path + Way to mount the snap brick via mount api is this. + ret = mount (device, snap_brick_mount_path, entry->mnt_type, + MS_MGC_VAL, "nouuid"); + But for now, mounting using runner apis. + */ + runinit (&runner); + snprintf (msg, sizeof (msg), "mounting snapshot of the brick %s:%s", + original_brickinfo->hostname, original_brickinfo->path); + runner_add_args (&runner, "mount", "-o", "nouuid", device, + snap_brick_mount_path, NULL); + runner_log (&runner, "", GF_LOG_DEBUG, msg); + + /* let glusterd get blocked till snapshot is over */ + synclock_unlock (&priv->big_lock); + ret = runner_run (&runner); + synclock_lock (&priv->big_lock); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "mounting the snapshot " + "logical device %s failed (error: %s)", device, + strerror (errno)); + goto out; + } else + gf_log (this->name, GF_LOG_DEBUG, "mounting the snapshot " + "logical device %s successful", device); + + ret = stat (snap_brick_path, &statbuf); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "stat of the brick %s" + "(brick mount: %s) failed (%s)", snap_brick_path, + snap_brick_mount_path, strerror (errno)); + goto out; + } + ret = sys_lsetxattr (snap_brick_path, + GF_XATTR_VOL_ID_KEY, + snap_volinfo->volume_id, 16, + XATTR_REPLACE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "extended attribute %s on %s. Reason: " + "%s, snap: %s", GF_XATTR_VOL_ID_KEY, + snap_brick_path, strerror (errno), + snap_volinfo->volname); + goto out; + } + +out: + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "unmounting the snap brick" + " mount %s", snap_brick_mount_path); + umount (snap_brick_mount_path); + } + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int32_t +glusterd_add_bricks_to_snap_volume (dict_t *dict, dict_t *rsp_dict, + glusterd_volinfo_t *snap_vol, + glusterd_brickinfo_t *original_brickinfo, + glusterd_brickinfo_t *snap_brickinfo, + char **snap_brick_dir, int64_t volcount, + int32_t brick_count) +{ + char key[PATH_MAX] = ""; + char snap_brick_path[PATH_MAX] = ""; + char *snap_device = NULL; + gf_boolean_t add_missed_snap = _gf_false; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap_vol); + GF_ASSERT (original_brickinfo); + GF_ASSERT (snap_brickinfo); + GF_ASSERT (snap_brick_dir); + + snprintf (key, sizeof(key) - 1, "vol%ld.brickdir%d", volcount, + brick_count); + ret = dict_get_ptr (dict, key, (void **)snap_brick_dir); + if (ret) { + /* Using original brickinfo here because it will be a + * pending snapshot and storing the original brickinfo + * will help in mapping while recreating the missed snapshot + */ + gf_log (this->name, GF_LOG_WARNING, "Unable to fetch " + "snap mount path (%s). Using original brickinfo", key); + snap_brickinfo->snap_status = -1; + strcpy (snap_brick_path, original_brickinfo->path); + + /* In origiator node add snaps missed + * from different nodes to the dict + */ + if (is_origin_glusterd (dict) == _gf_true) + add_missed_snap = _gf_true; + } else { + /* Create brick-path in the format /var/run/gluster/snaps/ * + * <snap-uuid>/<original-brick#>/snap-brick-dir * + */ + snprintf (snap_brick_path, sizeof(snap_brick_path), + "%s/%s/brick%d%s", snap_mount_folder, + snap_vol->volname, brick_count+1, + *snap_brick_dir); + } + + if ((snap_brickinfo->snap_status != -1) && + (!uuid_compare (original_brickinfo->uuid, MY_UUID)) && + (!glusterd_is_brick_started (original_brickinfo))) { + /* In case if the brick goes down after prevalidate. */ + gf_log (this->name, GF_LOG_WARNING, "brick %s:%s is not" + " started (snap: %s)", + original_brickinfo->hostname, + original_brickinfo->path, + snap_vol->snapshot->snapname); + + snap_brickinfo->snap_status = -1; + strcpy (snap_brick_path, original_brickinfo->path); + add_missed_snap = _gf_true; + } + + if (add_missed_snap) { + ret = glusterd_add_missed_snaps_to_dict (rsp_dict, + snap_vol->volname, + original_brickinfo, + brick_count + 1, + GF_SNAP_OPTION_TYPE_CREATE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to add missed" + " snapshot info for %s:%s in the rsp_dict", + original_brickinfo->hostname, + original_brickinfo->path); + goto out; + } + } + + snprintf (key, sizeof(key), "vol%ld.brick_snapdevice%d", + volcount, brick_count); + ret = dict_get_ptr (dict, key, (void **)&snap_device); + if (ret) { + /* If the device name is empty, so will be the brick path + * Hence the missed snap has already been added above + */ + gf_log (this->name, GF_LOG_ERROR, "Unable to fetch " + "snap device (%s). Leaving empty", key); + } else + strcpy (snap_brickinfo->device_path, snap_device); + + ret = gf_canonicalize_path (snap_brick_path); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to canonicalize path"); + goto out; + } + + strcpy (snap_brickinfo->hostname, original_brickinfo->hostname); + strcpy (snap_brickinfo->path, snap_brick_path); + uuid_copy (snap_brickinfo->uuid, original_brickinfo->uuid); + list_add_tail (&snap_brickinfo->brick_list, &snap_vol->bricks); + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int32_t +glusterd_take_brick_snapshot (glusterd_volinfo_t *origin_vol, + glusterd_volinfo_t *snap_vol, dict_t *rsp_dict, + glusterd_brickinfo_t *original_brickinfo, + glusterd_brickinfo_t *snap_brickinfo, + char *snap_brick_dir, int32_t brick_count) +{ + char *device = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (origin_vol); + GF_ASSERT (snap_vol); + GF_ASSERT (rsp_dict); + GF_ASSERT (original_brickinfo); + GF_ASSERT (snap_brickinfo); + GF_ASSERT (snap_brick_dir); + + device = glusterd_take_lvm_snapshot (snap_vol, original_brickinfo); + /* Fail the snapshot even though snapshot on one of + the bricks fails. At the end when we check whether + the snapshot volume meets quorum or not, then the + the snapshot can either be treated as success, or + in case of failure we can undo the changes and return + failure to cli. */ + if (!device) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to take snapshot of %s:%s", + original_brickinfo->hostname, + original_brickinfo->path); + goto out; + } + + /* create the complete brick here */ + ret = glusterd_snap_brick_create (device, snap_vol, + original_brickinfo, + brick_count, snap_brick_dir); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "not able to" + " create the brickinfo for the snap %s" + ", volume %s", snap_vol->snapshot->snapname, + origin_vol->volname); + goto out; + } + +out: + if (device) + GF_FREE (device); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Look for disconnected peers, for missed snap creates or deletes */ +static int32_t +glusterd_find_missed_snap (dict_t *rsp_dict, glusterd_volinfo_t *vol, + char *snap_uuid, struct list_head *peers, + int32_t op) +{ + int32_t brick_count = -1; + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (peers); + GF_ASSERT (vol); + GF_ASSERT (snap_uuid); + + brick_count = 0; + list_for_each_entry (brickinfo, &vol->bricks, brick_list) { + if (!uuid_compare (brickinfo->uuid, MY_UUID)) { + /* If the brick belongs to the same node */ + brick_count++; + continue; + } + + list_for_each_entry (peerinfo, peers, uuid_list) { + if (uuid_compare (peerinfo->uuid, brickinfo->uuid)) { + /* If the brick doesnt belong to this peer */ + continue; + } + + /* Found peer who owns the brick, * + * if peer is not connected or not * + * friend add it to missed snap list */ + if (!(peerinfo->connected) || + (peerinfo->state.state != + GD_FRIEND_STATE_BEFRIENDED)) { + ret = glusterd_add_missed_snaps_to_dict + (rsp_dict, + snap_uuid, + brickinfo, + brick_count + 1, + op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snapshot " + "info for %s:%s in the " + "rsp_dict", brickinfo->hostname, + brickinfo->path); + goto out; + } + } + } + brick_count++; + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +glusterd_volinfo_t * +glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap, + dict_t *dict, dict_t *rsp_dict, int64_t volcount) +{ + char key[PATH_MAX] = ""; + char *snap_brick_dir = NULL; + char *username = NULL; + char *password = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *snap_vol = NULL; + uuid_t *snap_volid = NULL; + int32_t ret = -1; + int32_t brick_count = 0; + glusterd_brickinfo_t *snap_brickinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (dict); + GF_ASSERT (origin_vol); + GF_ASSERT (rsp_dict); + + /* fetch username, password and vol_id from dict*/ + snprintf (key, sizeof(key), "volume%ld_username", volcount); + ret = dict_get_str (dict, key, &username); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get %s for " + "snap %s", key, snap->snapname); + goto out; + } + + snprintf (key, sizeof(key), "volume%ld_password", volcount); + ret = dict_get_str (dict, key, &password); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get %s for " + "snap %s", key, snap->snapname); + goto out; + } + + snprintf (key, sizeof(key) - 1, "vol%ld_volid", volcount); + ret = dict_get_bin (dict, key, (void **)&snap_volid); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch snap_volid"); + goto out; + } + + /* We are not setting the username and password here as + * we need to set the user name and password passed in + * the dictionary + */ + ret = glusterd_volinfo_dup (origin_vol, &snap_vol, _gf_false); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to duplicate volinfo " + "for the snapshot %s", snap->snapname); + goto out; + } + + /* uuid is used as lvm snapshot name. + This will avoid restrictions on snapshot names provided by user */ + GLUSTERD_GET_UUID_NOHYPHEN (snap_vol->volname, *snap_volid); + uuid_copy (snap_vol->volume_id, *snap_volid); + snap_vol->is_snap_volume = _gf_true; + strcpy (snap_vol->parent_volname, origin_vol->volname); + snap_vol->snapshot = snap; + + glusterd_auth_set_username (snap_vol, username); + glusterd_auth_set_password (snap_vol, password); + + /* Adding snap brickinfos to the snap volinfo */ + brick_count = 0; + list_for_each_entry (brickinfo, &origin_vol->bricks, brick_list) { + snap_brickinfo = NULL; + + ret = glusterd_brickinfo_new (&snap_brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "initializing the brick for the snap " + "volume failed (snapname: %s)", snap->snapname); + goto out; + } + + ret = glusterd_add_bricks_to_snap_volume (dict, rsp_dict, + snap_vol, + brickinfo, + snap_brickinfo, + &snap_brick_dir, + volcount, + brick_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add the snap brick for " + "%s:%s to the snap volume", + brickinfo->hostname, brickinfo->path); + GF_FREE (snap_brickinfo); + goto out; + } + + /*Update the brickid for the new brick in new volume*/ + GLUSTERD_ASSIGN_BRICKID_TO_BRICKINFO (snap_brickinfo, snap_vol, + brick_count); + + /* Take snapshot of the brick */ + if ((uuid_compare (brickinfo->uuid, MY_UUID)) || + (snap_brickinfo->snap_status == -1)) { + brick_count++; + continue; + } + + ret = glusterd_take_brick_snapshot (origin_vol, snap_vol, + rsp_dict, brickinfo, + snap_brickinfo, + snap_brick_dir, + brick_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to take snapshot for %s:%s", + brickinfo->hostname, brickinfo->path); + goto out; + } + brick_count++; + } + + /*TODO: the quorum check of the snap volume here */ + + ret = glusterd_store_volinfo (snap_vol, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to store snapshot " + "volinfo (%s) for snap %s", snap_vol->volname, + snap->snapname); + goto out; + } + + ret = generate_brick_volfiles (snap_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "generating the brick " + "volfiles for the snap %s (volume: %s) failed", + snap->snapname, origin_vol->volname); + goto out; + } + + ret = generate_client_volfiles (snap_vol, GF_CLIENT_TRUSTED); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "generating the trusted " + "client volfiles for the snap %s (volume: %s) failed", + snap->snapname, origin_vol->volname); + goto out; + } + + ret = generate_client_volfiles (snap_vol, GF_CLIENT_OTHER); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "generating the client " + "volfiles for the snap %s (volume: %s) failed", + snap->snapname, origin_vol->volname); + goto out; + } + + ret = glusterd_list_add_snapvol (origin_vol, snap_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "could not add the snap " + "volume %s to the list", snap_vol->volname); + goto out; + } + + list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + if (brickinfo->snap_status == -1) { + gf_log (this->name, GF_LOG_INFO, + "not starting snap brick %s:%s for " + "for the snap %s (volume: %s)", + brickinfo->hostname, brickinfo->path, + snap->snapname, origin_vol->volname); + continue; + } + + ret = glusterd_brick_start (snap_vol, brickinfo, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "starting the " + "brick %s:%s for the snap %s (volume: %s) " + "failed", brickinfo->hostname, brickinfo->path, + snap->snapname, origin_vol->volname); + goto out; + } + } + + snap_vol->status = GLUSTERD_STATUS_STARTED; + ret = glusterd_store_volinfo (snap_vol, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to store snap volinfo"); + goto out; + } + +out: + if (ret) { + if (snap_vol) + glusterd_snap_volume_remove (rsp_dict, snap_vol, + _gf_true, _gf_true); + snap_vol = NULL; + } + + return snap_vol; +} + +/* This is a snapshot remove handler function. This function will be + * executed in the originator node. This function is responsible for + * calling mgmt v3 framework to do the actual remove on all the bricks + * + * @param req RPC request object + * @param op gluster operation + * @param dict dictionary containing snapshot remove request + * @param err_str In case of an err this string should be populated + * @param len length of err_str buffer + * + * @return Negative value on Failure and 0 in success + */ +int +glusterd_handle_snapshot_remove (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + int64_t volcount = 0; + char *snapname = NULL; + char *volname = NULL; + char key[PATH_MAX] = ""; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp = NULL; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (req); + GF_ASSERT (dict); + GF_ASSERT (err_str); + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get snapname"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + snprintf (err_str, len, "Snap (%s) does not exist", snapname); + gf_log (this->name, GF_LOG_ERROR, + "%s", err_str); + ret = -1; + goto out; + } + + /* Set volnames in the dict to get mgmt_v3 lock */ + list_for_each_entry_safe (snap_vol, tmp, &snap->volumes, vol_list) { + volcount++; + volname = gf_strdup (snap_vol->parent_volname); + if (!volname) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "strdup failed"); + goto out; + } + + snprintf (key, sizeof (key), "volname%ld", volcount); + ret = dict_set_dynstr (dict, key, volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "volume name in dictionary"); + GF_FREE (volname); + goto out; + } + volname = NULL; + } + ret = dict_set_int64 (dict, "volcount", volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set volcount"); + goto out; + } + + ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to initiate snap " + "phases"); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_snapshot_remove_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int32_t ret = -1; + char *snapname = NULL; + xlator_t *this = NULL; + glusterd_snap_t *snap = NULL; + + this = THIS; + + if (!dict || !op_errstr) { + gf_log (this->name, GF_LOG_ERROR, "input parameters NULL"); + goto out; + } + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Getting the snap name " + "failed"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + gf_log (this->name, GF_LOG_ERROR, "Snap %s does not exist", + snapname); + ret = -1; + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_snapshot_status_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int ret = -1; + char *snapname = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + int32_t cmd = -1; + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + + GF_ASSERT (conf); + GF_ASSERT (op_errstr); + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, "Input dict is NULL"); + goto out; + } + + ret = dict_get_int32 (dict, "cmd", &cmd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not fetch status cmd"); + goto out; + } + + switch (cmd) { + case GF_SNAP_STATUS_TYPE_ALL: + { + break; + } + case GF_SNAP_STATUS_TYPE_SNAP: + { + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not fetch snapname"); + goto out; + } + + if (!glusterd_find_snap_by_name (snapname)) { + ret = gf_asprintf (op_errstr, "Snap (%s) " + "not found", snapname); + if (ret < 0) { + goto out; + } + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Snap (%s) " + "not found", snapname); + goto out; + } + break; + } + case GF_SNAP_STATUS_TYPE_VOL: + { + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not fetch volname"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + ret = gf_asprintf (op_errstr, "Volume (%s)" + "not found", volname); + if (ret < 0) { + goto out; + } + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Volume " + "%s not present", volname); + goto out; + } + break; + + } + default: + { + gf_log (this->name, GF_LOG_ERROR, "Invalid command"); + break; + } + } + ret = 0; + +out: + return ret; +} + +int32_t +glusterd_snapshot_remove_commit (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int32_t ret = -1; + char *snapname = NULL; + char *dup_snapname = NULL; + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + GF_ASSERT (op_errstr); + + priv = this->private; + GF_ASSERT (priv); + + if (!dict || !op_errstr) { + gf_log (this->name, GF_LOG_ERROR, "input parameters NULL"); + goto out; + } + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Getting the snap name " + "failed"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + gf_log (this->name, GF_LOG_ERROR, "Snap %s does not exist", + snapname); + ret = -1; + goto out; + } + + if (is_origin_glusterd (dict) == _gf_true) { + /* TODO : As of now there is only volume in snapshot. + * Change this when multiple volume snapshot is introduced + */ + snap_volinfo = list_entry (snap->volumes.next, + glusterd_volinfo_t, + vol_list); + if (!snap_volinfo) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch snap_volinfo"); + ret = -1; + goto out; + } + + /* From origin glusterd check if * + * any peers with snap bricks is down */ + ret = glusterd_find_missed_snap (rsp_dict, snap_volinfo, + snap_volinfo->volname, + &priv->peers, + GF_SNAP_OPTION_TYPE_DELETE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to find missed snap deletes"); + goto out; + } + } + + ret = glusterd_snap_remove (rsp_dict, snap, _gf_true, _gf_false); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to remove snap %s", + snapname); + goto out; + } + + dup_snapname = gf_strdup (snapname); + if (!dup_snapname) { + gf_log (this->name, GF_LOG_ERROR, "Strdup failed"); + ret = -1; + goto out; + } + + ret = dict_set_dynstr (rsp_dict, "snapname", dup_snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set the snapname"); + GF_FREE (dup_snapname); + goto out; + } + + ret = 0; +out: + return ret; +} + +int32_t +glusterd_do_snap_cleanup (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + char *name = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_snap_t *snap = NULL; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + + if (!dict || !op_errstr) { + gf_log (this->name, GF_LOG_ERROR, "input parameters NULL"); + goto out; + } + + ret = dict_get_str (dict, "snapname", &name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "getting the snap " + "name failed (volume: %s)", volinfo->volname); + goto out; + } + + /* + If the snapname is not found that means the failure happened at + staging, or in commit, before the snap object is created, in which + case there is nothing to cleanup. So set ret to 0. + */ + snap = glusterd_find_snap_by_name (name); + if (!snap) { + gf_log (this->name, GF_LOG_INFO, "snap %s is not found", name); + ret = 0; + goto out; + } + + ret = glusterd_snap_remove (rsp_dict, snap, _gf_true, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "removing the snap %s failed", + name); + goto out; + } + + name = NULL; + + ret = 0; + +out: + + return ret; +} + +/* In case of a successful, delete or create operation, during post_validate * + * look for missed snap operations and update the missed snap lists */ +int32_t +glusterd_snapshot_update_snaps_post_validate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int32_t ret = -1; + int32_t missed_snap_count = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + GF_ASSERT (op_errstr); + + ret = dict_get_int32 (dict, "missed_snap_count", + &missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "No missed snaps"); + ret = 0; + goto out; + } + + ret = glusterd_store_update_missed_snaps (dict, missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to update missed_snaps_list"); + goto out; + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int ret = -1; + int64_t i = 0; + int64_t volcount = 0; + char *snapname = NULL; + char *volname = NULL; + char *tmp_name = NULL; + char key[PATH_MAX] = ""; + xlator_t *this = NULL; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *origin_vol = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + GF_ASSERT(rsp_dict); + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_int64 (dict, "volcount", &volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "get the volume count"); + goto out; + } + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to fetch snapname"); + goto out; + } + tmp_name = gf_strdup (snapname); + if (!tmp_name) { + gf_log (this->name, GF_LOG_ERROR, "Out of memory"); + ret = -1; + goto out; + } + + ret = dict_set_dynstr (rsp_dict, "snapname", tmp_name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set snapname in rsp_dict"); + GF_FREE (tmp_name); + goto out; + } + tmp_name = NULL; + + snap = glusterd_create_snap_object (dict, rsp_dict); + if (!snap) { + gf_log (this->name, GF_LOG_ERROR, "creating the" + "snap object %s failed", snapname); + ret = -1; + goto out; + } + + for (i = 1; i <= volcount; i++) { + snprintf (key, sizeof (key), "volname%ld", i); + ret = dict_get_str (dict, key, &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &origin_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get the volinfo for " + "the volume %s", volname); + goto out; + } + + /* TODO: Create a stub where the bricks are + added parallely by worker threads so that + the snap creating happens parallely. */ + snap_vol = glusterd_do_snap_vol (origin_vol, snap, dict, + rsp_dict, i); + if (!snap_vol) { + ret = -1; + gf_log (this->name, GF_LOG_WARNING, "taking the " + "snapshot of the volume %s failed", volname); + goto out; + } + } + + snap->snap_status = GD_SNAP_STATUS_IN_USE; + ret = glusterd_store_snap (snap); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Could not store snap" + "object %s", snap->snapname); + goto out; + } + + ret = 0; + +out: + if (ret) { + if (snap) + glusterd_snap_remove (rsp_dict, snap, + _gf_true, _gf_true); + snap = NULL; + } + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +snap_max_hard_limit_set_commit (dict_t *dict, uint64_t value, + char *volname, char **op_errstr) +{ + char err_str[PATH_MAX] = ""; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (volname); + GF_ASSERT (op_errstr); + + conf = this->private; + + GF_ASSERT (conf); + + /* TODO: Initiate auto deletion when there is a limit change */ + if (!volname) { + /* For system limit */ + conf->snap_max_hard_limit = value; + + ret = glusterd_store_global_info (this); + if (ret) { + snprintf (err_str, PATH_MAX, "Failed to store " + "snap-max-hard-limit for system"); + goto out; + } + } else { + /* For one volume */ + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, PATH_MAX, "Failed to get the" + " volinfo for volume %s", volname); + goto out; + } + + volinfo->snap_max_hard_limit = value; + + ret = glusterd_store_volinfo (volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + snprintf (err_str, PATH_MAX, "Failed to store " + "snap-max-hard-limit for volume %s", volname); + goto out; + } + } + + ret = 0; +out: + if (ret) { + *op_errstr = gf_strdup (err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + } + return ret; +} + +int +snap_max_limits_display_commit (dict_t *rsp_dict, char *volname, + char **op_errstr) +{ + char err_str[PATH_MAX] = ""; + char buf[PATH_MAX] = ""; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = -1; + uint64_t active_hard_limit = 0; + uint64_t snap_max_limit = 0; + uint64_t soft_limit_value = -1; + uint64_t count = 0; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (volname); + GF_ASSERT (op_errstr); + + conf = this->private; + + GF_ASSERT (conf); + + if (!volname) { + /* For system limit */ + list_for_each_entry (volinfo, &conf->volumes, vol_list) { + if (volinfo->is_snap_volume == _gf_true) + continue; + snap_max_limit = volinfo->snap_max_hard_limit; + if (snap_max_limit > conf->snap_max_hard_limit) + active_hard_limit = conf->snap_max_hard_limit; + else + active_hard_limit = snap_max_limit; + soft_limit_value = (active_hard_limit * + conf->snap_max_soft_limit) / 100; + + snprintf (buf, sizeof(buf), "volume%ld-volname", count); + ret = dict_set_str (rsp_dict, buf, volinfo->volname); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-snap-max-hard-limit", count); + ret = dict_set_uint64 (rsp_dict, buf, snap_max_limit); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-active-hard-limit", count); + ret = dict_set_uint64 (rsp_dict, buf, + active_hard_limit); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-snap-max-soft-limit", count); + ret = dict_set_uint64 (rsp_dict, buf, soft_limit_value); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + count++; + } + + ret = dict_set_uint64 (rsp_dict, "voldisplaycount", count); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set voldisplaycount"); + goto out; + } + } else { + /* For one volume */ + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, PATH_MAX, "Failed to get the" + " volinfo for volume %s", volname); + goto out; + } + + snap_max_limit = volinfo->snap_max_hard_limit; + if (snap_max_limit > conf->snap_max_hard_limit) + active_hard_limit = conf->snap_max_hard_limit; + else + active_hard_limit = snap_max_limit; + + soft_limit_value = (active_hard_limit * + conf->snap_max_soft_limit) / 100; + + snprintf (buf, sizeof(buf), "volume%ld-volname", count); + ret = dict_set_str (rsp_dict, buf, volinfo->volname); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-snap-max-hard-limit", count); + ret = dict_set_uint64 (rsp_dict, buf, snap_max_limit); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-active-hard-limit", count); + ret = dict_set_uint64 (rsp_dict, buf, active_hard_limit); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-snap-max-soft-limit", count); + ret = dict_set_uint64 (rsp_dict, buf, soft_limit_value); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + + count++; + + ret = dict_set_uint64 (rsp_dict, "voldisplaycount", count); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set voldisplaycount"); + goto out; + } + + } + + ret = dict_set_uint64 (rsp_dict, "snap-max-hard-limit", + conf->snap_max_hard_limit); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set sys-snap-max-hard-limit "); + goto out; + } + + ret = dict_set_uint64 (rsp_dict, "snap-max-soft-limit", + conf->snap_max_soft_limit); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set sys-snap-max-hard-limit "); + goto out; + } + + ret = 0; +out: + if (ret) { + *op_errstr = gf_strdup (err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + } + return ret; +} + +int +glusterd_snapshot_config_commit (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + char *volname = NULL; + xlator_t *this = NULL; + int ret = -1; + char err_str[PATH_MAX] = {0,}; + glusterd_conf_t *conf = NULL; + int config_command = 0; + uint64_t hard_limit = 0; + uint64_t soft_limit = 0; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + conf = this->private; + + GF_ASSERT (conf); + + ret = dict_get_int32 (dict, "config-command", &config_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get config-command type"); + goto out; + } + + /* Ignore the return value of the following dict_get, + * as they are optional + */ + ret = dict_get_str (dict, "volname", &volname); + + ret = dict_get_uint64 (dict, "snap-max-hard-limit", &hard_limit); + + ret = dict_get_uint64 (dict, "snap-max-soft-limit", &soft_limit); + + switch (config_command) { + case GF_SNAP_CONFIG_TYPE_SET: + if (hard_limit) { + /* Commit ops for snap-max-hard-limit */ + ret = snap_max_hard_limit_set_commit (dict, hard_limit, + volname, + op_errstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "snap-max-hard-limit set " + "commit failed."); + goto out; + } + } + + if (soft_limit) { + /* For system limit */ + conf->snap_max_soft_limit = soft_limit; + + ret = glusterd_store_global_info (this); + if (ret) { + snprintf (err_str, PATH_MAX, "Failed to store " + "snap-max-soft-limit for system"); + *op_errstr = gf_strdup (err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", + err_str); + goto out; + } + } + break; + + case GF_SNAP_CONFIG_DISPLAY: + /* Reading data from local node only */ + if (!is_origin_glusterd (dict)) { + ret = 0; + break; + } + + ret = snap_max_limits_display_commit (rsp_dict, volname, + op_errstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "snap-max-limit " + "display commit failed."); + goto out; + } + break; + default: + break; + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_get_brick_lvm_details (dict_t *rsp_dict, + glusterd_brickinfo_t *brickinfo, char *volname, + char *device, char *key_prefix) +{ + + int ret = -1; + glusterd_conf_t *priv = NULL; + runner_t runner = {0,}; + xlator_t *this = NULL; + char msg[PATH_MAX] = ""; + char buf[PATH_MAX] = ""; + char *ptr = NULL; + char *token = NULL; + char key[PATH_MAX] = ""; + char *value = NULL; + + GF_ASSERT (rsp_dict); + GF_ASSERT (brickinfo); + GF_ASSERT (volname); + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + device = glusterd_get_brick_mount_details (brickinfo); + if (!device) { + gf_log (this->name, GF_LOG_ERROR, "Getting device name for " + "the brick %s:%s failed", brickinfo->hostname, + brickinfo->path); + goto out; + } + runinit (&runner); + snprintf (msg, sizeof (msg), "running lvs command, " + "for getting snap status"); + /* Using lvs command fetch the Volume Group name, + * Percentage of data filled and Logical Volume size + * + * "-o" argument is used to get the desired information, + * example : "lvs /dev/VolGroup/thin_vol -o vgname,lv_size", + * will get us Volume Group name and Logical Volume size. + * + * Here separator used is ":", + * for the above given command with separator ":", + * The output will be "vgname:lvsize" + */ + runner_add_args (&runner, "lvs", device, "--noheading", "-o", + "vg_name,data_percent,lv_size", + "--separator", ":", NULL); + runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); + runner_log (&runner, "", GF_LOG_DEBUG, msg); + ret = runner_start (&runner); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not perform lvs action"); + goto end; + } + do { + ptr = fgets (buf, sizeof (buf), + runner_chio (&runner, STDOUT_FILENO)); + + if (ptr == NULL) + break; + token = strtok (buf, ":"); + if (token != NULL) { + while (token && token[0] == ' ') + token++; + if (!token) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Invalid vg entry"); + goto end; + } + value = gf_strdup (token); + if (!value) { + ret = -1; + goto end; + } + ret = snprintf (key, sizeof (key), "%s.vgname", + key_prefix); + if (ret < 0) { + goto end; + } + + ret = dict_set_dynstr (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save vgname "); + goto end; + } + } + + token = strtok (NULL, ":"); + if (token != NULL) { + value = gf_strdup (token); + if (!value) { + ret = -1; + goto end; + } + ret = snprintf (key, sizeof (key), "%s.data", + key_prefix); + if (ret < 0) { + goto end; + } + + ret = dict_set_dynstr (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save data percent "); + goto end; + } + } + token = strtok (NULL, ":"); + if (token != NULL) { + value = gf_strdup (token); + if (!value) { + ret = -1; + goto end; + } + ret = snprintf (key, sizeof (key), "%s.lvsize", + key_prefix); + if (ret < 0) { + goto end; + } + + ret = dict_set_dynstr (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save meta data percent "); + goto end; + } + } + + } while (ptr != NULL); + + ret = 0; + +end: + runner_end (&runner); + +out: + if (ret && value) { + GF_FREE (value); + } + + return ret; +} + +int +glusterd_get_single_brick_status (char **op_errstr, dict_t *rsp_dict, + char *keyprefix, int index, + glusterd_volinfo_t *snap_volinfo, + glusterd_brickinfo_t *brickinfo) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char key[PATH_MAX] = ""; + char *device = NULL; + char *value = NULL; + char brick_path[PATH_MAX] = ""; + char pidfile[PATH_MAX] = ""; + pid_t pid = -1; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + GF_ASSERT (keyprefix); + GF_ASSERT (snap_volinfo); + GF_ASSERT (brickinfo); + + ret = snprintf (key, sizeof (key), "%s.brick%d.path", keyprefix, + index); + if (ret < 0) { + goto out; + } + + ret = snprintf (brick_path, sizeof (brick_path), + "%s:%s", brickinfo->hostname, brickinfo->path); + if (ret < 0) { + goto out; + } + + value = gf_strdup (brick_path); + if (!value) { + ret = -1; + goto out; + } + + ret = dict_set_dynstr (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to store " + "brick_path %s", brickinfo->path); + goto out; + } + + if (brickinfo->snap_status == -1) { + /* Setting vgname as "Pending Snapshot" */ + value = gf_strdup ("Pending Snapshot"); + if (!value) { + ret = -1; + goto out; + } + + snprintf (key, sizeof (key), "%s.brick%d.vgname", + keyprefix, index); + ret = dict_set_dynstr (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save vgname "); + goto out; + } + + ret = 0; + goto out; + } + value = NULL; + + ret = snprintf (key, sizeof (key), "%s.brick%d.status", + keyprefix, index); + if (ret < 0) { + goto out; + } + + if (brickinfo->status == GF_BRICK_STOPPED) { + value = gf_strdup ("No"); + if (!value) { + ret = -1; + goto out; + } + ret = dict_set_str (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save brick status"); + goto out; + } + value = NULL; + } else { + value = gf_strdup ("Yes"); + if (!value) { + ret = -1; + goto out; + } + ret = dict_set_str (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save brick status"); + goto out; + } + value = NULL; + + GLUSTERD_GET_BRICK_PIDFILE (pidfile, snap_volinfo, + brickinfo, priv); + ret = gf_is_service_running (pidfile, &pid); + + ret = snprintf (key, sizeof (key), "%s.brick%d.pid", + keyprefix, index); + if (ret < 0) { + goto out; + } + + ret = dict_set_int32 (rsp_dict, key, pid); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save pid %d", pid); + goto out; + } + } + + ret = snprintf (key, sizeof (key), "%s.brick%d", + keyprefix, index); + if (ret < 0) { + goto out; + } + + ret = glusterd_get_brick_lvm_details (rsp_dict, brickinfo, + snap_volinfo->volname, + device, key); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "brick LVM details"); + goto out; + } +out: + if (ret && value) { + GF_FREE (value); + } + + return ret; +} + +int +glusterd_get_single_snap_status (char **op_errstr, dict_t *rsp_dict, + char *keyprefix, glusterd_snap_t *snap) +{ + int ret = -1; + xlator_t *this = NULL; + char key[PATH_MAX] = ""; + char brickkey[PATH_MAX] = ""; + glusterd_volinfo_t *snap_volinfo = NULL; + glusterd_volinfo_t *tmp_volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + int volcount = 0; + int brickcount = 0; + + this = THIS; + GF_ASSERT (this); + + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + GF_ASSERT (keyprefix); + GF_ASSERT (snap); + + list_for_each_entry_safe (snap_volinfo, tmp_volinfo, &snap->volumes, + vol_list) { + ret = snprintf (key, sizeof (key), "%s.vol%d", keyprefix, + volcount); + if (ret < 0) { + goto out; + } + list_for_each_entry (brickinfo, &snap_volinfo->bricks, + brick_list) { + if (!glusterd_is_local_brick (this, snap_volinfo, + brickinfo)) { + brickcount++; + continue; + } + + ret = glusterd_get_single_brick_status (op_errstr, + rsp_dict, key, brickcount, + snap_volinfo, brickinfo); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Getting " + "single snap status failed"); + goto out; + } + brickcount++; + } + ret = snprintf (brickkey, sizeof (brickkey), "%s.brickcount", + key); + if (ret < 0) { + goto out; + } + + ret = dict_set_int32 (rsp_dict, brickkey, brickcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save brick count"); + goto out; + } + volcount++; + } + + ret = snprintf (key, sizeof (key), "%s.volcount", keyprefix); + if (ret < 0) { + goto out; + } + + ret = dict_set_int32 (rsp_dict, key, volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save volcount"); + goto out; + } + +out: + + return ret; +} + +int +glusterd_get_each_snap_object_status (char **op_errstr, dict_t *rsp_dict, + glusterd_snap_t *snap, char *keyprefix) +{ + int ret = -1; + char key[PATH_MAX] = ""; + char *temp = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap); + GF_ASSERT (keyprefix); + + /* TODO : Get all the snap volume info present in snap object, + * as of now, There will be only one snapvolinfo per snap object + */ + ret = snprintf (key, sizeof (key), "%s.snapname", keyprefix); + if (ret < 0) { + goto out; + } + + temp = gf_strdup (snap->snapname); + if (temp == NULL) { + ret = -1; + goto out; + } + ret = dict_set_dynstr (rsp_dict, key, temp); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not save " + "snap name"); + goto out; + } + + temp = NULL; + + ret = snprintf (key, sizeof (key), "%s.uuid", keyprefix); + if (ret < 0) { + goto out; + } + + temp = gf_strdup (uuid_utoa (snap->snap_id)); + if (temp == NULL) { + ret = -1; + goto out; + } + + ret = dict_set_dynstr (rsp_dict, key, temp); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not save " + "snap UUID"); + goto out; + } + + temp = NULL; + + ret = glusterd_get_single_snap_status (op_errstr, rsp_dict, keyprefix, + snap); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not get single snap status"); + goto out; + } + + ret = snprintf (key, sizeof (key), "%s.volcount", keyprefix); + if (ret < 0) { + goto out; + } + + ret = dict_set_int32 (rsp_dict, key, 1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not save volcount"); + goto out; + } +out: + if (ret && temp) + GF_FREE (temp); + + return ret; +} + +int +glusterd_get_snap_status_of_volume (char **op_errstr, dict_t *rsp_dict, + char *volname, char *keyprefix) { + int ret = -1; + glusterd_volinfo_t *snap_volinfo = NULL; + glusterd_volinfo_t *temp_volinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + char key[PATH_MAX] = ""; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int i = 0; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + GF_ASSERT (volname); + GF_ASSERT (keyprefix); + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get volinfo of " + "volume %s", volname); + goto out; + } + + list_for_each_entry_safe (snap_volinfo, temp_volinfo, + &volinfo->snap_volumes, snapvol_list) { + ret = snprintf (key, sizeof (key), "status.snap%d", i); + if (ret < 0) { + goto out; + } + + ret = glusterd_get_each_snap_object_status (op_errstr, + rsp_dict, snap_volinfo->snapshot, key); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Function : " + "glusterd_get_single_snap_status failed"); + goto out; + } + i++; + } + + ret = dict_set_int32 (rsp_dict, "status.snapcount", i); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to save snapcount"); + ret = -1; + goto out; + } +out: + return ret; +} + +int +glusterd_get_all_snapshot_status (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int32_t i = 0; + int ret = -1; + char key[PATH_MAX] = ""; + glusterd_conf_t *priv = NULL; + glusterd_snap_t *snap = NULL; + glusterd_snap_t *tmp_snap = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + list_for_each_entry_safe (snap, tmp_snap, + &priv->snapshots, snap_list) { + ret = snprintf (key, sizeof (key), "status.snap%d", i); + if (ret < 0) { + goto out; + } + + ret = glusterd_get_each_snap_object_status (op_errstr, + rsp_dict, snap, key); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not get " + "the details of a snap object: %s", + snap->snapname); + goto out; + } + i++; + } + + ret = dict_set_int32 (rsp_dict, "status.snapcount", i); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not save snapcount"); + goto out; + } + + ret = 0; +out: + return ret; +} + + +int +glusterd_snapshot_status_commit (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + xlator_t *this = NULL; + int ret = -1; + glusterd_conf_t *conf = NULL; + char *get_buffer = NULL; + int32_t cmd = -1; + char *snapname = NULL; + glusterd_snap_t *snap = NULL; + char *volname = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + conf = this->private; + + GF_ASSERT (conf); + ret = dict_get_int32 (dict, "cmd", &cmd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get status cmd type"); + goto out; + } + + ret = dict_set_int32 (rsp_dict, "cmd", cmd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save status cmd in rsp dictionary"); + goto out; + } + switch (cmd) { + case GF_SNAP_STATUS_TYPE_ALL: + { + ret = glusterd_get_all_snapshot_status (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to " + "get snapshot status"); + goto out; + } + break; + } + case GF_SNAP_STATUS_TYPE_SNAP: + { + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to " + "get snap name"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + ret = gf_asprintf (op_errstr, "Snap (%s) " + "not found", snapname); + if (ret < 0) { + goto out; + } + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Unable to " + "get snap volinfo"); + goto out; + } + ret = glusterd_get_each_snap_object_status (op_errstr, + rsp_dict, snap, "status.snap0"); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to " + "get status of snap %s", get_buffer); + goto out; + } + break; + } + case GF_SNAP_STATUS_TYPE_VOL: + { + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to" + " get volume name"); + goto out; + } + + ret = glusterd_get_snap_status_of_volume (op_errstr, + rsp_dict, volname, "status.vol0"); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Function :" + " glusterd_get_snap_status_of_volume " + "failed"); + goto out; + } + } + } + ret = 0; +out: + return ret; +} + +int32_t +glusterd_snapshot_create_postvalidate (dict_t *dict, int32_t op_ret, + char **op_errstr, dict_t *rsp_dict) +{ + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + priv = this->private; + GF_ASSERT (priv); + + if (op_ret) { + ret = glusterd_do_snap_cleanup (dict, op_errstr, rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "cleanup operation " + "failed"); + goto out; + } + } else { + ret = glusterd_snapshot_update_snaps_post_validate (dict, + op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "create snapshot"); + goto out; + } + } + + ret = 0; +out: + return ret; +} + +int32_t +glusterd_snapshot (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int32_t snap_command = 0; + int ret = -1; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_int32 (dict, "type", &snap_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case (GF_SNAP_OPTION_TYPE_CREATE): + ret = glusterd_snapshot_create_commit (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "create snapshot"); + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_CONFIG: + ret = glusterd_snapshot_config_commit (dict, op_errstr, + rsp_dict); + break; + + case GF_SNAP_OPTION_TYPE_DELETE: + ret = glusterd_snapshot_remove_commit (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "delete snapshot"); + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_RESTORE: + ret = glusterd_snapshot_restore (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Failed to " + "restore snapshot"); + goto out; + } + + break; + + case GF_SNAP_OPTION_TYPE_STATUS: + ret = glusterd_snapshot_status_commit (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "show snapshot status"); + goto out; + } + break; + + + default: + gf_log (this->name, GF_LOG_WARNING, "invalid snap command"); + goto out; + break; + } + + ret = 0; + +out: + return ret; +} + +int +glusterd_snapshot_brickop (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int ret = -1; + int64_t vol_count = 0; + int64_t count = 1; + char key[1024] = {0,}; + char *volname = NULL; + int32_t snap_command = 0; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + ret = dict_get_int32 (dict, "type", &snap_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + ret = dict_get_int64 (dict, "volcount", &vol_count); + if (ret) + goto out; + while (count <= vol_count) { + snprintf (key, 1024, "volname%"PRId64, count); + ret = dict_get_str (dict, key, &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get volname"); + goto out; + } + ret = dict_set_str (dict, "volname", volname); + if (ret) + goto out; + + ret = gd_brick_op_phase (GD_OP_SNAP, NULL, dict, + op_errstr); + if (ret) + goto out; + volname = NULL; + count++; + } + + dict_del (dict, "volname"); + ret = 0; + break; + case GF_SNAP_OPTION_TYPE_DELETE: + break; + default: + break; + } + +out: + return ret; +} + +int +glusterd_snapshot_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int snap_command = 0; + xlator_t *this = NULL; + int ret = -1; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + ret = dict_get_int32 (dict, "type", &snap_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case (GF_SNAP_OPTION_TYPE_CREATE): + ret = glusterd_snapshot_create_prevalidate (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot create " + "pre-validation failed"); + goto out; + } + break; + + case (GF_SNAP_OPTION_TYPE_CONFIG): + ret = glusterd_snapshot_config_prevalidate (dict, op_errstr); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot config " + "pre-validation failed"); + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_RESTORE: + ret = glusterd_snapshot_restore_prevalidate (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot restore " + "validation failed"); + goto out; + } + break; + case GF_SNAP_OPTION_TYPE_DELETE: + ret = glusterd_snapshot_remove_prevalidate (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot remove " + "validation failed"); + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_STATUS: + ret = glusterd_snapshot_status_prevalidate (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot status " + "validation failed"); + goto out; + } + break; + + default: + gf_log (this->name, GF_LOG_WARNING, "invalid snap command"); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_snapshot_postvalidate (dict_t *dict, int32_t op_ret, char **op_errstr, + dict_t *rsp_dict) +{ + int snap_command = 0; + xlator_t *this = NULL; + int ret = -1; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + ret = dict_get_int32 (dict, "type", &snap_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + ret = glusterd_snapshot_create_postvalidate (dict, op_ret, + op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot create " + "post-validation failed"); + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_DELETE: + case GF_SNAP_OPTION_TYPE_RESTORE: + ret = glusterd_snapshot_update_snaps_post_validate (dict, + op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "update missed snaps list"); + goto out; + } + break; + + default: + gf_log (this->name, GF_LOG_WARNING, "invalid snap command"); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_handle_snapshot_fn (rpcsvc_request_t *req) +{ + int32_t ret = 0; + dict_t *dict = NULL; + gf_cli_req cli_req = {{0},}; + glusterd_op_t cli_op = GD_OP_SNAP; + int type = 0; + glusterd_conf_t *conf = NULL; + char *host_uuid = NULL; + char err_str[2048] = {0,}; + xlator_t *this = NULL; + char *volname = NULL; + + GF_ASSERT (req); + + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + + ret = xdr_to_generic (req->msg[0], &cli_req, + (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len > 0) { + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "unserialize req-buffer to dictionary"); + snprintf (err_str, sizeof (err_str), "Unable to decode " + "the command"); + goto out; + } + + dict->extra_stdfree = cli_req.dict.dict_val; + + host_uuid = gf_strdup (uuid_utoa(MY_UUID)); + if (host_uuid == NULL) { + snprintf (err_str, sizeof (err_str), "Failed to get " + "the uuid of local glusterd"); + ret = -1; + goto out; + } + ret = dict_set_dynstr (dict, "host-uuid", host_uuid); + if (ret) { + GF_FREE (host_uuid); + goto out; + } + + + } else { + gf_log (this->name, GF_LOG_ERROR, "request dict length is %d", + cli_req.dict.dict_len); + goto out; + } + + ret = dict_get_int32 (dict, "type", &type); + if (ret < 0) { + snprintf (err_str, sizeof (err_str), "Command type not found"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + switch (type) { + case GF_SNAP_OPTION_TYPE_CREATE: + ret = glusterd_handle_snapshot_create (req, cli_op, dict, + err_str, sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot create " + "failed: %s", err_str); + } + break; + case GF_SNAP_OPTION_TYPE_RESTORE: + ret = glusterd_handle_snapshot_restore (req, cli_op, dict, + err_str, sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot restore " + "failed: %s", err_str); + } + + break; + case GF_SNAP_OPTION_TYPE_INFO: + ret = glusterd_handle_snapshot_info (req, cli_op, dict, + err_str, sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot info " + "failed"); + } + break; + case GF_SNAP_OPTION_TYPE_LIST: + ret = glusterd_handle_snapshot_list (req, cli_op, dict, + err_str, sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot list " + "failed"); + } + break; + case GF_SNAP_OPTION_TYPE_CONFIG: + /* TODO : Type of lock to be taken when we are setting + * limits system wide + */ + ret = dict_get_str (dict, "volname", &volname); + if (!volname) { + ret = dict_set_int32 (dict, "hold_vol_locks", + _gf_false); + if (ret) { + gf_log ("cli", GF_LOG_ERROR, + "Unable to set hold_vol_locks value " + "as _gf_false"); + goto out; + } + + } + ret = glusterd_mgmt_v3_initiate_all_phases (req, cli_op, dict); + break; + case GF_SNAP_OPTION_TYPE_DELETE: + ret = glusterd_handle_snapshot_remove (req, cli_op, dict, + err_str, + sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot delete " + "failed: %s", err_str); + } + break; + case GF_SNAP_OPTION_TYPE_START: + case GF_SNAP_OPTION_TYPE_STOP: + case GF_SNAP_OPTION_TYPE_STATUS: + ret = glusterd_handle_snapshot_status (req, cli_op, dict, + err_str, + sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot status " + "failed: %s", err_str); + } + break; + default: + gf_log (this->name, GF_LOG_ERROR, "Unkown snapshot request " + "type (%d)", type); + ret = -1; /* Failure */ + } + +out: + if (ret) { + if (err_str[0] == '\0') + snprintf (err_str, sizeof (err_str), + "Operation failed"); + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + dict, err_str); + } + + return ret; +} + +int +glusterd_handle_snapshot (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, glusterd_handle_snapshot_fn); +} + +static inline void +glusterd_free_snap_op (glusterd_snap_op_t *snap_op) +{ + if (snap_op) { + if (snap_op->brick_path) + GF_FREE (snap_op->brick_path); + + GF_FREE (snap_op); + } +} + +/* Look for duplicates and accordingly update the list */ +int32_t +glusterd_update_missed_snap_entry (glusterd_missed_snap_info *missed_snapinfo, + glusterd_snap_op_t *missed_snap_op) +{ + int32_t ret = -1; + glusterd_snap_op_t *snap_opinfo = NULL; + gf_boolean_t match = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(missed_snapinfo); + GF_ASSERT(missed_snap_op); + + list_for_each_entry (snap_opinfo, &missed_snapinfo->snap_ops, + snap_ops_list) { + if ((!strcmp (snap_opinfo->brick_path, + missed_snap_op->brick_path)) && + (snap_opinfo->op == missed_snap_op->op)) { + /* If two entries have conflicting status + * GD_MISSED_SNAP_DONE takes precedence + */ + if ((snap_opinfo->status == GD_MISSED_SNAP_PENDING) && + (missed_snap_op->status == GD_MISSED_SNAP_DONE)) { + snap_opinfo->status = GD_MISSED_SNAP_DONE; + gf_log (this->name, GF_LOG_INFO, + "Updating missed snap status " + "for %s:%d:%s:%d as DONE", + missed_snapinfo->node_snap_info, + snap_opinfo->brick_num, + snap_opinfo->brick_path, + snap_opinfo->op); + ret = 0; + glusterd_free_snap_op (missed_snap_op); + goto out; + } + match = _gf_true; + break; + } else if ((snap_opinfo->brick_num == + missed_snap_op->brick_num) && + (snap_opinfo->op == GF_SNAP_OPTION_TYPE_CREATE) && + (missed_snap_op->op == + GF_SNAP_OPTION_TYPE_DELETE)) { + /* Optimizing create and delete entries for the same + * brick and same node + */ + gf_log (this->name, GF_LOG_INFO, + "Updating missed snap status " + "for %s:%d:%s:%d as DONE", + missed_snapinfo->node_snap_info, + snap_opinfo->brick_num, + snap_opinfo->brick_path, + snap_opinfo->op); + snap_opinfo->status = GD_MISSED_SNAP_DONE; + ret = 0; + glusterd_free_snap_op (missed_snap_op); + goto out; + } + } + + if (match == _gf_true) { + gf_log (this->name, GF_LOG_INFO, + "Duplicate entry. Not updating"); + glusterd_free_snap_op (missed_snap_op); + } else { + list_add_tail (&missed_snap_op->snap_ops_list, + &missed_snapinfo->snap_ops); + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Add new missed snap entry to the missed_snaps list. */ +int32_t +glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num, + char *brick_path, int32_t snap_op, + int32_t snap_status) +{ + int32_t ret = -1; + glusterd_missed_snap_info *missed_snapinfo = NULL; + glusterd_snap_op_t *missed_snap_op = NULL; + glusterd_conf_t *priv = NULL; + gf_boolean_t match = _gf_false; + gf_boolean_t free_missed_snap_info = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(missed_info); + GF_ASSERT(brick_path); + + priv = this->private; + GF_ASSERT (priv); + + /* Create the snap_op object consisting of the * + * snap id and the op */ + ret = glusterd_missed_snap_op_new (&missed_snap_op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create new missed snap object."); + ret = -1; + goto out; + } + + missed_snap_op->brick_path = gf_strdup(brick_path); + if (!missed_snap_op->brick_path) { + ret = -1; + goto out; + } + missed_snap_op->brick_num = brick_num; + missed_snap_op->op = snap_op; + missed_snap_op->status = snap_status; + + /* Look for other entries for the same node and same snap */ + list_for_each_entry (missed_snapinfo, &priv->missed_snaps_list, + missed_snaps) { + if (!strcmp (missed_snapinfo->node_snap_info, + missed_info)) { + /* Found missed snapshot info for * + * the same node and same snap */ + match = _gf_true; + break; + } + } + + if (match == _gf_false) { + /* First snap op missed for the brick */ + ret = glusterd_missed_snapinfo_new (&missed_snapinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create missed snapinfo"); + goto out; + } + free_missed_snap_info = _gf_true; + missed_snapinfo->node_snap_info = gf_strdup(missed_info); + if (!missed_snapinfo->node_snap_info) { + ret = -1; + goto out; + } + + list_add_tail (&missed_snap_op->snap_ops_list, + &missed_snapinfo->snap_ops); + list_add_tail (&missed_snapinfo->missed_snaps, + &priv->missed_snaps_list); + + ret = 0; + goto out; + } else { + ret = glusterd_update_missed_snap_entry (missed_snapinfo, + missed_snap_op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to update existing missed snap entry."); + goto out; + } + } + +out: + if (ret) { + glusterd_free_snap_op (missed_snap_op); + + if (missed_snapinfo && + (free_missed_snap_info == _gf_true)) { + if (missed_snapinfo->node_snap_info) + GF_FREE (missed_snapinfo->node_snap_info); + + GF_FREE (missed_snapinfo); + } + } + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Add missing snap entries to the in-memory conf->missed_snap_list */ +int32_t +glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count) +{ + char *buf = NULL; + char *tmp = NULL; + char *save_ptr = NULL; + char *nodeid = NULL; + char *snap_uuid = NULL; + char *brick_path = NULL; + char missed_info[PATH_MAX] = ""; + char name_buf[PATH_MAX] = ""; + int32_t i = -1; + int32_t ret = -1; + int32_t brick_num = -1; + int32_t snap_op = -1; + int32_t snap_status = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + + priv = this->private; + GF_ASSERT (priv); + + /* We can update the missed_snaps_list without acquiring * + * any additional locks as big lock will be held. */ + for (i = 0; i < missed_snap_count; i++) { + snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", + i); + ret = dict_get_str (dict, name_buf, &buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch %s", name_buf); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "missed_snap_entry = %s", + buf); + + /* Need to make a duplicate string coz the same dictionary * + * is resent to the non-originator nodes */ + tmp = gf_strdup (buf); + if (!tmp) { + ret = -1; + goto out; + } + + /* Fetch the node-id, snap-id, brick_num, + * brick_path, snap_op and snap status + */ + nodeid = strtok_r (tmp, ":", &save_ptr); + snap_uuid = strtok_r (NULL, "=", &save_ptr); + brick_num = atoi(strtok_r (NULL, ":", &save_ptr)); + brick_path = strtok_r (NULL, ":", &save_ptr); + snap_op = atoi(strtok_r (NULL, ":", &save_ptr)); + snap_status = atoi(strtok_r (NULL, ":", &save_ptr)); + + if (!nodeid || !snap_uuid || !brick_path || + brick_num < 1 || snap_op < 1 || + snap_status < 1) { + gf_log (this->name, GF_LOG_ERROR, + "Invalid missed_snap_entry"); + ret = -1; + goto out; + } + + snprintf (missed_info, sizeof(missed_info), "%s:%s", + nodeid, snap_uuid); + + ret = glusterd_store_missed_snaps_list (missed_info, + brick_num, + brick_path, + snap_op, + snap_status); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to store missed snaps_list"); + goto out; + } + + GF_FREE (tmp); + tmp = NULL; + } + +out: + if (tmp) + GF_FREE (tmp); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index d5bfeda3fcf..e28a30c5ac4 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -46,7 +46,7 @@ #include <dirent.h> void -glusterd_replace_slash_with_hipen (char *str) +glusterd_replace_slash_with_hyphen (char *str) { char *ptr = NULL; @@ -85,7 +85,7 @@ glusterd_store_key_vol_brick_set (glusterd_brickinfo_t *brickinfo, GF_ASSERT (len >= PATH_MAX); snprintf (key_vol_brick, len, "%s", brickinfo->path); - glusterd_replace_slash_with_hipen (key_vol_brick); + glusterd_replace_slash_with_hyphen (key_vol_brick); } static void @@ -246,6 +246,20 @@ glusterd_store_brickinfo_write (int fd, glusterd_brickinfo_t *brickinfo) if (ret) goto out; + if (strlen(brickinfo->device_path) > 0) { + snprintf (value, sizeof(value), "%s", brickinfo->device_path); + ret = gf_store_save_value (fd, + GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH, value); + if (ret) + goto out; + } + + snprintf (value, sizeof(value), "%d", brickinfo->snap_status); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS, + value); + if (ret) + goto out; + if (!brickinfo->vg[0]) goto out; @@ -511,14 +525,13 @@ int _storeopts (dict_t *this, char *key, data_t *value, void *data) int32_t glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo) { - char *str = NULL; + char *str = NULL; + char buf[PATH_MAX] = {0,}; + int32_t ret = -1; GF_ASSERT (fd > 0); GF_ASSERT (volinfo); - char buf[PATH_MAX] = {0,}; - int32_t ret = -1; - snprintf (buf, sizeof (buf), "%d", volinfo->type); ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_TYPE, buf); if (ret) @@ -560,6 +573,14 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo) if (ret) goto out; + snprintf (buf, sizeof (buf), "%s", volinfo->parent_volname); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_PARENT_VOLNAME, buf); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to store " + GLUSTERD_STORE_KEY_PARENT_VOLNAME); + goto out; + } + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_ID, uuid_utoa (volinfo->volume_id)); if (ret) @@ -599,6 +620,23 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo) goto out; } + snprintf (buf, sizeof (buf), "%d", volinfo->is_volume_restored); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_IS_RESTORED, buf); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to write is_volume_restored"); + goto out; + } + + snprintf (buf, sizeof (buf), "%"PRIu64, volinfo->snap_max_hard_limit); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, + buf); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to write snap-max-hard-limit"); + goto out; + } + out: if (ret) gf_log (THIS->name, GF_LOG_ERROR, "Unable to write volume " @@ -616,8 +654,7 @@ glusterd_store_voldirpath_set (glusterd_volinfo_t *volinfo, char *voldirpath, priv = THIS->private; GF_ASSERT (priv); - snprintf (voldirpath, len, "%s/%s/%s", priv->workdir, - GLUSTERD_VOLUME_DIR_PREFIX, volinfo->volname); + GLUSTERD_GET_VOLUME_DIR (voldirpath, volinfo, priv); } static int32_t @@ -631,10 +668,32 @@ glusterd_store_create_volume_dir (glusterd_volinfo_t *volinfo) glusterd_store_voldirpath_set (volinfo, voldirpath, sizeof (voldirpath)); ret = gf_store_mkdir (voldirpath); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret); return ret; } +static int32_t +glusterd_store_create_snap_dir (glusterd_snap_t *snap) +{ + int32_t ret = -1; + char snapdirpath[PATH_MAX] = {0,}; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (snap); + + GLUSTERD_GET_SNAP_DIR (snapdirpath, snap, priv); + + ret = mkdir_p (snapdirpath, 0755, _gf_true); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to create snaps dir " + "%s", snapdirpath); + } + return ret; +} + int32_t glusterd_store_volinfo_write (int fd, glusterd_volinfo_t *volinfo) { @@ -659,6 +718,49 @@ out: return ret; } +int32_t +glusterd_store_snapinfo_write (glusterd_snap_t *snap) +{ + int32_t ret = -1; + int fd = 0; + char buf[PATH_MAX] = ""; + + GF_ASSERT (snap); + + fd = gf_store_mkstemp (snap->shandle); + if (fd <= 0) + goto out; + + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_ID, + uuid_utoa (snap->snap_id)); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%d", snap->snap_status); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_STATUS, buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%d", snap->snap_restored); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_RESTORED, buf); + if (ret) + goto out; + + if (snap->description) { + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_DESC, + snap->description); + if (ret) + goto out; + } + + snprintf (buf, sizeof (buf), "%ld", snap->time_stamp); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_TIMESTAMP, buf); + +out: + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + static void glusterd_store_rbstatepath_set (glusterd_volinfo_t *volinfo, char *rbstatepath, size_t len) @@ -718,6 +820,36 @@ glusterd_store_quota_conf_path_set (glusterd_volinfo_t *volinfo, GLUSTERD_VOLUME_QUOTA_CONFIG); } +static void +glusterd_store_missed_snaps_list_path_set (char *missed_snaps_list, + size_t len) +{ + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (missed_snaps_list); + GF_ASSERT (len <= PATH_MAX); + + snprintf (missed_snaps_list, len, "%s/snaps/" + GLUSTERD_MISSED_SNAPS_LIST_FILE, priv->workdir); +} + +static void +glusterd_store_snapfpath_set (glusterd_snap_t *snap, char *snap_fpath, + size_t len) +{ + glusterd_conf_t *priv = NULL; + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (snap); + GF_ASSERT (snap_fpath); + GF_ASSERT (len <= PATH_MAX); + + snprintf (snap_fpath, len, "%s/snaps/%s/%s", priv->workdir, + snap->snapname, GLUSTERD_SNAP_INFO_FILE); +} + int32_t glusterd_store_create_rbstate_shandle_on_absence (glusterd_volinfo_t *volinfo) { @@ -778,6 +910,43 @@ glusterd_store_create_quota_conf_sh_on_absence (glusterd_volinfo_t *volinfo) return ret; } + +static int32_t +glusterd_store_create_missed_snaps_list_shandle_on_absence () +{ + char missed_snaps_list[PATH_MAX] = ""; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + priv = this->private; + GF_ASSERT (priv); + + glusterd_store_missed_snaps_list_path_set (missed_snaps_list, + sizeof(missed_snaps_list)); + + ret = gf_store_handle_create_on_absence + (&priv->missed_snaps_list_shandle, + missed_snaps_list); + return ret; +} + +int32_t +glusterd_store_create_snap_shandle_on_absence (glusterd_snap_t *snap) +{ + char snapfpath[PATH_MAX] = {0}; + int32_t ret = 0; + + GF_ASSERT (snap); + + glusterd_store_snapfpath_set (snap, snapfpath, sizeof (snapfpath)); + ret = gf_store_handle_create_on_absence (&snap->shandle, snapfpath); + return ret; +} + int32_t glusterd_store_brickinfos (glusterd_volinfo_t *volinfo, int vol_fd) { @@ -789,7 +958,7 @@ glusterd_store_brickinfos (glusterd_volinfo_t *volinfo, int vol_fd) list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { ret = glusterd_store_brickinfo (volinfo, brickinfo, - brick_count, vol_fd); + brick_count, vol_fd); if (ret) goto out; brick_count++; @@ -1094,6 +1263,60 @@ out: } int32_t +glusterd_store_snap_atomic_update (glusterd_snap_t *snap) +{ + int ret = -1; + GF_ASSERT (snap); + + ret = gf_store_rename_tmppath (snap->shandle); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, "Couldn't rename " + "temporary file(s): Reason %s", strerror (errno)); + + return ret; +} + +int32_t +glusterd_store_snap (glusterd_snap_t *snap) +{ + int32_t ret = -1; + + GF_ASSERT (snap); + + ret = glusterd_store_create_snap_dir (snap); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to create snap dir"); + goto out; + } + + ret = glusterd_store_create_snap_shandle_on_absence (snap); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to create snap info " + "file"); + goto out; + } + + ret = glusterd_store_snapinfo_write (snap); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to write snap info"); + goto out; + } + + ret = glusterd_store_snap_atomic_update (snap); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR,"Failed to do automic update"); + goto out; + } + +out: + if (ret && snap->shandle) + gf_store_unlink_tmppath (snap->shandle); + + gf_log (THIS->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t glusterd_store_volinfo (glusterd_volinfo_t *volinfo, glusterd_volinfo_ver_ac_t ac) { int32_t ret = -1; @@ -1150,7 +1373,6 @@ out: return ret; } - int32_t glusterd_store_delete_volume (glusterd_volinfo_t *volinfo) { @@ -1177,8 +1399,8 @@ glusterd_store_delete_volume (glusterd_volinfo_t *volinfo) GLUSTERD_GET_VOLUME_DIR (pathname, volinfo, priv); snprintf (delete_path, sizeof (delete_path), - "%s/"GLUSTERD_TRASH"/%s.deleted", priv->workdir, - uuid_utoa (volinfo->volume_id)); + "%s/"GLUSTERD_TRASH"/%s.deleted", priv->workdir, + uuid_utoa (volinfo->volume_id)); snprintf (trashdir, sizeof (trashdir), "%s/"GLUSTERD_TRASH, priv->workdir); @@ -1270,6 +1492,116 @@ out: return ret; } +/*TODO: cleanup the duplicate code and implement a generic function for + * deleting snap/volume depending on the parameter flag */ +int32_t +glusterd_store_delete_snap (glusterd_snap_t *snap) +{ + char pathname[PATH_MAX] = {0,}; + int32_t ret = 0; + glusterd_conf_t *priv = NULL; + DIR *dir = NULL; + struct dirent *entry = NULL; + char path[PATH_MAX] = {0,}; + char delete_path[PATH_MAX] = {0,}; + char trashdir[PATH_MAX] = {0,}; + struct stat st = {0, }; + xlator_t *this = NULL; + gf_boolean_t rename_fail = _gf_false; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + + GF_ASSERT (snap); + GLUSTERD_GET_SNAP_DIR (pathname, snap, priv); + + snprintf (delete_path, sizeof (delete_path), + "%s/"GLUSTERD_TRASH"/snap-%s.deleted", priv->workdir, + uuid_utoa (snap->snap_id)); + + snprintf (trashdir, sizeof (trashdir), "%s/"GLUSTERD_TRASH, + priv->workdir); + + ret = mkdir (trashdir, 0777); + if (ret && errno != EEXIST) { + gf_log (this->name, GF_LOG_ERROR, "Failed to create trash " + "directory, reason : %s", strerror (errno)); + ret = -1; + goto out; + } + + ret = rename (pathname, delete_path); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to rename snap " + "directory %s to %s", pathname, delete_path); + rename_fail = _gf_true; + goto out; + } + + dir = opendir (delete_path); + if (!dir) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to open directory %s." + " Reason : %s", delete_path, strerror (errno)); + ret = 0; + goto out; + } + + glusterd_for_each_entry (entry, dir); + while (entry) { + snprintf (path, PATH_MAX, "%s/%s", delete_path, entry->d_name); + ret = stat (path, &st); + if (ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to stat " + "entry %s : %s", path, strerror (errno)); + goto stat_failed; + } + + if (S_ISDIR (st.st_mode)) + ret = rmdir (path); + else + ret = unlink (path); + + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, " Failed to remove " + "%s. Reason : %s", path, strerror (errno)); + } + + gf_log (this->name, GF_LOG_DEBUG, "%s %s", + ret ? "Failed to remove":"Removed", + entry->d_name); +stat_failed: + memset (path, 0, sizeof(path)); + glusterd_for_each_entry (entry, dir); + } + + ret = closedir (dir); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to close dir %s. " + "Reason : %s",delete_path, strerror (errno)); + } + + ret = rmdir (delete_path); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s,err: %s", + delete_path, strerror (errno)); + } + ret = rmdir (trashdir); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s, Reason:" + " %s", trashdir, strerror (errno)); + } + +out: + if (snap->shandle) { + gf_store_handle_destroy (snap->shandle); + snap->shandle = NULL; + } + ret = (rename_fail == _gf_true) ? -1: 0; + + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} int glusterd_store_global_info (xlator_t *this) @@ -1280,6 +1612,7 @@ glusterd_store_global_info (xlator_t *this) char path[PATH_MAX] = {0,}; gf_store_handle_t *handle = NULL; char *uuid_str = NULL; + char buf[256] = {0, }; conf = this->private; @@ -1332,6 +1665,24 @@ glusterd_store_global_info (xlator_t *this) goto out; } + snprintf (buf, sizeof (buf), "%"PRIu64, conf->snap_max_hard_limit); + ret = gf_store_save_value (handle->fd, + GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Storing snap-max-hard-limit failed ret = %d", ret); + goto out; + } + + snprintf (buf, sizeof (buf), "%"PRIu64, conf->snap_max_soft_limit); + ret = gf_store_save_value (handle->fd, + GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Storing snap-max-soft-limit failed ret = %d", ret); + goto out; + } + ret = gf_store_rename_tmppath (handle); out: if (handle) { @@ -1405,15 +1756,94 @@ out: return ret; } +int +glusterd_retrieve_sys_snap_max_limit (xlator_t *this, uint64_t *limit, + char *key) +{ + char *limit_str = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + uint64_t tmp_limit = 0; + char *tmp = NULL; + char path[PATH_MAX] = {0,}; + gf_store_handle_t *handle = NULL; + + GF_ASSERT (this); + priv = this->private; + + GF_ASSERT (priv); + GF_ASSERT (limit); + GF_ASSERT (key); + + if (!priv->handle) { + snprintf (path, PATH_MAX, "%s/%s", priv->workdir, + GLUSTERD_INFO_FILE); + ret = gf_store_handle_retrieve (path, &handle); + + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Unable to get store " + "handle!"); + goto out; + } + + priv->handle = handle; + } + + ret = gf_store_retrieve_value (priv->handle, + key, + &limit_str); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "No previous %s present", key); + goto out; + } + + tmp_limit = strtoul (limit_str, &tmp, 10); + if ((tmp_limit <= 0) || (tmp && strlen (tmp) > 1)) { + gf_log (this->name, GF_LOG_WARNING, "invalid version number"); + goto out; + } + + *limit = tmp_limit; + + ret = 0; +out: + if (limit_str) + GF_FREE (limit_str); + + return ret; +} static int glusterd_restore_op_version (xlator_t *this) { - glusterd_conf_t *conf = NULL; - int ret = 0; - int op_version = 0; + glusterd_conf_t *conf = NULL; + int ret = 0; + int op_version = 0; conf = this->private; + ret = glusterd_retrieve_sys_snap_max_limit (this, + &conf->snap_max_hard_limit, + GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Unable to retrieve system snap-max-hard-limit, " + "setting it to default value(%d)", + GLUSTERD_SNAPS_MAX_HARD_LIMIT); + conf->snap_max_hard_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + } + + ret = glusterd_retrieve_sys_snap_max_limit (this, + &conf->snap_max_soft_limit, + GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Unable to retrieve system snap-max-soft-limit, " + "setting it to default value(%d)", + GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT); + conf->snap_max_soft_limit = GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT; + } + ret = glusterd_retrieve_op_version (this, &op_version); if (!ret) { if ((op_version < GD_OP_VERSION_MIN) || @@ -1501,7 +1931,6 @@ out: int32_t glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo) { - int32_t ret = 0; glusterd_brickinfo_t *brickinfo = NULL; gf_store_iter_t *iter = NULL; @@ -1523,7 +1952,7 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo) priv = THIS->private; - GLUSTERD_GET_BRICK_DIR (brickdir, volinfo, priv) + GLUSTERD_GET_BRICK_DIR (brickdir, volinfo, priv); ret = gf_store_iter_new (volinfo->shandle, &tmpiter); @@ -1606,6 +2035,13 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo) } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED, strlen (GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED))) { gf_string2int (value, &brickinfo->decommissioned); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH, + strlen (GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH))) { + strncpy (brickinfo->device_path, value, + sizeof (brickinfo->device_path)); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS, + strlen (GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS))) { + gf_string2int (value, &brickinfo->snap_status); } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_VGNAME, strlen (GLUSTERD_STORE_KEY_BRICK_VGNAME))) { @@ -1659,10 +2095,9 @@ out: int32_t -glusterd_store_retrieve_rbstate (char *volname) +glusterd_store_retrieve_rbstate (glusterd_volinfo_t *volinfo) { int32_t ret = -1; - glusterd_volinfo_t *volinfo = NULL; gf_store_iter_t *iter = NULL; char *key = NULL; char *value = NULL; @@ -1670,15 +2105,13 @@ glusterd_store_retrieve_rbstate (char *volname) glusterd_conf_t *priv = NULL; char path[PATH_MAX] = {0,}; gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; + xlator_t *this = NULL; - priv = THIS->private; - - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, "Couldn't get" - "volinfo for %s.", volname); - goto out; - } + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (volinfo); GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, priv); snprintf (path, sizeof (path), "%s/%s", volpath, @@ -1755,16 +2188,15 @@ glusterd_store_retrieve_rbstate (char *volname) goto out; out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); return ret; } int32_t -glusterd_store_retrieve_node_state (char *volname) +glusterd_store_retrieve_node_state (glusterd_volinfo_t *volinfo) { int32_t ret = -1; - glusterd_volinfo_t *volinfo = NULL; gf_store_iter_t *iter = NULL; char *key = NULL; char *value = NULL; @@ -1779,13 +2211,8 @@ glusterd_store_retrieve_node_state (char *volname) this = THIS; GF_ASSERT (this); priv = this->private; - - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, "Couldn't get" - "volinfo for %s.", volname); - goto out; - } + GF_ASSERT (priv); + GF_ASSERT (volinfo); GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, priv); snprintf (path, sizeof (path), "%s/%s", volpath, @@ -1866,50 +2293,59 @@ out: dict_unref (volinfo->rebal.dict); if (tmp_dict) dict_unref (tmp_dict); - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); return ret; } -int32_t -glusterd_store_retrieve_volume (char *volname) -{ - int32_t ret = -1; - glusterd_volinfo_t *volinfo = NULL; - gf_store_iter_t *iter = NULL; - char *key = NULL; - char *value = NULL; - char volpath[PATH_MAX] = {0,}; - glusterd_conf_t *priv = NULL; - char path[PATH_MAX] = {0,}; - int exists = 0; - gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; - ret = glusterd_volinfo_new (&volinfo); - if (ret) - goto out; +int +glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo) +{ + int ret = -1; + int exists = 0; + char *key = NULL; + char *value = NULL; + char volpath[PATH_MAX] = {0,}; + char path[PATH_MAX] = {0,}; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + gf_store_iter_t *iter = NULL; + gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; - strncpy (volinfo->volname, volname, GLUSTERD_MAX_VOLUME_NAME); + this = THIS; + GF_ASSERT (this); + conf = THIS->private; + GF_ASSERT (volinfo); - priv = THIS->private; + GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, conf); - GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, priv); snprintf (path, sizeof (path), "%s/%s", volpath, GLUSTERD_VOLUME_INFO_FILE); ret = gf_store_handle_retrieve (path, &volinfo->shandle); - if (ret) + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "volinfo handle is NULL"); goto out; + } ret = gf_store_iter_new (volinfo->shandle, &iter); - if (ret) + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get new store " + "iter"); goto out; + } ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); - if (ret) + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get next store " + "iter"); goto out; + } while (!ret) { + gf_log ("", GF_LOG_DEBUG, "key = %s value = %s", key, value); if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_TYPE, strlen (GLUSTERD_STORE_KEY_VOL_TYPE))) { volinfo->type = atoi (value); @@ -1978,6 +2414,15 @@ glusterd_store_retrieve_volume (char *volname) } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_CAPS, strlen (GLUSTERD_STORE_KEY_VOL_CAPS))) { volinfo->caps = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, + strlen (GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT))) { + volinfo->snap_max_hard_limit = (uint64_t) atoll (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_IS_RESTORED, + strlen (GLUSTERD_STORE_KEY_VOL_IS_RESTORED))) { + volinfo->is_volume_restored = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_PARENT_VOLNAME, + strlen (GLUSTERD_STORE_KEY_PARENT_VOLNAME))) { + strncpy (volinfo->parent_volname, value, sizeof(volinfo->parent_volname) - 1); } else { if (is_key_glusterd_hooks_friendly (key)) { @@ -2076,10 +2521,49 @@ glusterd_store_retrieve_volume (char *volname) goto out; ret = gf_store_iter_destroy (iter); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to destroy store " + "iter"); + goto out; + } + + ret = 0; +out: + return ret; +} + +glusterd_volinfo_t* +glusterd_store_retrieve_volume (char *volname, glusterd_snap_t *snap) +{ + int32_t ret = -1; + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *origin_volinfo = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (volname); + ret = glusterd_volinfo_new (&volinfo); if (ret) goto out; + strncpy (volinfo->volname, volname, GLUSTERD_MAX_VOLUME_NAME); + volinfo->snapshot = snap; + if (snap) + volinfo->is_snap_volume = _gf_true; + + ret = glusterd_store_update_volinfo (volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to update volinfo " + "for %s volume", volname); + goto out; + } + ret = glusterd_store_retrieve_bricks (volinfo); if (ret) goto out; @@ -2105,13 +2589,30 @@ glusterd_store_retrieve_volume (char *volname) goto out; - list_add_order (&volinfo->vol_list, &priv->volumes, - glusterd_compare_volume_name); + if (!snap) { + list_add_order (&volinfo->vol_list, &priv->volumes, + glusterd_compare_volume_name); + } else { + ret = glusterd_volinfo_find (volinfo->parent_volname, + &origin_volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Parent volinfo " + "not found for %s volume", volname); + goto out; + } + glusterd_list_add_snapvol (origin_volinfo, volinfo); + } out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + if (ret) { + if (volinfo) + glusterd_volinfo_delete (volinfo); + volinfo = NULL; + } - return ret; + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + + return volinfo; } inline void @@ -2211,9 +2712,9 @@ out: } int32_t -glusterd_store_retrieve_volumes (xlator_t *this) +glusterd_store_retrieve_volumes (xlator_t *this, glusterd_snap_t *snap) { - int32_t ret = 0; + int32_t ret = -1; char path[PATH_MAX] = {0,}; glusterd_conf_t *priv = NULL; DIR *dir = NULL; @@ -2225,51 +2726,413 @@ glusterd_store_retrieve_volumes (xlator_t *this) GF_ASSERT (priv); - snprintf (path, PATH_MAX, "%s/%s", priv->workdir, - GLUSTERD_VOLUME_DIR_PREFIX); + if (snap) + snprintf (path, PATH_MAX, "%s/snaps/%s", priv->workdir, + snap->snapname); + else + snprintf (path, PATH_MAX, "%s/%s", priv->workdir, + GLUSTERD_VOLUME_DIR_PREFIX); dir = opendir (path); if (!dir) { gf_log ("", GF_LOG_ERROR, "Unable to open dir %s", path); - ret = -1; goto out; } glusterd_for_each_entry (entry, dir); while (entry) { - ret = glusterd_store_retrieve_volume (entry->d_name); - if (ret) { + if ( entry->d_type != DT_DIR ) + goto next; + + volinfo = glusterd_store_retrieve_volume (entry->d_name, snap); + if (!volinfo) { gf_log ("", GF_LOG_ERROR, "Unable to restore " "volume: %s", entry->d_name); + ret = -1; goto out; } - ret = glusterd_store_retrieve_rbstate (entry->d_name); + ret = glusterd_store_retrieve_rbstate (volinfo); if (ret) { /* Backward compatibility */ gf_log ("", GF_LOG_INFO, "Creating a new rbstate " "for volume: %s.", entry->d_name); - ret = glusterd_volinfo_find (entry->d_name, &volinfo); ret = glusterd_store_create_rbstate_shandle_on_absence (volinfo); ret = glusterd_store_perform_rbstate_store (volinfo); } - ret = glusterd_store_retrieve_node_state (entry->d_name); + ret = glusterd_store_retrieve_node_state (volinfo); if (ret) { /* Backward compatibility */ gf_log ("", GF_LOG_INFO, "Creating a new node_state " "for volume: %s.", entry->d_name); - ret = glusterd_volinfo_find (entry->d_name, &volinfo); - ret = glusterd_store_create_nodestate_sh_on_absence (volinfo); ret = glusterd_store_perform_node_state_store (volinfo); } +next: + glusterd_for_each_entry (entry, dir); + } + + ret = 0; + +out: + if (dir) + closedir (dir); + gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + + return ret; +} + +int32_t +glusterd_resolve_snap_bricks (xlator_t *this, glusterd_snap_t *snap) +{ + int32_t ret = -1; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + GF_ASSERT (this); + GF_VALIDATE_OR_GOTO (this->name, snap, out); + + list_for_each_entry (volinfo, &snap->volumes, vol_list) { + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + ret = glusterd_resolve_brick (brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "resolve brick failed in restore"); + goto out; + } + } + } + + ret = 0; + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + + return ret; +} + +int +glusterd_store_update_snap (glusterd_snap_t *snap) +{ + int ret = -1; + char *key = NULL; + char *value = NULL; + char snappath[PATH_MAX] = {0,}; + char path[PATH_MAX] = {0,}; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + gf_store_iter_t *iter = NULL; + gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; + + this = THIS; + conf = this->private; + GF_ASSERT (snap); + + GLUSTERD_GET_SNAP_DIR (snappath, snap, conf); + + snprintf (path, sizeof (path), "%s/%s", snappath, + GLUSTERD_SNAP_INFO_FILE); + + ret = gf_store_handle_retrieve (path, &snap->shandle); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "snap handle is NULL"); + goto out; + } + + ret = gf_store_iter_new (snap->shandle, &iter); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get new store " + "iter"); + goto out; + } + + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get next store " + "iter"); + goto out; + } + + while (!ret) { + gf_log (this->name, GF_LOG_DEBUG, "key = %s value = %s", + key, value); + + if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_ID, + strlen (GLUSTERD_STORE_KEY_SNAP_ID))) { + ret = uuid_parse (value, snap->snap_id); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "Failed to parse uuid"); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_RESTORED, + strlen (GLUSTERD_STORE_KEY_SNAP_RESTORED))) { + snap->snap_restored = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_STATUS, + strlen (GLUSTERD_STORE_KEY_SNAP_STATUS))) { + snap->snap_status = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_DESC, + strlen (GLUSTERD_STORE_KEY_SNAP_DESC))) { + snap->description = gf_strdup (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_TIMESTAMP, + strlen (GLUSTERD_STORE_KEY_SNAP_TIMESTAMP))) { + snap->time_stamp = atoi (value); + } + + GF_FREE (key); + GF_FREE (value); + key = NULL; + value = NULL; + + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); + } + + if (op_errno != GD_STORE_EOF) + goto out; + + ret = gf_store_iter_destroy (iter); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to destroy store " + "iter"); + } + +out: + return ret; +} + +int32_t +glusterd_store_retrieve_snap (char *snapname) +{ + int32_t ret = -1; + dict_t *dict = NULL; + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (snapname); + + dict = dict_new(); + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create dict"); + ret = -1; + goto out; + } + + snap = glusterd_new_snap_object (); + if (!snap) { + gf_log (this->name, GF_LOG_ERROR, "Failed to create " + " snap object"); + goto out; + } + + strncpy (snap->snapname, snapname, strlen(snapname)); + ret = glusterd_store_update_snap (snap); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to update snapshot " + "for %s snap", snapname); + goto out; + } + + ret = glusterd_store_retrieve_volumes (this, snap); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to retrieve " + "snap volumes for snap %s", snapname); + goto out; + } + + /* Unlike bricks of normal volumes which are resolved at the end of + the glusterd restore, the bricks belonging to the snap volumes of + each snap should be resolved as part of snapshot restore itself. + Because if the snapshot has to be removed, then resolving bricks + helps glusterd in understanding what all bricks have its own uuid + and killing those bricks. + */ + ret = glusterd_resolve_snap_bricks (this, snap); + if (ret) + gf_log (this->name, GF_LOG_WARNING, "resolving the snap bricks" + " failed (snap: %s)", snap?snap->snapname:""); + + /* When the snapshot command from cli is received, the on disk and + in memory structures for the snapshot are created (with the status) + being marked as GD_SNAP_STATUS_INIT. Once the backend snapshot is + taken, the status is changed to GD_SNAP_STATUS_IN_USE. If glusterd + dies after taking the backend snapshot, but before updating the + status, then when glusterd comes up, it should treat that snapshot + as a failed snapshot and clean it up. + */ + if (snap->snap_status != GD_SNAP_STATUS_IN_USE) { + ret = glusterd_snap_remove (dict, snap, _gf_true, _gf_true); + if (ret) + gf_log (this->name, GF_LOG_WARNING, "failed to remove" + " the snapshot %s", snap->snapname); + goto out; + } + + /* TODO: list_add_order can do 'N-square' comparisions and + is not efficient. Find a better solution to store the snap + in order */ + list_add_order (&snap->snap_list, &priv->snapshots, + glusterd_compare_snap_time); + +out: + if (dict) + dict_unref (dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + return ret; +} + +/* Read the missed_snap_list and update the in-memory structs */ +int32_t +glusterd_store_retrieve_missed_snaps_list (xlator_t *this) +{ + char buf[PATH_MAX] = ""; + char path[PATH_MAX] = ""; + char *missed_node_info = NULL; + char *brick_path = NULL; + char *value = NULL; + char *save_ptr = NULL; + FILE *fp = NULL; + int32_t brick_num = -1; + int32_t snap_op = -1; + int32_t snap_status = -1; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + gf_store_op_errno_t store_errno = GD_STORE_SUCCESS; + + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + /* Get the path of the missed_snap_list */ + glusterd_store_missed_snaps_list_path_set (path, sizeof(path)); + + fp = fopen (path, "r"); + if (!fp) { + /* If errno is ENOENT then there are no missed snaps yet */ + if (errno != ENOENT) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to open %s. " + "Error: %s", path, strerror(errno)); + } else { + gf_log (this->name, GF_LOG_INFO, + "No missed snaps list."); + ret = 0; + } + goto out; + } + + do { + ret = gf_store_read_and_tokenize (fp, buf, + &missed_node_info, &value, + &store_errno); + if (ret) { + if (store_errno == GD_STORE_EOF) { + gf_log (this->name, + GF_LOG_DEBUG, + "EOF for missed_snap_list"); + ret = 0; + break; + } + gf_log (this->name, GF_LOG_ERROR, + "Failed to fetch data from " + "missed_snaps_list. Error: %s", + gf_store_strerror (store_errno)); + goto out; + } + + /* Fetch the brick_num, brick_path, snap_op and snap status */ + brick_num = atoi(strtok_r (value, ":", &save_ptr)); + brick_path = strtok_r (NULL, ":", &save_ptr); + snap_op = atoi(strtok_r (NULL, ":", &save_ptr)); + snap_status = atoi(strtok_r (NULL, ":", &save_ptr)); + + if (!missed_node_info || !brick_path || + brick_num < 1 || snap_op < 1 || + snap_status < 1) { + gf_log (this->name, GF_LOG_ERROR, + "Invalid missed_snap_entry"); + ret = -1; + goto out; + } + + ret = glusterd_store_missed_snaps_list (missed_node_info, + brick_num, + brick_path, + snap_op, + snap_status); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to store missed snaps_list"); + goto out; + } + + } while (store_errno == GD_STORE_SUCCESS); + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + return ret; +} + +int32_t +glusterd_store_retrieve_snaps (xlator_t *this) +{ + int32_t ret = 0; + char path[PATH_MAX] = {0,}; + glusterd_conf_t *priv = NULL; + DIR *dir = NULL; + struct dirent *entry = NULL; + + GF_ASSERT (this); + priv = this->private; + + GF_ASSERT (priv); + + snprintf (path, PATH_MAX, "%s/snaps", priv->workdir); + + dir = opendir (path); + + if (!dir) { + /* If snaps dir doesn't exists ignore the error for + backward compatibility */ + if (errno != ENOENT) { + ret = -1; + gf_log ("", GF_LOG_ERROR, "Unable to open dir %s", path); + } + goto out; + } + + glusterd_for_each_entry (entry, dir); + + while (entry) { + if (entry->d_type == DT_DIR) { + ret = glusterd_store_retrieve_snap (entry->d_name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to restore snapshot: %s", + entry->d_name); + goto out; + } + } + glusterd_for_each_entry (entry, dir); } + /* Retrieve missed_snaps_list */ + ret = glusterd_store_retrieve_missed_snaps_list (this); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Failed to retrieve missed_snaps_list"); + goto out; + } + out: if (dir) closedir (dir); @@ -2278,6 +3141,126 @@ out: return ret; } +/* Writes all the contents of conf->missed_snap_list */ +int32_t +glusterd_store_write_missed_snapinfo (int32_t fd) +{ + char value[PATH_MAX] = ""; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + glusterd_missed_snap_info *missed_snapinfo = NULL; + glusterd_snap_op_t *snap_opinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT (priv); + + /* Write the missed_snap_entry */ + list_for_each_entry (missed_snapinfo, &priv->missed_snaps_list, + missed_snaps) { + list_for_each_entry (snap_opinfo, + &missed_snapinfo->snap_ops, + snap_ops_list) { + snprintf (value, sizeof(value), "%d:%s:%d:%d", + snap_opinfo->brick_num, + snap_opinfo->brick_path, + snap_opinfo->op, snap_opinfo->status); + ret = gf_store_save_value + (fd, + missed_snapinfo->node_snap_info, + value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to write missed snapinfo"); + goto out; + } + } + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Adds the missed snap entries to the in-memory conf->missed_snap_list * + * and writes them to disk */ +int32_t +glusterd_store_update_missed_snaps (dict_t *dict, int32_t missed_snap_count) +{ + int32_t fd = -1; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + + priv = this->private; + GF_ASSERT (priv); + + if (missed_snap_count < 1) { + gf_log (this->name, GF_LOG_DEBUG, "No missed snaps"); + ret = 0; + goto out; + } + + ret = glusterd_store_create_missed_snaps_list_shandle_on_absence (); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to obtain " + "missed_snaps_list store handle."); + goto out; + } + + fd = gf_store_mkstemp (priv->missed_snaps_list_shandle); + if (fd <= 0) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create tmp file"); + ret = -1; + goto out; + } + + ret = glusterd_add_missed_snaps_to_list (dict, missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snaps to list"); + goto out; + } + + ret = glusterd_store_write_missed_snapinfo (fd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to write missed snaps to disk"); + goto out; + } + + ret = gf_store_rename_tmppath (priv->missed_snaps_list_shandle); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to rename the tmp file"); + goto out; + } +out: + if (ret && (fd > 0)) { + ret = gf_store_unlink_tmppath (priv->missed_snaps_list_shandle); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to unlink the tmp file"); + } + ret = -1; + } + + if (fd > 0) + close (fd); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + int32_t glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo) { @@ -2681,7 +3664,11 @@ glusterd_restore () goto out; } - ret = glusterd_store_retrieve_volumes (this); + ret = glusterd_store_retrieve_volumes (this, NULL); + if (ret) + goto out; + + ret = glusterd_store_retrieve_snaps (this); if (ret) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 955abb09f08..64c073a8a39 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -35,43 +35,56 @@ typedef enum glusterd_store_ver_ac_{ } glusterd_volinfo_ver_ac_t; -#define GLUSTERD_STORE_UUID_KEY "UUID" - -#define GLUSTERD_STORE_KEY_VOL_TYPE "type" -#define GLUSTERD_STORE_KEY_VOL_COUNT "count" -#define GLUSTERD_STORE_KEY_VOL_STATUS "status" -#define GLUSTERD_STORE_KEY_VOL_PORT "port" -#define GLUSTERD_STORE_KEY_VOL_SUB_COUNT "sub_count" -#define GLUSTERD_STORE_KEY_VOL_STRIPE_CNT "stripe_count" -#define GLUSTERD_STORE_KEY_VOL_REPLICA_CNT "replica_count" -#define GLUSTERD_STORE_KEY_VOL_BRICK "brick" -#define GLUSTERD_STORE_KEY_VOL_VERSION "version" -#define GLUSTERD_STORE_KEY_VOL_TRANSPORT "transport-type" -#define GLUSTERD_STORE_KEY_VOL_ID "volume-id" -#define GLUSTERD_STORE_KEY_RB_STATUS "rb_status" -#define GLUSTERD_STORE_KEY_RB_SRC_BRICK "rb_src" -#define GLUSTERD_STORE_KEY_RB_DST_BRICK "rb_dst" -#define GLUSTERD_STORE_KEY_RB_DST_PORT "rb_port" -#define GLUSTERD_STORE_KEY_VOL_DEFRAG "rebalance_status" -#define GLUSTERD_STORE_KEY_DEFRAG_OP "rebalance_op" -#define GLUSTERD_STORE_KEY_USERNAME "username" -#define GLUSTERD_STORE_KEY_PASSWORD "password" -#define GLUSTERD_STORE_KEY_VOL_OP_VERSION "op-version" +#define GLUSTERD_STORE_UUID_KEY "UUID" + +#define GLUSTERD_STORE_KEY_VOL_TYPE "type" +#define GLUSTERD_STORE_KEY_VOL_COUNT "count" +#define GLUSTERD_STORE_KEY_VOL_STATUS "status" +#define GLUSTERD_STORE_KEY_VOL_PORT "port" +#define GLUSTERD_STORE_KEY_VOL_SUB_COUNT "sub_count" +#define GLUSTERD_STORE_KEY_VOL_STRIPE_CNT "stripe_count" +#define GLUSTERD_STORE_KEY_VOL_REPLICA_CNT "replica_count" +#define GLUSTERD_STORE_KEY_VOL_BRICK "brick" +#define GLUSTERD_STORE_KEY_VOL_VERSION "version" +#define GLUSTERD_STORE_KEY_VOL_TRANSPORT "transport-type" +#define GLUSTERD_STORE_KEY_VOL_ID "volume-id" +#define GLUSTERD_STORE_KEY_VOL_IS_RESTORED "is-volume-restored" +#define GLUSTERD_STORE_KEY_RB_STATUS "rb_status" +#define GLUSTERD_STORE_KEY_RB_SRC_BRICK "rb_src" +#define GLUSTERD_STORE_KEY_RB_DST_BRICK "rb_dst" +#define GLUSTERD_STORE_KEY_RB_DST_PORT "rb_port" +#define GLUSTERD_STORE_KEY_VOL_DEFRAG "rebalance_status" +#define GLUSTERD_STORE_KEY_DEFRAG_OP "rebalance_op" +#define GLUSTERD_STORE_KEY_USERNAME "username" +#define GLUSTERD_STORE_KEY_PASSWORD "password" +#define GLUSTERD_STORE_KEY_PARENT_VOLNAME "parent_volname" +#define GLUSTERD_STORE_KEY_VOL_OP_VERSION "op-version" #define GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION "client-op-version" -#define GLUSTERD_STORE_KEY_BRICK_HOSTNAME "hostname" -#define GLUSTERD_STORE_KEY_BRICK_PATH "path" -#define GLUSTERD_STORE_KEY_BRICK_PORT "listen-port" -#define GLUSTERD_STORE_KEY_BRICK_RDMA_PORT "rdma.listen-port" +#define GLUSTERD_STORE_KEY_SNAP_NAME "name" +#define GLUSTERD_STORE_KEY_SNAP_ID "snap-id" +#define GLUSTERD_STORE_KEY_SNAP_DESC "desc" +#define GLUSTERD_STORE_KEY_SNAP_TIMESTAMP "time-stamp" +#define GLUSTERD_STORE_KEY_SNAP_STATUS "status" +#define GLUSTERD_STORE_KEY_SNAP_RESTORED "snap-restored" +#define GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT "snap-max-hard-limit" +#define GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT "snap-max-soft-limit" + +#define GLUSTERD_STORE_KEY_BRICK_HOSTNAME "hostname" +#define GLUSTERD_STORE_KEY_BRICK_PATH "path" +#define GLUSTERD_STORE_KEY_BRICK_PORT "listen-port" +#define GLUSTERD_STORE_KEY_BRICK_RDMA_PORT "rdma.listen-port" #define GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED "decommissioned" -#define GLUSTERD_STORE_KEY_BRICK_VGNAME "vg" -#define GLUSTERD_STORE_KEY_BRICK_ID "brick-id" +#define GLUSTERD_STORE_KEY_BRICK_VGNAME "vg" +#define GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH "device_path" +#define GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS "snap-status" +#define GLUSTERD_STORE_KEY_BRICK_ID "brick-id" -#define GLUSTERD_STORE_KEY_PEER_UUID "uuid" -#define GLUSTERD_STORE_KEY_PEER_HOSTNAME "hostname" -#define GLUSTERD_STORE_KEY_PEER_STATE "state" +#define GLUSTERD_STORE_KEY_PEER_UUID "uuid" +#define GLUSTERD_STORE_KEY_PEER_HOSTNAME "hostname" +#define GLUSTERD_STORE_KEY_PEER_STATE "state" -#define GLUSTERD_STORE_KEY_VOL_CAPS "caps" +#define GLUSTERD_STORE_KEY_VOL_CAPS "caps" #define glusterd_for_each_entry(entry, dir) \ do {\ @@ -94,6 +107,9 @@ int32_t glusterd_store_delete_volume (glusterd_volinfo_t *volinfo); int32_t +glusterd_store_delete_snap (glusterd_snap_t *snap); + +int32_t glusterd_retrieve_uuid (); int32_t @@ -128,8 +144,17 @@ int32_t glusterd_store_retrieve_options (xlator_t *this); int32_t +glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo); + +int32_t glusterd_store_options (xlator_t *this, dict_t *opts); +void +glusterd_replace_slash_with_hyphen (char *str); + +int32_t +glusterd_store_perform_volume_store (glusterd_volinfo_t *volinfo); + int32_t glusterd_store_create_quota_conf_sh_on_absence (glusterd_volinfo_t *volinfo); @@ -139,4 +164,11 @@ glusterd_store_retrieve_quota_version (glusterd_volinfo_t *volinfo); int glusterd_store_save_quota_version_and_cksum (glusterd_volinfo_t *volinfo); +int32_t +glusterd_store_snap (glusterd_snap_t *snap); + +int32_t +glusterd_store_update_missed_snaps (dict_t *dict, + int32_t missed_snap_count); + #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c index 578bce897df..b36d6f61680 100644 --- a/xlators/mgmt/glusterd/src/glusterd-syncop.c +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c @@ -21,7 +21,7 @@ extern glusterd_op_info_t opinfo; -static inline void +void gd_synctask_barrier_wait (struct syncargs *args, int count) { glusterd_conf_t *conf = THIS->private; @@ -56,17 +56,17 @@ gd_mgmt_v3_collate_errors (struct syncargs *args, int op_ret, int op_errno, "Error: %s", op_errstr); switch (op_code) { - case GLUSTERD_MGMT_V3_VOLUME_LOCK: + case GLUSTERD_MGMT_V3_LOCK: { snprintf (op_err, sizeof(op_err) - 1, - "Locking volume failed " + "Locking failed " "on %s. %s", peer_str, err_str); break; } - case GLUSTERD_MGMT_V3_VOLUME_UNLOCK: + case GLUSTERD_MGMT_V3_UNLOCK: { snprintf (op_err, sizeof(op_err) - 1, - "Unlocking volume failed " + "Unlocking failed " "on %s. %s", peer_str, err_str); break; } @@ -164,7 +164,7 @@ gd_collate_errors (struct syncargs *args, int op_ret, int op_errno, return; } -static void +void gd_syncargs_init (struct syncargs *args, dict_t *op_ctx) { args->dict = op_ctx; @@ -268,7 +268,7 @@ extern struct rpc_clnt_program gd_mgmt_prog; extern struct rpc_clnt_program gd_brick_prog; extern struct rpc_clnt_program gd_mgmt_v3_prog; -static int +int glusterd_syncop_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp) { int ret = 0; @@ -339,6 +339,12 @@ glusterd_syncop_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp) goto out; break; + case GD_OP_SNAP: + ret = glusterd_snap_use_rsp_dict (aggr, rsp); + if (ret) + goto out; + break; + default: break; } @@ -347,13 +353,13 @@ out: } int32_t -_gd_syncop_mgmt_volume_lock_cbk (struct rpc_req *req, struct iovec *iov, +gd_syncop_mgmt_v3_lock_cbk_fn (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { int ret = -1; struct syncargs *args = NULL; glusterd_peerinfo_t *peerinfo = NULL; - gd1_mgmt_volume_lock_rsp rsp = {{0},}; + gd1_mgmt_v3_lock_rsp rsp = {{0},}; call_frame_t *frame = NULL; int op_ret = -1; int op_errno = -1; @@ -374,7 +380,7 @@ _gd_syncop_mgmt_volume_lock_cbk (struct rpc_req *req, struct iovec *iov, } ret = xdr_to_generic (*iov, &rsp, - (xdrproc_t)xdr_gd1_mgmt_volume_lock_rsp); + (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); if (ret < 0) goto out; @@ -384,7 +390,7 @@ _gd_syncop_mgmt_volume_lock_cbk (struct rpc_req *req, struct iovec *iov, op_errno = rsp.op_errno; out: gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, - GLUSTERD_MGMT_V3_VOLUME_LOCK, + GLUSTERD_MGMT_V3_LOCK, peerinfo, rsp.uuid); STACK_DESTROY (frame->root); synctask_barrier_wake(args); @@ -392,21 +398,21 @@ out: } int32_t -gd_syncop_mgmt_volume_lock_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +gd_syncop_mgmt_v3_lock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) { return glusterd_big_locked_cbk (req, iov, count, myframe, - _gd_syncop_mgmt_volume_lock_cbk); + gd_syncop_mgmt_v3_lock_cbk_fn); } int -gd_syncop_mgmt_volume_lock (glusterd_op_t op, dict_t *op_ctx, - glusterd_peerinfo_t *peerinfo, - struct syncargs *args, uuid_t my_uuid, - uuid_t recv_uuid, uuid_t txn_id) +gd_syncop_mgmt_v3_lock (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid, uuid_t txn_id) { int ret = -1; - gd1_mgmt_volume_lock_req req = {{0},}; + gd1_mgmt_v3_lock_req req = {{0},}; glusterd_conf_t *conf = THIS->private; GF_ASSERT(op_ctx); @@ -425,10 +431,10 @@ gd_syncop_mgmt_volume_lock (glusterd_op_t op, dict_t *op_ctx, synclock_unlock (&conf->big_lock); ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, &gd_mgmt_v3_prog, - GLUSTERD_MGMT_V3_VOLUME_LOCK, - gd_syncop_mgmt_volume_lock_cbk, + GLUSTERD_MGMT_V3_LOCK, + gd_syncop_mgmt_v3_lock_cbk, (xdrproc_t) - xdr_gd1_mgmt_volume_lock_req); + xdr_gd1_mgmt_v3_lock_req); synclock_lock (&conf->big_lock); out: gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); @@ -436,13 +442,13 @@ out: } int32_t -_gd_syncop_mgmt_volume_unlock_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +gd_syncop_mgmt_v3_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) { int ret = -1; struct syncargs *args = NULL; glusterd_peerinfo_t *peerinfo = NULL; - gd1_mgmt_volume_unlock_rsp rsp = {{0},}; + gd1_mgmt_v3_unlock_rsp rsp = {{0},}; call_frame_t *frame = NULL; int op_ret = -1; int op_errno = -1; @@ -463,7 +469,7 @@ _gd_syncop_mgmt_volume_unlock_cbk (struct rpc_req *req, struct iovec *iov, } ret = xdr_to_generic (*iov, &rsp, - (xdrproc_t)xdr_gd1_mgmt_volume_unlock_rsp); + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); if (ret < 0) goto out; @@ -476,7 +482,7 @@ _gd_syncop_mgmt_volume_unlock_cbk (struct rpc_req *req, struct iovec *iov, op_errno = rsp.op_errno; out: gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, - GLUSTERD_MGMT_V3_VOLUME_UNLOCK, + GLUSTERD_MGMT_V3_UNLOCK, peerinfo, rsp.uuid); STACK_DESTROY (frame->root); synctask_barrier_wake(args); @@ -484,20 +490,20 @@ out: } int32_t -gd_syncop_mgmt_volume_unlock_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +gd_syncop_mgmt_v3_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) { return glusterd_big_locked_cbk (req, iov, count, myframe, - _gd_syncop_mgmt_volume_unlock_cbk); + gd_syncop_mgmt_v3_unlock_cbk_fn); } int -gd_syncop_mgmt_volume_unlock (dict_t *op_ctx, glusterd_peerinfo_t *peerinfo, - struct syncargs *args, uuid_t my_uuid, - uuid_t recv_uuid, uuid_t txn_id) +gd_syncop_mgmt_v3_unlock (dict_t *op_ctx, glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid, uuid_t txn_id) { int ret = -1; - gd1_mgmt_volume_unlock_req req = {{0},}; + gd1_mgmt_v3_unlock_req req = {{0},}; glusterd_conf_t *conf = THIS->private; GF_ASSERT(op_ctx); @@ -515,10 +521,10 @@ gd_syncop_mgmt_volume_unlock (dict_t *op_ctx, glusterd_peerinfo_t *peerinfo, synclock_unlock (&conf->big_lock); ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, &gd_mgmt_v3_prog, - GLUSTERD_MGMT_V3_VOLUME_UNLOCK, - gd_syncop_mgmt_volume_unlock_cbk, + GLUSTERD_MGMT_V3_UNLOCK, + gd_syncop_mgmt_v3_unlock_cbk, (xdrproc_t) - xdr_gd1_mgmt_volume_unlock_req); + xdr_gd1_mgmt_v3_unlock_req); synclock_lock (&conf->big_lock); out: gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); @@ -884,10 +890,12 @@ gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode, GD_SYNCOP (rpc, (&args), NULL, gd_syncop_brick_op_cbk, req, &gd_brick_prog, req->op, xdr_gd1_mgmt_brick_op_req); - if (args.errstr && errstr) - *errstr = args.errstr; - else - GF_FREE (args.errstr); + if (args.errstr) { + if ((strlen(args.errstr) > 0) && errstr) + *errstr = args.errstr; + else + GF_FREE (args.errstr); + } if (GD_OP_STATUS_VOLUME == op) { ret = dict_set_int32 (args.dict, "index", pnode->index); @@ -1097,8 +1105,8 @@ gd_lock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, dict_t *op_ctx, gd_syncop_mgmt_lock (peerinfo, &args, MY_UUID, peer_uuid); } else - gd_syncop_mgmt_volume_lock (op, op_ctx, peerinfo, &args, - MY_UUID, peer_uuid, txn_id); + gd_syncop_mgmt_v3_lock (op, op_ctx, peerinfo, &args, + MY_UUID, peer_uuid, txn_id); peer_cnt++; } gd_synctask_barrier_wait((&args), peer_cnt); @@ -1321,7 +1329,7 @@ gd_unlock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, int *op_ret, glusterd_peerinfo_t *peerinfo = NULL; glusterd_peerinfo_t *tmp = NULL; uuid_t tmp_uuid = {0}; - int peer_cnt = 0; + int peer_cnt = 0; int ret = -1; xlator_t *this = NULL; struct syncargs args = {0}; @@ -1358,9 +1366,9 @@ gd_unlock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, int *op_ret, if (volname) { list_for_each_entry_safe (peerinfo, tmp, peers, op_peers_list) { - gd_syncop_mgmt_volume_unlock (op_ctx, peerinfo, - &args, MY_UUID, - tmp_uuid, txn_id); + gd_syncop_mgmt_v3_unlock (op_ctx, peerinfo, + &args, MY_UUID, + tmp_uuid, txn_id); peer_cnt++; list_del_init (&peerinfo->op_peers_list); } @@ -1386,7 +1394,7 @@ out: if (is_acquired) { /* Based on the op-version, - * we release the cluster or volume lock + * we release the cluster or mgmt_v3 lock * and clear the op */ glusterd_op_clear_op (op); @@ -1394,7 +1402,8 @@ out: glusterd_unlock (MY_UUID); else { if (volname) { - ret = glusterd_volume_unlock (volname, MY_UUID); + ret = glusterd_mgmt_v3_unlock (volname, MY_UUID, + "vol"); if (ret) gf_log (this->name, GF_LOG_ERROR, "Unable to release lock for %s", @@ -1551,7 +1560,7 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req) goto out; } - /* Based on the op_version, acquire a cluster or volume lock */ + /* Based on the op_version, acquire a cluster or mgmt_v3 lock */ if (conf->op_version < GD_OP_VERSION_4) { ret = glusterd_lock (MY_UUID); if (ret) { @@ -1580,7 +1589,7 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req) goto out; } - ret = glusterd_volume_lock (volname, MY_UUID); + ret = glusterd_mgmt_v3_lock (volname, MY_UUID, "vol"); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Unable to acquire lock for %s", volname); diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.h b/xlators/mgmt/glusterd/src/glusterd-syncop.h index 7e4d47f9a8e..e83ea2f4c46 100644 --- a/xlators/mgmt/glusterd/src/glusterd-syncop.h +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.h @@ -12,6 +12,7 @@ #include "syncop.h" #include "glusterd-sm.h" +#include "glusterd.h" #define GD_SYNC_OPCODE_KEY "sync-mgmt-operation" @@ -51,4 +52,20 @@ int gd_syncop_mgmt_stage_op (struct rpc_clnt *rpc, struct syncargs *arg, int gd_syncop_mgmt_commit_op (struct rpc_clnt *rpc, struct syncargs *arg, uuid_t my_uuid, uuid_t recv_uuid, int op, dict_t *dict_out, dict_t *op_ctx); + +void +gd_synctask_barrier_wait (struct syncargs *args, int count); + +int +gd_build_peers_list (struct list_head *peers, struct list_head *xact_peers, + glusterd_op_t op); +int +gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char **op_errstr); + +int +glusterd_syncop_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp); + +void +gd_syncargs_init (struct syncargs *args, dict_t *op_ctx); #endif /* __RPC_SYNCOP_H */ diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 8c5cd7a43c6..721ffe27fe7 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -491,8 +491,11 @@ glusterd_volinfo_new (glusterd_volinfo_t **volinfo) if (!new_volinfo) goto out; + LOCK_INIT (&new_volinfo->lock); INIT_LIST_HEAD (&new_volinfo->vol_list); + INIT_LIST_HEAD (&new_volinfo->snapvol_list); INIT_LIST_HEAD (&new_volinfo->bricks); + INIT_LIST_HEAD (&new_volinfo->snap_volumes); new_volinfo->dict = dict_new (); if (!new_volinfo->dict) { @@ -508,6 +511,10 @@ glusterd_volinfo_new (glusterd_volinfo_t **volinfo) goto out; } + snprintf (new_volinfo->parent_volname, GLUSTERD_MAX_VOLUME_NAME, "N/A"); + + new_volinfo->snap_max_hard_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + new_volinfo->xl = THIS; pthread_mutex_init (&new_volinfo->reflock, NULL); @@ -520,6 +527,224 @@ out: return ret; } +/* This function will create a new volinfo and then + * dup the entries from volinfo to the new_volinfo. + * + * @param volinfo volinfo which will be duplicated + * @param dup_volinfo new volinfo which will be created + * @param set_userauth if this true then auth info is also set + * + * @return 0 on success else -1 + */ +int32_t +glusterd_volinfo_dup (glusterd_volinfo_t *volinfo, + glusterd_volinfo_t **dup_volinfo, + gf_boolean_t set_userauth) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *new_volinfo = NULL; + + this = THIS; + GF_ASSERT (this); + GF_VALIDATE_OR_GOTO (this->name, volinfo, out); + GF_VALIDATE_OR_GOTO (this->name, dup_volinfo, out); + + ret = glusterd_volinfo_new (&new_volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "not able to create the " + "duplicate volinfo for the volume %s", + volinfo->volname); + goto out; + } + + new_volinfo->type = volinfo->type; + new_volinfo->replica_count = volinfo->replica_count; + new_volinfo->stripe_count = volinfo->stripe_count; + new_volinfo->dist_leaf_count = volinfo->dist_leaf_count; + new_volinfo->sub_count = volinfo->sub_count; + new_volinfo->transport_type = volinfo->transport_type; + new_volinfo->nfs_transport_type = volinfo->nfs_transport_type; + new_volinfo->brick_count = volinfo->brick_count; + + dict_copy (volinfo->dict, new_volinfo->dict); + gd_update_volume_op_versions (new_volinfo); + + if (set_userauth) { + glusterd_auth_set_username (new_volinfo, + volinfo->auth.username); + glusterd_auth_set_password (new_volinfo, + volinfo->auth.password); + } + + *dup_volinfo = new_volinfo; + ret = 0; +out: + if (ret && (NULL != new_volinfo)) { + (void) glusterd_volinfo_delete (new_volinfo); + } + return ret; +} + +/* This function will duplicate brickinfo + * + * @param brickinfo Source brickinfo + * @param dup_brickinfo Destination brickinfo + * + * @return 0 on success else -1 + */ +int32_t +glusterd_brickinfo_dup (glusterd_brickinfo_t *brickinfo, + glusterd_brickinfo_t *dup_brickinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + GF_VALIDATE_OR_GOTO (this->name, brickinfo, out); + GF_VALIDATE_OR_GOTO (this->name, dup_brickinfo, out); + + strcpy (dup_brickinfo->hostname, brickinfo->hostname); + strcpy (dup_brickinfo->path, brickinfo->path); + strcpy (dup_brickinfo->device_path, brickinfo->device_path); + ret = gf_canonicalize_path (dup_brickinfo->path); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to canonicalize " + "brick path"); + goto out; + } + uuid_copy (dup_brickinfo->uuid, brickinfo->uuid); + + dup_brickinfo->port = brickinfo->port; + dup_brickinfo->rdma_port = brickinfo->rdma_port; + if (NULL != brickinfo->logfile) { + dup_brickinfo->logfile = gf_strdup (brickinfo->logfile); + if (NULL == dup_brickinfo->logfile) { + ret = -1; + goto out; + } + } + dup_brickinfo->status = brickinfo->status; + dup_brickinfo->snap_status = brickinfo->snap_status; +out: + return ret; +} + +/* This function will copy snap volinfo to the new + * passed volinfo and regenerate backend store files + * for the restored snap. + * + * @param new_volinfo new volinfo + * @param snap_volinfo volinfo of snap volume + * + * @return 0 on success and -1 on failure + * + * TODO: Duplicate all members of volinfo, e.g. geo-rep sync slaves + */ +int32_t +glusterd_snap_volinfo_restore (dict_t *rsp_dict, + glusterd_volinfo_t *new_volinfo, + glusterd_volinfo_t *snap_volinfo) +{ + int32_t brick_count = -1; + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *new_brickinfo = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + + GF_VALIDATE_OR_GOTO (this->name, new_volinfo, out); + GF_VALIDATE_OR_GOTO (this->name, snap_volinfo, out); + + brick_count = 0; + list_for_each_entry (brickinfo, &snap_volinfo->bricks, brick_list) { + ret = glusterd_brickinfo_new (&new_brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to create " + "new brickinfo"); + goto out; + } + + /* Duplicate brickinfo */ + ret = glusterd_brickinfo_dup (brickinfo, new_brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to dup " + "brickinfo"); + goto out; + } + + /*Update the brickid for the new brick in new volume*/ + GLUSTERD_ASSIGN_BRICKID_TO_BRICKINFO (new_brickinfo, + new_volinfo, + brick_count); + + /* If the brick is not of this peer, or snapshot is missed * + * for the brick do not replace the xattr for it */ + if ((!uuid_compare (brickinfo->uuid, MY_UUID)) && + (brickinfo->snap_status != -1)) { + /* We need to replace the volume id of all the bricks + * to the volume id of the origin volume. new_volinfo + * has the origin volume's volume id*/ + ret = sys_lsetxattr (new_brickinfo->path, + GF_XATTR_VOL_ID_KEY, + new_volinfo->volume_id, + sizeof (new_volinfo->volume_id), + XATTR_REPLACE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "set extended attribute %s on %s. " + "Reason: %s, snap: %s", + GF_XATTR_VOL_ID_KEY, + new_brickinfo->path, strerror (errno), + new_volinfo->volname); + goto out; + } + } + + /* If a snapshot is pending for this brick then + * restore should also be pending + */ + if (brickinfo->snap_status == -1) { + /* Adding missed delete to the dict */ + ret = glusterd_add_missed_snaps_to_dict + (rsp_dict, + snap_volinfo->volname, + brickinfo, + brick_count + 1, + GF_SNAP_OPTION_TYPE_RESTORE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snapshot info " + "for %s:%s in the rsp_dict", + brickinfo->hostname, + brickinfo->path); + goto out; + } + } + + list_add_tail (&new_brickinfo->brick_list, + &new_volinfo->bricks); + /* ownership of new_brickinfo is passed to new_volinfo */ + new_brickinfo = NULL; + brick_count++; + } + + /* Regenerate all volfiles */ + ret = glusterd_create_volfiles_and_notify_services (new_volinfo); + +out: + if (ret && (NULL != new_brickinfo)) { + (void) glusterd_brickinfo_delete (new_brickinfo); + } + + return ret; +} + void glusterd_auth_cleanup (glusterd_volinfo_t *volinfo) { @@ -620,6 +845,7 @@ glusterd_volinfo_delete (glusterd_volinfo_t *volinfo) GF_ASSERT (volinfo); list_del_init (&volinfo->vol_list); + list_del_init (&volinfo->snapvol_list); ret = glusterd_volume_brickinfos_delete (volinfo); if (ret) @@ -645,7 +871,6 @@ out: return ret; } - int32_t glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo) { @@ -1160,6 +1385,42 @@ glusterd_volinfo_find_by_volume_id (uuid_t volume_id, glusterd_volinfo_t **volin return ret; } +int +glusterd_snap_volinfo_find_by_volume_id (uuid_t volume_id, + glusterd_volinfo_t **volinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *voliter = NULL; + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (volinfo); + + if (uuid_is_null(volume_id)) { + gf_log (this->name, GF_LOG_WARNING, "Volume UUID is NULL"); + goto out; + } + + list_for_each_entry (snap, &priv->snapshots, snap_list) { + list_for_each_entry (voliter, &snap->volumes, vol_list) { + if (uuid_compare (volume_id, voliter->volume_id)) + continue; + *volinfo = voliter; + ret = 0; + goto out; + } + } + + gf_log (this->name, GF_LOG_WARNING, "Snap volume not found"); +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + int32_t glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo) { @@ -1169,7 +1430,6 @@ glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo) glusterd_conf_t *priv = NULL; GF_ASSERT (volname); - this = THIS; GF_ASSERT (this); @@ -1178,7 +1438,8 @@ glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo) list_for_each_entry (tmp_volinfo, &priv->volumes, vol_list) { if (!strcmp (tmp_volinfo->volname, volname)) { - gf_log (this->name, GF_LOG_DEBUG, "Volume %s found", volname); + gf_log (this->name, GF_LOG_DEBUG, "Volume %s found", + volname); ret = 0; *volinfo = tmp_volinfo; break; @@ -1190,6 +1451,68 @@ glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo) } int32_t +glusterd_snap_volinfo_find (char *snap_volname, glusterd_snap_t *snap, + glusterd_volinfo_t **volinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (snap); + GF_ASSERT (snap_volname); + + list_for_each_entry (snap_vol, &snap->volumes, vol_list) { + if (!strcmp (snap_vol->volname, snap_volname)) { + ret = 0; + *volinfo = snap_vol; + goto out; + } + } + + gf_log (this->name, GF_LOG_WARNING, "Snap volume %s not found", + snap_volname); +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_snap_volinfo_find_from_parent_volname (char *origin_volname, + glusterd_snap_t *snap, + glusterd_volinfo_t **volinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (snap); + GF_ASSERT (origin_volname); + + list_for_each_entry (snap_vol, &snap->volumes, vol_list) { + if (!strcmp (snap_vol->parent_volname, origin_volname)) { + ret = 0; + *volinfo = snap_vol; + goto out; + } + } + + gf_log (this->name, GF_LOG_DEBUG, "Snap volume not found(snap: %s, " + "origin-volume: %s", snap->snapname, origin_volname); + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t glusterd_service_stop (const char *service, char *pidfile, int sig, gf_boolean_t force_kill) { @@ -1281,10 +1604,9 @@ glusterd_set_brick_socket_filepath (glusterd_volinfo_t *volinfo, */ int32_t glusterd_brick_connect (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) + glusterd_brickinfo_t *brickinfo, char *socketpath) { int ret = 0; - char socketpath[PATH_MAX] = {0}; char volume_id_str[64]; char *brickid = NULL; dict_t *options = NULL; @@ -1293,12 +1615,9 @@ glusterd_brick_connect (glusterd_volinfo_t *volinfo, GF_ASSERT (volinfo); GF_ASSERT (brickinfo); + GF_ASSERT (socketpath); if (brickinfo->rpc == NULL) { - glusterd_set_brick_socket_filepath (volinfo, brickinfo, - socketpath, - sizeof (socketpath)); - /* Setting frame-timeout to 10mins (600seconds). * Unix domain sockets ensures that the connection is reliable. * The default timeout of 30mins used for unreliable network @@ -1379,9 +1698,23 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, priv = this->private; GF_ASSERT (priv); + if (brickinfo->snap_status == -1) { + gf_log (this->name, GF_LOG_INFO, + "Snapshot is pending on %s:%s. " + "Hence not starting the brick", + brickinfo->hostname, + brickinfo->path); + ret = 0; + goto out; + } + ret = _mk_rundir_p (volinfo); if (ret) goto out; + + glusterd_set_brick_socket_filepath (volinfo, brickinfo, socketpath, + sizeof (socketpath)); + GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv); if (gf_is_service_running (pidfile, NULL)) goto connect; @@ -1417,8 +1750,15 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, runner_argprintf (&runner, "--log-file=%s", valgrind_logfile); } - snprintf (volfile, PATH_MAX, "%s.%s.%s", volinfo->volname, - brickinfo->hostname, exp_path); + if (volinfo->is_snap_volume) { + snprintf (volfile, PATH_MAX,"/%s/%s/%s.%s.%s", + GLUSTERD_VOL_SNAP_DIR_PREFIX, + volinfo->snapshot->snapname, volinfo->volname, + brickinfo->hostname, exp_path); + } else { + snprintf (volfile, PATH_MAX, "%s.%s.%s", volinfo->volname, + brickinfo->hostname, exp_path); + } if (volinfo->logdir) { snprintf (logfile, PATH_MAX, "%s/%s.log", @@ -1430,9 +1770,6 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, if (!brickinfo->logfile) brickinfo->logfile = gf_strdup (logfile); - glusterd_set_brick_socket_filepath (volinfo, brickinfo, socketpath, - sizeof (socketpath)); - (void) snprintf (glusterd_uuid, 1024, "*-posix.glusterd-uuid=%s", uuid_utoa (MY_UUID)); runner_add_args (&runner, SBIN_DIR"/glusterfsd", @@ -1480,9 +1817,13 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, brickinfo->rdma_port = rdma_port; connect: - ret = glusterd_brick_connect (volinfo, brickinfo); - if (ret) + ret = glusterd_brick_connect (volinfo, brickinfo, socketpath); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to connect to brick %s:%s on %s", + brickinfo->hostname, brickinfo->path, socketpath); goto out; + } out: return ret; } @@ -1546,10 +1887,10 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo, gf_boolean_t del_brick) { - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - char pidfile[PATH_MAX] = {0,}; - int ret = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char pidfile[PATH_MAX] = {0,}; + int ret = 0; GF_ASSERT (volinfo); GF_ASSERT (brickinfo); @@ -1803,7 +2144,7 @@ out: } int glusterd_compute_cksum (glusterd_volinfo_t *volinfo, - gf_boolean_t is_quota_conf) + gf_boolean_t is_quota_conf) { int ret = -1; uint32_t cs = 0; @@ -1918,7 +2259,10 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, glusterd_dict_ctx_t ctx = {0}; char *rebalance_id_str = NULL; char *rb_id_str = NULL; + xlator_t *this = NULL; + this = THIS; + GF_ASSERT (this); GF_ASSERT (dict); GF_ASSERT (volinfo); @@ -1933,6 +2277,15 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + snprintf (key, sizeof (key), "volume%d.is_volume_restored", count); + ret = dict_set_int32 (dict, key, volinfo->is_volume_restored); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to set " + "is_volume_restored option for %s volume", + volinfo->volname); + goto out; + } + memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.brick_count", count); ret = dict_set_int32 (dict, key, volinfo->brick_count); @@ -1987,6 +2340,20 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + snprintf (key, sizeof (key), "volume%d.is_snap_volume", count); + ret = dict_set_uint32 (dict, key, volinfo->is_snap_volume); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Unable to set %s", key); + goto out; + } + + snprintf (key, sizeof (key), "volume%d.snap-max-hard-limit", count); + ret = dict_set_uint64 (dict, key, volinfo->snap_max_hard_limit); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Unable to set %s", key); + goto out; + } + volume_id_str = gf_strdup (uuid_utoa (volinfo->volume_id)); if (!volume_id_str) { ret = -1; @@ -2162,6 +2529,28 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + snprintf (key, sizeof (key), "volume%d.brick%d.snap_status", + count, i); + ret = dict_set_int32 (dict, key, brickinfo->snap_status); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set snap_status for %s:%s", + brickinfo->hostname, + brickinfo->path); + goto out; + } + + snprintf (key, sizeof (key), "volume%d.brick%d.device_path", + count, i); + ret = dict_set_str (dict, key, brickinfo->device_path); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set snap_device for %s:%s", + brickinfo->hostname, + brickinfo->path); + goto out; + } + i++; } @@ -2716,7 +3105,7 @@ glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo, gf_boolean_t meets_quorum) { glusterd_brickinfo_t *brickinfo = NULL; - glusterd_conf_t *conf = NULL; + glusterd_conf_t *conf = NULL; conf = this->private; if (volinfo->status != GLUSTERD_STATUS_STARTED) @@ -2834,6 +3223,8 @@ glusterd_import_new_brick (dict_t *vols, int32_t vol_count, { char key[512] = {0,}; int ret = -1; + int32_t snap_status = 0; + char *snap_device = NULL; char *hostname = NULL; char *path = NULL; char *brick_id = NULL; @@ -2877,12 +3268,30 @@ glusterd_import_new_brick (dict_t *vols, int32_t vol_count, ret = 0; } + snprintf (key, sizeof (key), "volume%d.brick%d.snap_status", + vol_count, brick_count); + ret = dict_get_int32 (vols, key, &snap_status); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload", key); + goto out; + } + + snprintf (key, sizeof (key), "volume%d.brick%d.device_path", + vol_count, brick_count); + ret = dict_get_str (vols, key, &snap_device); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload", key); + goto out; + } + ret = glusterd_brickinfo_new (&new_brickinfo); if (ret) goto out; strcpy (new_brickinfo->path, path); strcpy (new_brickinfo->hostname, hostname); + strcpy (new_brickinfo->device_path, snap_device); + new_brickinfo->snap_status = snap_status; new_brickinfo->decommissioned = decommissioned; if (brick_id) strcpy (new_brickinfo->brick_id, brick_id); @@ -3092,6 +3501,7 @@ glusterd_import_volinfo (dict_t *vols, int count, char *rb_id_str = NULL; int op_version = 0; int client_op_version = 0; + uint32_t is_snap_volume = 0; GF_ASSERT (vols); GF_ASSERT (volinfo); @@ -3103,6 +3513,22 @@ glusterd_import_volinfo (dict_t *vols, int count, goto out; } + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.is_snap_volume", count); + ret = dict_get_uint32 (vols, key, &is_snap_volume); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload for %s", + key, volname); + goto out; + } + + if (is_snap_volume == _gf_true) { + gf_log (THIS->name, GF_LOG_DEBUG, + "Not syncing snap volume %s", volname); + ret = 0; + goto out; + } + ret = glusterd_volinfo_new (&new_volinfo); if (ret) goto out; @@ -3229,6 +3655,25 @@ glusterd_import_volinfo (dict_t *vols, int count, goto out; } + new_volinfo->is_snap_volume = is_snap_volume; + + snprintf (key, sizeof (key), "volume%d.is_volume_restored", count); + ret = dict_get_uint32 (vols, key, &new_volinfo->is_volume_restored); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to get " + "is_volume_restored option for %s", + volname); + goto out; + } + + snprintf (key, sizeof (key), "volume%d.snap-max-hard-limit", count); + ret = dict_get_uint64 (vols, key, &new_volinfo->snap_max_hard_limit); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload for %s", + key, volname); + goto out; + } + memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.rebalance", count); ret = dict_get_uint32 (vols, key, &new_volinfo->rebal.defrag_cmd); @@ -3568,6 +4013,12 @@ glusterd_import_friend_volume (dict_t *vols, size_t count) if (ret) goto out; + if (!new_volinfo) { + gf_log (this->name, GF_LOG_DEBUG, + "Not importing snap volume"); + goto out; + } + ret = glusterd_volinfo_find (new_volinfo->volname, &old_volinfo); if (0 == ret) { (void) gd_check_and_update_rebalance_info (old_volinfo, @@ -4756,20 +5207,42 @@ out: int glusterd_restart_bricks (glusterd_conf_t *conf) { + int ret = 0; glusterd_volinfo_t *volinfo = NULL; glusterd_brickinfo_t *brickinfo = NULL; + glusterd_snap_t *snap = NULL; gf_boolean_t start_nodesvcs = _gf_false; - int ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); list_for_each_entry (volinfo, &conf->volumes, vol_list) { if (volinfo->status != GLUSTERD_STATUS_STARTED) continue; start_nodesvcs = _gf_true; + gf_log (this->name, GF_LOG_DEBUG, "starting the volume %s", + volinfo->volname); list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { glusterd_brick_start (volinfo, brickinfo, _gf_false); } } + list_for_each_entry (snap, &conf->snapshots, snap_list) { + list_for_each_entry (volinfo, &snap->volumes, vol_list) { + if (volinfo->status != GLUSTERD_STATUS_STARTED) + continue; + start_nodesvcs = _gf_true; + gf_log (this->name, GF_LOG_DEBUG, "starting the snap " + "volume %s", volinfo->volname); + list_for_each_entry (brickinfo, &volinfo->bricks, + brick_list) { + glusterd_brick_start (volinfo, brickinfo, + _gf_false); + } + } + } + if (start_nodesvcs) glusterd_nodesvcs_handle_graph_change (NULL); @@ -4986,7 +5459,7 @@ out: } #ifdef GF_LINUX_HOST_OS -static int +int glusterd_get_brick_root (char *path, char **mount_point) { char *ptr = NULL; @@ -5165,6 +5638,31 @@ glusterd_add_inode_size_to_dict (dict_t *dict, int count) return ret; } +struct mntent * +glusterd_get_mnt_entry_info (char *mnt_pt, FILE *mtab) +{ + struct mntent *entry = NULL; + + mtab = setmntent (_PATH_MOUNTED, "r"); + if (!mtab) + goto out; + + entry = getmntent (mtab); + + while (1) { + if (!entry) + goto out; + + if (!strcmp (entry->mnt_dir, mnt_pt) && + strcmp (entry->mnt_type, "rootfs")) + break; + entry = getmntent (mtab); + } + +out: + return entry; +} + static int glusterd_add_brick_mount_details (glusterd_brickinfo_t *brickinfo, dict_t *dict, int count) @@ -5176,8 +5674,8 @@ glusterd_add_brick_mount_details (glusterd_brickinfo_t *brickinfo, char *fs_name = NULL; char *mnt_options = NULL; char *device = NULL; - FILE *mtab = NULL; struct mntent *entry = NULL; + FILE *mtab = NULL; snprintf (base_key, sizeof (base_key), "brick%d", count); @@ -5185,25 +5683,12 @@ glusterd_add_brick_mount_details (glusterd_brickinfo_t *brickinfo, if (ret) goto out; - mtab = setmntent (_PATH_MOUNTED, "r"); - if (!mtab) { + entry = glusterd_get_mnt_entry_info (mnt_pt, mtab); + if (!entry) { ret = -1; goto out; } - entry = getmntent (mtab); - - while (1) { - if (!entry) { - ret = -1; - goto out; - } - if (!strcmp (entry->mnt_dir, mnt_pt) && - strcmp (entry->mnt_type, "rootfs")) - break; - entry = getmntent (mtab); - } - /* get device file */ memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "%s.device", base_key); @@ -5236,6 +5721,45 @@ glusterd_add_brick_mount_details (glusterd_brickinfo_t *brickinfo, return ret; } + +char* +glusterd_get_brick_mount_details (glusterd_brickinfo_t *brickinfo) +{ + int ret = -1; + char *mnt_pt = NULL; + char *device = NULL; + FILE *mtab = NULL; + struct mntent *entry = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (brickinfo); + + ret = glusterd_get_brick_root (brickinfo->path, &mnt_pt); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get mount point " + "for %s brick", brickinfo->path); + goto out; + } + + entry = glusterd_get_mnt_entry_info (mnt_pt, mtab); + if (NULL == entry) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get mnt entry " + "for %s mount path", mnt_pt); + goto out; + } + + /* get the fs_name/device */ + device = gf_strdup (entry->mnt_fsname); + +out: + if (NULL != mtab) { + endmntent (mtab); + } + + return device; +} #endif int @@ -8238,6 +8762,279 @@ out: } int +glusterd_snap_config_use_rsp_dict (dict_t *dst, dict_t *src) +{ + char buf[PATH_MAX] = ""; + char *volname = NULL; + int ret = -1; + int config_command = 0; + uint64_t i = 0; + uint64_t value = 0; + uint64_t voldisplaycount = 0; + + if (!dst || !src) { + gf_log ("", GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (dst, "config-command", &config_command); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "failed to get config-command type"); + goto out; + } + + switch (config_command) { + case GF_SNAP_CONFIG_DISPLAY: + ret = dict_get_uint64 (src, "snap-max-hard-limit", &value); + if (!ret) { + ret = dict_set_uint64 (dst, "snap-max-hard-limit", value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set snap_max_hard_limit"); + goto out; + } + } else { + /* Received dummy response from other nodes */ + ret = 0; + goto out; + } + + ret = dict_get_uint64 (src, "snap-max-soft-limit", &value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get snap_max_soft_limit"); + goto out; + } + + ret = dict_set_uint64 (dst, "snap-max-soft-limit", value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set snap_max_soft_limit"); + goto out; + } + + ret = dict_get_uint64 (src, "voldisplaycount", + &voldisplaycount); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get voldisplaycount"); + goto out; + } + + ret = dict_set_uint64 (dst, "voldisplaycount", + voldisplaycount); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set voldisplaycount"); + goto out; + } + + for (i = 0; i < voldisplaycount; i++) { + snprintf (buf, sizeof(buf), "volume%ld-volname", i); + ret = dict_get_str (src, buf, &volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_str (dst, buf, volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-snap-max-hard-limit", i); + ret = dict_get_uint64 (src, buf, &value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_uint64 (dst, buf, value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-active-hard-limit", i); + ret = dict_get_uint64 (src, buf, &value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_uint64 (dst, buf, value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-snap-max-soft-limit", i); + ret = dict_get_uint64 (src, buf, &value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_uint64 (dst, buf, value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set %s", buf); + goto out; + } + } + + break; + default: + break; + } + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +/* Aggregate missed_snap_counts from different nodes and save it * + * in the req_dict of the originator node */ +int +glusterd_snap_create_use_rsp_dict (dict_t *dst, dict_t *src) +{ + char *buf = NULL; + char *tmp_str = NULL; + char name_buf[PATH_MAX] = ""; + int32_t i = -1; + int32_t ret = -1; + int32_t src_missed_snap_count = -1; + int32_t dst_missed_snap_count = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + if (!dst || !src) { + gf_log (this->name, GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (src, "missed_snap_count", + &src_missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "No missed snaps"); + ret = 0; + goto out; + } + + ret = dict_get_int32 (dst, "missed_snap_count", + &dst_missed_snap_count); + if (ret) { + /* Initialize dst_missed_count for the first time */ + dst_missed_snap_count = 0; + } + + for (i = 0; i < src_missed_snap_count; i++) { + snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", + i); + ret = dict_get_str (src, name_buf, &buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch %s", name_buf); + goto out; + } + + snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", + dst_missed_snap_count); + + tmp_str = gf_strdup (buf); + if (!tmp_str) { + ret = -1; + goto out; + } + + ret = dict_set_dynstr (dst, name_buf, tmp_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set %s", name_buf); + goto out; + } + + tmp_str = NULL; + dst_missed_snap_count++; + } + + ret = dict_set_int32 (dst, "missed_snap_count", dst_missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set dst_missed_snap_count"); + goto out; + } + +out: + if (ret && tmp_str) + GF_FREE(tmp_str); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_snap_use_rsp_dict (dict_t *dst, dict_t *src) +{ + int ret = -1; + int32_t snap_command = 0; + + if (!dst || !src) { + gf_log ("", GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (dst, "type", &snap_command); + if (ret) { + gf_log ("", GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + case GF_SNAP_OPTION_TYPE_DELETE: + ret = glusterd_snap_create_use_rsp_dict (dst, src); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to use rsp dict"); + goto out; + } + break; + case GF_SNAP_OPTION_TYPE_CONFIG: + ret = glusterd_snap_config_use_rsp_dict (dst, src); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to use rsp dict"); + goto out; + } + break; + default: + // copy the response dictinary's contents to the dict to be + // sent back to the cli + dict_copy (src, dst); + break; + } + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int glusterd_sys_exec_output_rsp_dict (dict_t *dst, dict_t *src) { char output_name[PATH_MAX] = ""; @@ -9146,6 +9943,102 @@ out: return is_status_tasks; } +int +glusterd_compare_snap_time(struct list_head *list1, struct list_head *list2) +{ + glusterd_snap_t *snap1 = NULL; + glusterd_snap_t *snap2 = NULL; + double diff_time = 0; + + GF_ASSERT (list1); + GF_ASSERT (list2); + + snap1 = list_entry(list1, glusterd_snap_t, snap_list); + snap2 = list_entry(list2, glusterd_snap_t, snap_list); + diff_time = difftime(snap1->time_stamp, snap2->time_stamp); + + return ((int)diff_time); +} + +int +glusterd_compare_snap_vol_time(struct list_head *list1, struct list_head *list2) +{ + glusterd_volinfo_t *snapvol1 = NULL; + glusterd_volinfo_t *snapvol2 = NULL; + double diff_time = 0; + + GF_ASSERT (list1); + GF_ASSERT (list2); + + snapvol1 = list_entry(list1, glusterd_volinfo_t, snapvol_list); + snapvol2 = list_entry(list2, glusterd_volinfo_t, snapvol_list); + diff_time = difftime(snapvol1->snapshot->time_stamp, + snapvol2->snapshot->time_stamp); + + return ((int)diff_time); +} + +int32_t +glusterd_missed_snapinfo_new (glusterd_missed_snap_info **missed_snapinfo) +{ + glusterd_missed_snap_info *new_missed_snapinfo = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (missed_snapinfo); + + new_missed_snapinfo = GF_CALLOC (1, sizeof(*new_missed_snapinfo), + gf_gld_mt_missed_snapinfo_t); + + if (!new_missed_snapinfo) + goto out; + + new_missed_snapinfo->node_snap_info = NULL; + INIT_LIST_HEAD (&new_missed_snapinfo->missed_snaps); + INIT_LIST_HEAD (&new_missed_snapinfo->snap_ops); + + *missed_snapinfo = new_missed_snapinfo; + + ret = 0; + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_missed_snap_op_new (glusterd_snap_op_t **snap_op) +{ + glusterd_snap_op_t *new_snap_op = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (snap_op); + + new_snap_op = GF_CALLOC (1, sizeof(*new_snap_op), + gf_gld_mt_missed_snapinfo_t); + + if (!new_snap_op) + goto out; + + new_snap_op->brick_path = NULL; + new_snap_op->brick_num = -1; + new_snap_op->op = -1; + new_snap_op->status = -1; + INIT_LIST_HEAD (&new_snap_op->snap_ops_list); + + *snap_op = new_snap_op; + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + /* Tells if rebalance needs to be started for the given volume on the peer * * Rebalance should be started on a peer only if an involved brick is present on diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index ec59d914336..1964c88c5c3 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -85,6 +85,11 @@ glusterd_submit_request (struct rpc_clnt *rpc, void *req, int32_t glusterd_volinfo_new (glusterd_volinfo_t **volinfo); +int32_t +glusterd_volinfo_dup (glusterd_volinfo_t *volinfo, + glusterd_volinfo_t **dup_volinfo, + gf_boolean_t set_userauth); + char * glusterd_auth_get_username (glusterd_volinfo_t *volinfo); @@ -119,11 +124,23 @@ int32_t glusterd_peer_hostname_new (char *hostname, glusterd_peer_hostname_t **name); int32_t +glusterd_snap_volinfo_find (char *volname, glusterd_snap_t *snap, + glusterd_volinfo_t **volinfo); +int32_t +glusterd_snap_volinfo_find_from_parent_volname (char *origin_volname, + glusterd_snap_t *snap, + glusterd_volinfo_t **volinfo); + +int32_t glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo); int glusterd_volinfo_find_by_volume_id (uuid_t volume_id, glusterd_volinfo_t **volinfo); +int +glusterd_snap_volinfo_find_by_volume_id (uuid_t volume_id, + glusterd_volinfo_t **volinfo); + int32_t glusterd_service_stop(const char *service, char *pidfile, int sig, gf_boolean_t force_kill); @@ -302,6 +319,7 @@ glusterd_is_defrag_on (glusterd_volinfo_t *volinfo); int32_t glusterd_volinfo_bricks_delete (glusterd_volinfo_t *volinfo); + int glusterd_friend_find_by_uuid (uuid_t uuid, glusterd_peerinfo_t **peerinfo); @@ -371,7 +389,7 @@ gf_boolean_t glusterd_peerinfo_is_uuid_unknown (glusterd_peerinfo_t *peerinfo); int32_t glusterd_brick_connect (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo); + glusterd_brickinfo_t *brickinfo, char *socketpath); int32_t glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo); int32_t @@ -379,12 +397,16 @@ glusterd_delete_volume (glusterd_volinfo_t *volinfo); int32_t glusterd_delete_brick (glusterd_volinfo_t* volinfo, glusterd_brickinfo_t *brickinfo); + int32_t glusterd_delete_all_bricks (glusterd_volinfo_t* volinfo); + int glusterd_spawn_daemons (void *opaque); + int glusterd_restart_gsyncds (glusterd_conf_t *conf); + int glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave, char *path_list, char *conf_path, @@ -510,6 +532,8 @@ int glusterd_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict); int glusterd_sys_exec_output_rsp_dict (dict_t *aggr, dict_t *rsp_dict); +int +glusterd_snap_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict); int32_t glusterd_handle_node_rsp (dict_t *req_ctx, void *pending_entry, glusterd_op_t op, dict_t *rsp_dict, dict_t *op_ctx, @@ -660,4 +684,37 @@ glusterd_rpc_clnt_unref (glusterd_conf_t *conf, rpc_clnt_t *rpc); int32_t glusterd_compare_volume_name(struct list_head *, struct list_head *); +#ifdef GF_LINUX_HOST_OS +char* +glusterd_get_brick_mount_details (glusterd_brickinfo_t *brickinfo); +struct mntent * +glusterd_get_mnt_entry_info (char *mnt_pt, FILE *mtab); +int +glusterd_get_brick_root (char *path, char **mount_point); +#endif //LINUX_HOST + +int +glusterd_compare_snap_time(struct list_head *, struct list_head *); + +int +glusterd_compare_snap_vol_time(struct list_head *, struct list_head *); + +int32_t +glusterd_snap_volinfo_restore (dict_t *rsp_dict, + glusterd_volinfo_t *new_volinfo, + glusterd_volinfo_t *snap_volinfo); +int32_t +glusterd_lvm_snapshot_remove (dict_t *rsp_dict, glusterd_volinfo_t *snap_vol); + +int32_t +glusterd_missed_snapinfo_new (glusterd_missed_snap_info **missed_snapinfo); + +int32_t +glusterd_missed_snap_op_new (glusterd_snap_op_t **snap_op); + +int32_t +glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, char *snap_uuid, + glusterd_brickinfo_t *brickinfo, + int32_t brick_number, int32_t op); + #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 9012003c98a..78395827937 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -28,6 +28,8 @@ #include "logging.h" #include "dict.h" #include "graph-utils.h" +#include "glusterd-store.h" +#include "glusterd-hooks.h" #include "trie.h" #include "glusterd-mem-types.h" #include "cli1-xdr.h" @@ -1648,7 +1650,8 @@ server_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, } /* Check for read-only volume option, and add it to the graph */ - if (dict_get_str_boolean (set_dict, "features.read-only", 0)) { + if (dict_get_str_boolean (set_dict, "features.read-only", 0) + || volinfo -> is_snap_volume) { xl = volgen_graph_add (graph, "features/read-only", volname); if (!xl) { ret = -1; @@ -3553,6 +3556,8 @@ generate_client_volfiles (glusterd_volinfo_t *volinfo, out: if (dict) dict_unref (dict); + + gf_log ("", GF_LOG_TRACE, "Returning %d", ret); return ret; } @@ -4182,3 +4187,128 @@ gd_is_boolean_option (char *key) return _gf_false; } + +/* This function will restore origin volume to it's snap. + * The restore operation will simply replace the Gluster origin + * volume with the snap volume. + * TODO: Multi-volume delete to be done. + * Cleanup in case of restore failure is pending. + * + * @param orig_vol volinfo of origin volume + * @param snap_vol volinfo of snapshot volume + * + * @return 0 on success and negative value on error + */ +int +gd_restore_snap_volume (dict_t *rsp_dict, + glusterd_volinfo_t *orig_vol, + glusterd_volinfo_t *snap_vol) +{ + int ret = -1; + glusterd_volinfo_t *new_volinfo = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *temp_volinfo = NULL; + glusterd_volinfo_t *voliter = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + conf = this->private; + GF_ASSERT (conf); + + GF_VALIDATE_OR_GOTO (this->name, orig_vol, out); + GF_VALIDATE_OR_GOTO (this->name, snap_vol, out); + snap = snap_vol->snapshot; + GF_VALIDATE_OR_GOTO (this->name, snap, out); + + /* Snap volume must be stoped before performing the + * restore operation. + */ + ret = glusterd_stop_volume (snap_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to stop " + "snap volume(%s)", snap_vol->volname); + goto out; + } + + /* Create a new volinfo for the restored volume */ + ret = glusterd_volinfo_dup (snap_vol, &new_volinfo, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to create volinfo"); + goto out; + } + + /* Following entries need to be derived from origin volume. */ + strcpy (new_volinfo->volname, orig_vol->volname); + uuid_copy (new_volinfo->volume_id, orig_vol->volume_id); + new_volinfo->snap_count = orig_vol->snap_count; + new_volinfo->snap_max_hard_limit = orig_vol->snap_max_hard_limit; + new_volinfo->is_volume_restored = _gf_true; + + /* Bump the version of the restored volume, so that nodes * + * which are done can sync during handshake */ + new_volinfo->version = orig_vol->version; + + list_for_each_entry_safe (voliter, temp_volinfo, + &orig_vol->snap_volumes, snapvol_list) { + list_add_tail (&voliter->snapvol_list, + &new_volinfo->snap_volumes); + } + /* Copy the snap vol info to the new_volinfo.*/ + ret = glusterd_snap_volinfo_restore (rsp_dict, new_volinfo, snap_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to restore snap"); + (void)glusterd_volinfo_delete (new_volinfo); + goto out; + } + + /* If the orig_vol is already restored then we should delete + * the backend LVMs */ + if (orig_vol->is_volume_restored) { + ret = glusterd_lvm_snapshot_remove (rsp_dict, orig_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to remove " + "LVM backend"); + (void)glusterd_volinfo_delete (new_volinfo); + goto out; + } + } + + /* Once the new_volinfo is completely constructed then delete + * the orinal volinfo + */ + ret = glusterd_volinfo_delete (orig_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to delete volinfo"); + (void)glusterd_volinfo_delete (new_volinfo); + goto out; + } + /* New volinfo always shows the status as created. Therefore + * set the status to stop. */ + glusterd_set_volume_status (new_volinfo, GLUSTERD_STATUS_STOPPED); + + list_add_tail (&new_volinfo->vol_list, &conf->volumes); + + /* Now delete the snap entry. As a first step delete the snap + * volume information stored in store. */ + ret = glusterd_snap_remove (rsp_dict, snap, _gf_false, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Failed to delete " + "snap %s", snap->snapname); + goto out; + } + + ret = glusterd_store_volinfo (new_volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to store volinfo"); + goto out; + } + + ret = 0; +out: + + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h index 1683f905022..1f9416106c4 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.h +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h @@ -126,6 +126,10 @@ int glusterd_create_quotad_volfile (); int glusterd_delete_volfile (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo); +int +glusterd_delete_snap_volfile (glusterd_volinfo_t *volinfo, + glusterd_volinfo_t *snap_volinfo, + glusterd_brickinfo_t *brickinfo); int glusterd_volinfo_get (glusterd_volinfo_t *volinfo, char *key, char **value); int glusterd_volinfo_get_boolean (glusterd_volinfo_t *volinfo, char *key); @@ -139,8 +143,15 @@ glusterd_check_voloption_flags (char *key, int32_t flags); gf_boolean_t glusterd_is_valid_volfpath (char *volname, char *brick); int generate_brick_volfiles (glusterd_volinfo_t *volinfo); +int generate_snap_brick_volfiles (glusterd_volinfo_t *volinfo, + glusterd_volinfo_t *snap_volinfo); int generate_client_volfiles (glusterd_volinfo_t *volinfo, glusterd_client_type_t client_type); +int +generate_snap_client_volfiles (glusterd_volinfo_t *actual_volinfo, + glusterd_volinfo_t *snap_volinfo, + glusterd_client_type_t client_type, + gf_boolean_t vol_restore); int glusterd_get_volopt_content (dict_t *dict, gf_boolean_t xml_out); char* glusterd_get_trans_type_rb (gf_transport_type ttype); @@ -161,4 +172,7 @@ gd_is_xlator_option (char *key); gf_boolean_t gd_is_boolean_option (char *key); +int gd_restore_snap_volume (dict_t *rsp_dict, + glusterd_volinfo_t *orig_vol, + glusterd_volinfo_t *snap_vol); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 135faa40ac5..eac926d9533 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -1211,6 +1211,16 @@ glusterd_op_stage_delete_volume (dict_t *dict, char **op_errstr) goto out; } + if (volinfo->snap_count > 0 || !list_empty(&volinfo->snap_volumes)) { + snprintf (msg, sizeof (msg), "Cannot delete Volume %s ," + "as it has %ld snapshots. " + "To delete the volume, " + "first delete all the snapshots under it.", + volname, volinfo->snap_count); + ret = -1; + goto out; + } + ret = 0; out: @@ -1772,54 +1782,48 @@ out: return ret; } - int -glusterd_op_stop_volume (dict_t *dict) +glusterd_stop_volume (glusterd_volinfo_t *volinfo) { - int ret = 0; - int flags = 0; - char *volname = NULL; - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - xlator_t *this = NULL; - char mountdir[PATH_MAX] = {0,}; - runner_t runner = {0,}; - char pidfile[PATH_MAX] = {0,}; + int ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; + char mountdir[PATH_MAX] = {0,}; + runner_t runner = {0,}; + char pidfile[PATH_MAX] = {0,}; + xlator_t *this = NULL; this = THIS; GF_ASSERT (this); - ret = glusterd_op_stop_volume_args_get (dict, &volname, &flags); - if (ret) - goto out; - - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, FMTSTR_CHECK_VOL_EXISTS, - volname); - goto out; - } + GF_VALIDATE_OR_GOTO (this->name, volinfo, out); list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { ret = glusterd_brick_stop (volinfo, brickinfo, _gf_false); - if (ret) + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to stop " + "brick (%s)", brickinfo->path); goto out; + } } glusterd_set_volume_status (volinfo, GLUSTERD_STATUS_STOPPED); ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); - if (ret) + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to store volinfo of " + "%s volume", volinfo->volname); goto out; + } /* If quota auxiliary mount is present, unmount it */ - GLUSTERFS_GET_AUX_MOUNT_PIDFILE (pidfile, volname); + GLUSTERFS_GET_AUX_MOUNT_PIDFILE (pidfile, volinfo->volname); if (!gf_is_service_running (pidfile, NULL)) { gf_log (this->name, GF_LOG_DEBUG, "Aux mount of volume %s " - "absent", volname); + "absent", volinfo->volname); } else { - GLUSTERD_GET_QUOTA_AUX_MOUNT_PATH (mountdir, volname, "/"); + GLUSTERD_GET_QUOTA_AUX_MOUNT_PATH (mountdir, volinfo->volname, + "/"); runinit (&runner); runner_add_args (&runner, "umount", @@ -1837,6 +1841,45 @@ glusterd_op_stop_volume (dict_t *dict) } ret = glusterd_nodesvcs_handle_graph_change (volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to notify graph " + "change for %s volume", volinfo->volname); + goto out; + } + +out: + return ret; +} + +int +glusterd_op_stop_volume (dict_t *dict) +{ + int ret = 0; + int flags = 0; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + ret = glusterd_op_stop_volume_args_get (dict, &volname, &flags); + if (ret) + goto out; + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, FMTSTR_CHECK_VOL_EXISTS, + volname); + goto out; + } + + ret = glusterd_stop_volume (volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to stop %s volume", + volname); + goto out; + } out: return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index a5f6945450e..85a6b920adf 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -56,6 +56,8 @@ extern struct rpcsvc_program gd_svc_cli_prog_ro; extern struct rpc_clnt_program gd_brick_prog; extern struct rpcsvc_program glusterd_mgmt_hndsk_prog; +extern char snap_mount_folder[PATH_MAX]; + rpcsvc_cbk_program_t glusterd_cbk_prog = { .progname = "Gluster Callback", .prognum = GLUSTER_CBK_PROGRAM, @@ -110,6 +112,7 @@ const char *gd_op_list[GD_OP_MAX + 1] = { [GD_OP_COPY_FILE] = "Copy File", [GD_OP_SYS_EXEC] = "Execute system commands", [GD_OP_GSYNC_CREATE] = "Geo-replication Create", + [GD_OP_SNAP] = "Snapshot", [GD_OP_MAX] = "Invalid op" }; @@ -1092,6 +1095,71 @@ glusterd_stop_uds_listener (xlator_t *this) return; } +static int +glusterd_init_snap_folder (xlator_t *this) +{ + int ret = -1; + struct stat buf = {0,}; + + GF_ASSERT (this); + + /* Snapshot volumes are mounted under /var/run/gluster/snaps folder. + * But /var/run is normally a symbolic link to /run folder, which + * creates problems as the entry point in the mtab for the mount point + * and glusterd maintained entry point will be different. Therefore + * identify the correct run folder and use it for snap volume mounting. + */ + ret = lstat (GLUSTERD_VAR_RUN_DIR, &buf); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "stat fails on %s, exiting. (errno = %d)", + GLUSTERD_VAR_RUN_DIR, errno); + goto out; + } + + /* If /var/run is symlink then use /run folder */ + if (S_ISLNK (buf.st_mode)) { + strcpy (snap_mount_folder, GLUSTERD_RUN_DIR); + } else { + strcpy (snap_mount_folder, GLUSTERD_VAR_RUN_DIR); + } + + strcat (snap_mount_folder, GLUSTERD_DEFAULT_SNAPS_BRICK_DIR); + + ret = stat (snap_mount_folder, &buf); + if ((ret != 0) && (ENOENT != errno)) { + gf_log (this->name, GF_LOG_ERROR, + "stat fails on %s, exiting. (errno = %d)", + snap_mount_folder, errno); + ret = -1; + goto out; + } + + if ((!ret) && (!S_ISDIR(buf.st_mode))) { + gf_log (this->name, GF_LOG_CRITICAL, + "Provided snap path %s is not a directory," + "exiting", snap_mount_folder); + ret = -1; + goto out; + } + + if ((-1 == ret) && (ENOENT == errno)) { + /* Create missing folders */ + ret = mkdir_p (snap_mount_folder, 0777, _gf_false); + + if (-1 == ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "Unable to create directory %s" + " ,errno = %d", snap_mount_folder, errno); + goto out; + } + } + +out: + return ret; +} + + /* * init - called during glusterd initialization * @@ -1158,6 +1226,14 @@ init (xlator_t *this) gf_log (this->name, GF_LOG_INFO, "Using %s as working directory", workdir); + ret = glusterd_init_snap_folder (this); + + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, "Unable to create " + "snap backend folder"); + exit (1); + } + snprintf (cmd_log_filename, PATH_MAX,"%s/.cmd_log_history", DEFAULT_LOG_FILE_DIRECTORY); ret = gf_cmd_log_init (cmd_log_filename); @@ -1301,6 +1377,9 @@ init (xlator_t *this) INIT_LIST_HEAD (&conf->peers); INIT_LIST_HEAD (&conf->volumes); + INIT_LIST_HEAD (&conf->snapshots); + INIT_LIST_HEAD (&conf->missed_snaps_list); + pthread_mutex_init (&conf->mutex, NULL); conf->rpc = rpc; conf->uds_rpc = uds_rpc; @@ -1342,7 +1421,7 @@ init (xlator_t *this) } this->private = conf; - glusterd_vol_lock_init (); + glusterd_mgmt_v3_lock_init (); glusterd_txn_opinfo_dict_init (); (void) glusterd_nodesvc_set_online_status ("glustershd", _gf_false); @@ -1435,7 +1514,7 @@ fini (xlator_t *this) if (conf->handle) gf_store_handle_destroy (conf->handle); glusterd_sm_tr_log_delete (&conf->op_sm_log); - glusterd_vol_lock_fini (); + glusterd_mgmt_v3_lock_fini (); glusterd_txn_opinfo_dict_fini (); GF_FREE (conf); @@ -1553,5 +1632,9 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_INT, .description = "Sets the base port for portmap query" }, + { .key = {"snap-brick-path"}, + .type = GF_OPTION_TYPE_STR, + .description = "directory where the bricks for the snapshots will be created" + }, { .key = {NULL} }, }; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 0694f7386ae..7568a2e9ad4 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -52,6 +52,9 @@ "S56glusterd-geo-rep-create-post.sh" +#define GLUSTERD_SNAPS_MAX_HARD_LIMIT 256 +#define GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT 90 +#define GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT 100 #define GLUSTERD_SERVER_QUORUM "server" #define FMTSTR_CHECK_VOL_EXISTS "Volume %s does not exist" @@ -72,6 +75,9 @@ struct glusterd_volinfo_; typedef struct glusterd_volinfo_ glusterd_volinfo_t; +struct glusterd_snap_; +typedef struct glusterd_snap_ glusterd_snap_t; + typedef enum glusterd_op_ { GD_OP_NONE = 0, GD_OP_CREATE_VOLUME, @@ -101,6 +107,7 @@ typedef enum glusterd_op_ { GD_OP_COPY_FILE, GD_OP_SYS_EXEC, GD_OP_GSYNC_CREATE, + GD_OP_SNAP, GD_OP_MAX, } glusterd_op_t; @@ -136,13 +143,15 @@ typedef struct { nodesrv_t *quotad; struct pmap_registry *pmap; struct list_head volumes; + struct list_head snapshots; /*List of snap volumes */ pthread_mutex_t xprt_lock; struct list_head xprt_list; gf_store_handle_t *handle; gf_timer_t *timer; glusterd_sm_tr_log_t op_sm_log; struct rpc_clnt_program *gfs_mgmt; - dict_t *vol_lock; /* Dict for saving vol locks */ + dict_t *mgmt_v3_lock; /* Dict for saving + * mgmt_v3 locks */ dict_t *glusterd_txn_opinfo; /* Dict for saving * transaction opinfos */ uuid_t global_txn_id; /* To be used in @@ -150,20 +159,25 @@ typedef struct { * cluster with no * transaction ids */ - struct list_head mount_specs; - gf_boolean_t valgrind; - pthread_t brick_thread; - void *hooks_priv; + struct list_head mount_specs; + gf_boolean_t valgrind; + pthread_t brick_thread; + void *hooks_priv; /* need for proper handshake_t */ - int op_version; /* Starts with 1 for 3.3.0 */ - xlator_t *xl; /* Should be set to 'THIS' before creating thread */ - gf_boolean_t pending_quorum_action; - dict_t *opts; - synclock_t big_lock; - gf_boolean_t restart_done; - rpcsvc_t *uds_rpc; /* RPCSVC for the unix domain socket */ - uint32_t base_port; + int op_version; /* Starts with 1 for 3.3.0 */ + xlator_t *xl; /* Should be set to 'THIS' before creating thread */ + gf_boolean_t pending_quorum_action; + dict_t *opts; + synclock_t big_lock; + gf_boolean_t restart_done; + rpcsvc_t *uds_rpc; /* RPCSVC for the unix domain socket */ + uint32_t base_port; + uint64_t snap_max_hard_limit; + uint64_t snap_max_soft_limit; + char *snap_bricks_directory; + gf_store_handle_t *missed_snaps_list_shandle; + struct list_head missed_snaps_list; } glusterd_conf_t; @@ -175,6 +189,7 @@ typedef enum gf_brick_status { struct glusterd_brickinfo { char hostname[1024]; char path[PATH_MAX]; + char device_path[PATH_MAX]; char brick_id[1024];/*Client xlator name, AFR changelog name*/ struct list_head brick_list; uuid_t uuid; @@ -188,6 +203,7 @@ struct glusterd_brickinfo { int decommissioned; char vg[PATH_MAX]; /* FIXME: Use max size for length of vg */ int caps; /* Capability */ + int32_t snap_status; }; typedef struct glusterd_brickinfo glusterd_brickinfo_t; @@ -278,11 +294,36 @@ struct glusterd_replace_brick_ { typedef struct glusterd_replace_brick_ glusterd_replace_brick_t; struct glusterd_volinfo_ { + gf_lock_t lock; char volname[GLUSTERD_MAX_VOLUME_NAME]; + gf_boolean_t is_snap_volume; + glusterd_snap_t *snapshot; + gf_boolean_t is_volume_restored; + char parent_volname[GLUSTERD_MAX_VOLUME_NAME]; + /* In case of a snap volume + i.e (is_snap_volume == TRUE) this + field will contain the name of + the volume which is snapped. In + case of a non-snap volume, this + field will be initialized as N/A */ int type; int brick_count; + uint64_t snap_count; + uint64_t snap_max_hard_limit; struct list_head vol_list; + /* In case of a snap volume + i.e (is_snap_volume == TRUE) this + is linked to glusterd_snap_t->volumes. + In case of a non-snap volume, this is + linked to glusterd_conf_t->volumes */ + struct list_head snapvol_list; + /* This is a current pointer for + glusterd_volinfo_t->snap_volumes */ struct list_head bricks; + struct list_head snap_volumes; + /* TODO : Need to remove this, as this + * is already part of snapshot object. + */ glusterd_volume_status status; int sub_count; /* backward compatibility */ int stripe_count; @@ -291,11 +332,11 @@ struct glusterd_volinfo_ { distribute volume */ int dist_leaf_count; /* Number of bricks in one distribute subvolume */ - int port; - gf_store_handle_t *shandle; - gf_store_handle_t *rb_shandle; - gf_store_handle_t *node_state_shandle; - gf_store_handle_t *quota_conf_shandle; + int port; + gf_store_handle_t *shandle; + gf_store_handle_t *rb_shandle; + gf_store_handle_t *node_state_shandle; + gf_store_handle_t *quota_conf_shandle; /* Defrag/rebalance related */ glusterd_rebalance_t rebal; @@ -303,12 +344,12 @@ struct glusterd_volinfo_ { /* Replace brick status */ glusterd_replace_brick_t rep_brick; - int version; - uint32_t quota_conf_version; - uint32_t cksum; - uint32_t quota_conf_cksum; - gf_transport_type transport_type; - gf_transport_type nfs_transport_type; + int version; + uint32_t quota_conf_version; + uint32_t cksum; + uint32_t quota_conf_cksum; + gf_transport_type transport_type; + gf_transport_type nfs_transport_type; dict_t *dict; @@ -330,6 +371,41 @@ struct glusterd_volinfo_ { int refcnt; }; +typedef enum gd_snap_status_ { + GD_SNAP_STATUS_NONE, + GD_SNAP_STATUS_INIT, + GD_SNAP_STATUS_IN_USE, + GD_SNAP_STATUS_DECOMMISSION, + GD_SNAP_STATUS_RESTORED, +} gd_snap_status_t; + +struct glusterd_snap_ { + gf_lock_t lock; + struct list_head volumes; + struct list_head snap_list; + char snapname[GLUSTERD_MAX_SNAP_NAME]; + uuid_t snap_id; + char *description; + time_t time_stamp; + gf_boolean_t snap_restored; + gd_snap_status_t snap_status; + gf_store_handle_t *shandle; +}; + +typedef struct glusterd_snap_op_ { + int32_t brick_num; + char *brick_path; + int32_t op; + int32_t status; + struct list_head snap_ops_list; +} glusterd_snap_op_t; + +typedef struct glusterd_missed_snap_ { + char *node_snap_info; + struct list_head missed_snaps; + struct list_head snap_ops; +} glusterd_missed_snap_info; + typedef enum gd_node_type_ { GD_NODE_NONE, GD_NODE_BRICK, @@ -339,6 +415,12 @@ typedef enum gd_node_type_ { GD_NODE_QUOTAD, } gd_node_type; +typedef enum missed_snap_stat { + GD_MISSED_SNAP_NONE, + GD_MISSED_SNAP_PENDING, + GD_MISSED_SNAP_DONE, +} missed_snap_stat; + typedef struct glusterd_pending_node_ { struct list_head list; void *node; @@ -371,12 +453,19 @@ enum glusterd_vol_comp_status_ { #define GLUSTERD_VOLUME_DIR_PREFIX "vols" #define GLUSTERD_PEER_DIR_PREFIX "peers" #define GLUSTERD_VOLUME_INFO_FILE "info" +#define GLUSTERD_SNAP_INFO_FILE "info" #define GLUSTERD_VOLUME_RBSTATE_FILE "rbstate" #define GLUSTERD_BRICK_INFO_DIR "bricks" #define GLUSTERD_CKSUM_FILE "cksum" #define GLUSTERD_VOL_QUOTA_CKSUM_FILE "quota.cksum" #define GLUSTERD_TRASH "trash" #define GLUSTERD_NODE_STATE_FILE "node_state.info" +#define GLUSTERD_MISSED_SNAPS_LIST_FILE "missed_snaps_list" +#define GLUSTERD_VOL_SNAP_DIR_PREFIX "snaps" + +#define GLUSTERD_DEFAULT_SNAPS_BRICK_DIR "/gluster/snaps" +#define GLUSTERD_VAR_RUN_DIR "/var/run" +#define GLUSTERD_RUN_DIR "/run" /* definitions related to replace brick */ #define RB_CLIENT_MOUNTPOINT "rb_mount" @@ -389,14 +478,29 @@ enum glusterd_vol_comp_status_ { typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args); -#define GLUSTERD_GET_VOLUME_DIR(path, volinfo, priv) \ - snprintf (path, PATH_MAX, "%s/vols/%s", priv->workdir,\ - volinfo->volname); +#define GLUSTERD_GET_VOLUME_DIR(path, volinfo, priv) \ + if (volinfo->is_snap_volume) { \ + snprintf (path, PATH_MAX, "%s/snaps/%s/%s", priv->workdir, \ + volinfo->snapshot->snapname, volinfo->volname); \ + } else { \ + snprintf (path, PATH_MAX, "%s/vols/%s", priv->workdir, \ + volinfo->volname); \ + } -#define GLUSTERD_GET_BRICK_DIR(path, volinfo, priv) \ - snprintf (path, PATH_MAX, "%s/%s/%s/%s", priv->workdir,\ - GLUSTERD_VOLUME_DIR_PREFIX, volinfo->volname, \ - GLUSTERD_BRICK_INFO_DIR); +#define GLUSTERD_GET_SNAP_DIR(path, snap, priv) \ + snprintf (path, PATH_MAX, "%s/snaps/%s", priv->workdir, \ + snap->snapname); + +#define GLUSTERD_GET_BRICK_DIR(path, volinfo, priv) \ + if (volinfo->is_snap_volume) { \ + snprintf (path, PATH_MAX, "%s/snaps/%s/%s/%s", priv->workdir, \ + volinfo->snapshot->snapname, volinfo->volname, \ + GLUSTERD_BRICK_INFO_DIR); \ + } else { \ + snprintf (path, PATH_MAX, "%s/%s/%s/%s", priv->workdir, \ + GLUSTERD_VOLUME_DIR_PREFIX, volinfo->volname, \ + GLUSTERD_BRICK_INFO_DIR); \ + } #define GLUSTERD_GET_NFS_DIR(path, priv) \ snprintf (path, PATH_MAX, "%s/nfs", priv->workdir); @@ -423,7 +527,7 @@ typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args); GLUSTERD_GET_VOLUME_DIR (volpath, volinfo, priv); \ GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, exp_path); \ snprintf (pidfile, PATH_MAX, "%s/run/%s-%s.pid", \ - volpath, brickinfo->hostname, exp_path); \ + volpath, brickinfo->hostname, exp_path); \ } while (0) #define GLUSTERD_GET_NFS_PIDFILE(pidfile,nfspath) { \ @@ -472,6 +576,20 @@ typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args); DEFAULT_VAR_RUN_DIRECTORY"/%s.pid", volname); \ } +#define GLUSTERD_GET_UUID_NOHYPHEN(ret_string, uuid) do { \ + char *snap_volname_ptr = ret_string; \ + char *snap_volid_ptr = uuid_utoa(uuid); \ + while (*snap_volid_ptr) { \ + if (*snap_volid_ptr == '-') { \ + snap_volid_ptr++; \ + } else { \ + (*snap_volname_ptr++) = \ + (*snap_volid_ptr++); \ + } \ + } \ + *snap_volname_ptr = '\0'; \ + } while (0) + int glusterd_uuid_init(); int glusterd_uuid_generate_save (); @@ -533,12 +651,12 @@ int glusterd_op_unlock_send_resp (rpcsvc_request_t *req, int32_t status); int -glusterd_op_volume_lock_send_resp (rpcsvc_request_t *req, - uuid_t *txn_id, int32_t status); +glusterd_op_mgmt_v3_lock_send_resp (rpcsvc_request_t *req, + uuid_t *txn_id, int32_t status); int -glusterd_op_volume_unlock_send_resp (rpcsvc_request_t *req, - uuid_t *txn_id, int32_t status); +glusterd_op_mgmt_v3_unlock_send_resp (rpcsvc_request_t *req, + uuid_t *txn_id, int32_t status); int glusterd_op_stage_send_resp (rpcsvc_request_t *req, @@ -756,6 +874,9 @@ int glusterd_handle_cli_heal_volume (rpcsvc_request_t *req); int glusterd_handle_cli_list_volume (rpcsvc_request_t *req); +int +glusterd_handle_snapshot (rpcsvc_request_t *req); + /* op-sm functions */ int glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr); int glusterd_op_heal_volume (dict_t *dict, char **op_errstr); @@ -808,6 +929,9 @@ int glusterd_op_statedump_volume_args_get (dict_t *dict, char **volname, int glusterd_op_gsync_args_get (dict_t *dict, char **op_errstr, char **master, char **slave, char **host_uuid); + +int glusterd_stop_volume (glusterd_volinfo_t *volinfo); + /* Synctask part */ int32_t glusterd_op_begin_synctask (rpcsvc_request_t *req, glusterd_op_t op, void *dict); @@ -823,4 +947,50 @@ glusterd_txn_opinfo_dict_fini (); void glusterd_txn_opinfo_init (); +/* snapshot */ +glusterd_snap_t* +glusterd_new_snap_object(); + +int32_t +glusterd_list_add_snapvol (glusterd_volinfo_t *origin_vol, + glusterd_volinfo_t *snap_vol); + +glusterd_snap_t* +glusterd_remove_snap_by_id (uuid_t snap_id); + +glusterd_snap_t* +glusterd_remove_snap_by_name (char *snap_name); + +glusterd_snap_t* +glusterd_find_snap_by_name (char *snap_name); + +glusterd_snap_t* +glusterd_find_snap_by_id (uuid_t snap_id); + +int +glusterd_snapshot_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict); +int +glusterd_snapshot_brickop (dict_t *dict, char **op_errstr, dict_t *rsp_dict); +int +glusterd_snapshot (dict_t *dict, char **op_errstr, dict_t *rsp_dict); +int +glusterd_snapshot_postvalidate (dict_t *dict, int32_t op_ret, char **op_errstr, + dict_t *rsp_dict); +char * +glusterd_build_snap_device_path (char *device, char *snapname); +int32_t +glusterd_snap_remove (dict_t *rsp_dict, glusterd_snap_t *snap, + gf_boolean_t remove_lvm, gf_boolean_t force); +int32_t +glusterd_snapshot_cleanup (dict_t *dict, char **op_errstr, dict_t *rsp_dict); + +int32_t +glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count); + +int32_t +glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num, + char *brick_path, int32_t snap_op, + int32_t snap_status); + #endif diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c index c11011abf9f..76c0036e08b 100644 --- a/xlators/protocol/server/src/server-helpers.c +++ b/xlators/protocol/server/src/server-helpers.c @@ -1092,3 +1092,327 @@ out: return ret; } + +int32_t +gf_barrier_transmit (server_conf_t *conf, gf_barrier_payload_t *payload) +{ + gf_barrier_t *barrier = NULL; + int32_t ret = -1; + client_t *client = NULL; + gf_boolean_t lk_heal = _gf_false; + call_frame_t *frame = NULL; + server_state_t *state = NULL; + + GF_VALIDATE_OR_GOTO ("barrier", conf, out); + GF_VALIDATE_OR_GOTO ("barrier", conf->barrier, out); + GF_VALIDATE_OR_GOTO ("barrier", payload, out); + + barrier = conf->barrier; + + frame = payload->frame; + if (frame) { + state = CALL_STATE (frame); + frame->local = NULL; + client = frame->root->client; + } + /* currently lk fops are not barrier'ed. This is reflecting code in + * server_submit_reply */ + if (client) + lk_heal = ((server_conf_t *) client->this->private)->lk_heal; + + ret = rpcsvc_submit_generic (payload->req, &payload->rsp, 1, + payload->payload, payload->payload_count, + payload->iobref); + iobuf_unref (payload->iob); + if (ret == -1) { + gf_log_callingfn ("", GF_LOG_ERROR, "Reply submission failed"); + if (frame && client && !lk_heal) { + server_connection_cleanup (frame->this, client, + INTERNAL_LOCKS | POSIX_LOCKS); + } else { + /* TODO: Failure of open(dir), create, inodelk, entrylk + or lk fops send failure must be handled specially. */ + } + goto ret; + } + + ret = 0; +ret: + if (state) { + free_state (state); + } + + if (frame) { + gf_client_unref (client); + STACK_DESTROY (frame->root); + } + + if (payload->free_iobref) { + iobref_unref (payload->iobref); + } +out: + return ret; +} + +gf_barrier_payload_t * +gf_barrier_dequeue (gf_barrier_t *barrier) +{ + gf_barrier_payload_t *payload = NULL; + + if (!barrier || list_empty (&barrier->queue)) + return NULL; + + payload = list_entry (barrier->queue.next, + gf_barrier_payload_t, list); + if (payload) { + list_del_init (&payload->list); + barrier->cur_size--; + } + + return payload; +} + + +void* +gf_barrier_dequeue_start (void *data) +{ + server_conf_t *conf = NULL; + gf_barrier_t *barrier = NULL; + gf_barrier_payload_t *payload = NULL; + + conf = (server_conf_t *)data; + if (!conf || !conf->barrier) + return NULL; + barrier = conf->barrier; + + LOCK (&barrier->lock); + { + while (barrier->cur_size) { + payload = gf_barrier_dequeue (barrier); + if (payload) { + if (gf_barrier_transmit (conf, payload)) { + gf_log ("server", GF_LOG_WARNING, + "Failed to transmit"); + } + GF_FREE (payload); + } + } + } + UNLOCK (&barrier->lock); + return NULL; +} + +void +gf_barrier_timeout (void *data) +{ + server_conf_t *conf = NULL; + gf_barrier_t *barrier = NULL; + gf_boolean_t need_dequeue = _gf_false; + + conf = (server_conf_t *)data; + if (!conf || !conf->barrier) + goto out; + barrier = conf->barrier; + + gf_log ("", GF_LOG_INFO, "barrier timed-out"); + LOCK (&barrier->lock); + { + need_dequeue = barrier->on; + barrier->on = _gf_false; + } + UNLOCK (&barrier->lock); + + if (need_dequeue == _gf_true) + gf_barrier_dequeue_start (data); +out: + return; +} + + +int32_t +gf_barrier_start (xlator_t *this) +{ + server_conf_t *conf = NULL; + gf_barrier_t *barrier = NULL; + int32_t ret = -1; + struct timespec time = {0,}; + + conf = this->private; + + GF_VALIDATE_OR_GOTO ("server", this, out); + GF_VALIDATE_OR_GOTO (this->name, conf, out); + GF_VALIDATE_OR_GOTO (this->name, conf->barrier, out); + + barrier = conf->barrier; + + gf_log (this->name, GF_LOG_INFO, "barrier start called"); + LOCK (&barrier->lock); + { + /* if barrier is on, reset timer */ + if (barrier->on == _gf_true) { + ret = gf_timer_call_cancel (this->ctx, barrier->timer); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "unset timer, failing barrier start"); + goto unlock; + } + } + + barrier->on = _gf_true; + time.tv_sec = barrier->time_out; + time.tv_nsec = 0; + + barrier->timer = gf_timer_call_after (this->ctx, time, + gf_barrier_timeout, + (void *)conf); + if (!barrier->timer) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "timer, failing barrier start"); + barrier->on = _gf_false; + } + } +unlock: + UNLOCK (&barrier->lock); + + ret = 0; +out: + return ret; +} + +int32_t +gf_barrier_stop (xlator_t *this) +{ + server_conf_t *conf = NULL; + gf_barrier_t *barrier = NULL; + int32_t ret = -1; + gf_boolean_t need_dequeue = _gf_false; + + conf = this->private; + + GF_VALIDATE_OR_GOTO ("server", this, out); + GF_VALIDATE_OR_GOTO (this->name, conf, out); + GF_VALIDATE_OR_GOTO (this->name, conf->barrier, out); + + barrier = conf->barrier; + + gf_log (this->name, GF_LOG_INFO, "barrier stop called"); + LOCK (&barrier->lock); + { + need_dequeue = barrier->on; + barrier->on = _gf_false; + } + UNLOCK (&barrier->lock); + + if (need_dequeue == _gf_true) { + gf_timer_call_cancel (this->ctx, barrier->timer); + ret = gf_thread_create (&conf->barrier_th, NULL, + gf_barrier_dequeue_start, + conf); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "Failed to start un-barriering"); + goto out; + } + } + ret = 0; +out: + return ret; +} + +int32_t +gf_barrier_fops_configure (xlator_t *this, gf_barrier_t *barrier, char *str) +{ + int32_t ret = -1; + char *dup_str = NULL; + char *str_tok = NULL; + char *save_ptr = NULL; + uint64_t fops = 0; + + /* by defaul fsync & flush needs to be barriered */ + + fops |= 1 << GFS3_OP_FSYNC; + fops |= 1 << GFS3_OP_FLUSH; + + if (!str) + goto done; + + dup_str = gf_strdup (str); + if (!dup_str) + goto done; + + str_tok = strtok_r (dup_str, ",", &save_ptr); + if (!str_tok) + goto done; + + fops = 0; + while (str_tok) { + if (!strcmp(str_tok, "writev")) { + fops |= ((uint64_t)1 << GFS3_OP_WRITE); + } else if (!strcmp(str_tok, "fsync")) { + fops |= ((uint64_t)1 << GFS3_OP_FSYNC); + } else if (!strcmp(str_tok, "read")) { + fops |= ((uint64_t)1 << GFS3_OP_READ); + } else if (!strcmp(str_tok, "rename")) { + fops |= ((uint64_t)1 << GFS3_OP_RENAME); + } else if (!strcmp(str_tok, "flush")) { + fops |= ((uint64_t)1 << GFS3_OP_FLUSH); + } else if (!strcmp(str_tok, "ftruncate")) { + fops |= ((uint64_t)1 << GFS3_OP_FTRUNCATE); + } else if (!strcmp(str_tok, "fallocate")) { + fops |= ((uint64_t)1 << GFS3_OP_FALLOCATE); + } else if (!strcmp(str_tok, "rmdir")) { + fops |= ((uint64_t)1 << GFS3_OP_RMDIR); + } else { + gf_log ("barrier", GF_LOG_ERROR, + "Invalid barrier fop %s", str_tok); + } + + str_tok = strtok_r (NULL, ",", &save_ptr); + } +done: + LOCK (&barrier->lock); + { + barrier->fops = fops; + } + UNLOCK (&barrier->lock); + ret = 0; + + GF_FREE (dup_str); + return ret; +} + +void +gf_barrier_enqueue (gf_barrier_t *barrier, gf_barrier_payload_t *payload) +{ + list_add_tail (&payload->list, &barrier->queue); + barrier->cur_size++; +} + +gf_barrier_payload_t * +gf_barrier_payload (rpcsvc_request_t *req, struct iovec *rsp, + call_frame_t *frame, struct iovec *payload_orig, + int payloadcount, struct iobref *iobref, + struct iobuf *iob, gf_boolean_t free_iobref) +{ + gf_barrier_payload_t *payload = NULL; + + if (!rsp) + return NULL; + + payload = GF_CALLOC (1, sizeof (*payload),1); + if (!payload) + return NULL; + + INIT_LIST_HEAD (&payload->list); + + payload->req = req; + memcpy (&payload->rsp, rsp, sizeof (struct iovec)); + payload->frame = frame; + payload->payload = payload_orig; + payload->payload_count = payloadcount; + payload->iobref = iobref; + payload->iob = iob; + payload->free_iobref = free_iobref; + + return payload; +} diff --git a/xlators/protocol/server/src/server-helpers.h b/xlators/protocol/server/src/server-helpers.h index 3c257b3bcef..486048b8ad3 100644 --- a/xlators/protocol/server/src/server-helpers.h +++ b/xlators/protocol/server/src/server-helpers.h @@ -29,6 +29,10 @@ #define IS_NOT_ROOT(pathlen) ((pathlen > 2)? 1 : 0) +#define is_fop_barriered(fops, procnum) (fops & ((uint64_t)1 << procnum)) + +#define barrier_add_to_queue(barrier) (barrier->on || barrier->cur_size) + void free_state (server_state_t *state); void server_loc_wipe (loc_t *loc); @@ -58,4 +62,15 @@ int auth_set_username_passwd (dict_t *input_params, dict_t *config_params, server_ctx_t *server_ctx_get (client_t *client, xlator_t *xlator); +int32_t gf_barrier_start (xlator_t *this); +int32_t gf_barrier_stop (xlator_t *this); +int32_t gf_barrier_fops_configure (xlator_t *this, gf_barrier_t *barrier, + char *str); +void gf_barrier_enqueue (gf_barrier_t *barrier, gf_barrier_payload_t *stub); +gf_barrier_payload_t * +gf_barrier_payload (rpcsvc_request_t *req, struct iovec *rsp, + call_frame_t *frame, struct iovec *payload, + int payloadcount, struct iobref *iobref, + struct iobuf *iob, gf_boolean_t free_iobref); + #endif /* !_SERVER_HELPERS_H */ diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c index 378aa393a50..7855170586a 100644 --- a/xlators/protocol/server/src/server.c +++ b/xlators/protocol/server/src/server.c @@ -130,8 +130,6 @@ ret: return iob; } - - int server_submit_reply (call_frame_t *frame, rpcsvc_request_t *req, void *arg, struct iovec *payload, int payloadcount, @@ -144,6 +142,10 @@ server_submit_reply (call_frame_t *frame, rpcsvc_request_t *req, void *arg, char new_iobref = 0; client_t *client = NULL; gf_boolean_t lk_heal = _gf_false; + server_conf_t *conf = NULL; + gf_barrier_t *barrier = NULL; + gf_barrier_payload_t *stub = NULL; + gf_boolean_t barriered = _gf_false; GF_VALIDATE_OR_GOTO ("server", req, ret); @@ -151,6 +153,7 @@ server_submit_reply (call_frame_t *frame, rpcsvc_request_t *req, void *arg, state = CALL_STATE (frame); frame->local = NULL; client = frame->root->client; + conf = (server_conf_t *) client->this->private; } if (client) @@ -173,6 +176,32 @@ server_submit_reply (call_frame_t *frame, rpcsvc_request_t *req, void *arg, iobref_add (iobref, iob); + if (conf) + barrier = conf->barrier; + if (barrier) { + /* todo: write's with fd flags set to O_SYNC and O_DIRECT */ + LOCK (&barrier->lock); + { + if (is_fop_barriered (barrier->fops, req->procnum) && + (barrier_add_to_queue (barrier))) { + stub = gf_barrier_payload (req, &rsp, frame, + payload, + payloadcount, iobref, + iob, new_iobref); + if (stub) { + gf_barrier_enqueue (barrier, stub); + barriered = _gf_true; + } else { + gf_log ("", GF_LOG_ERROR, "Failed to " + " barrier fop %"PRIu64, + ((uint64_t)1 << req->procnum)); + } + } + } + UNLOCK (&barrier->lock); + if (barriered == _gf_true) + goto out; + } /* Then, submit the message for transmission. */ ret = rpcsvc_submit_generic (req, &rsp, 1, payload, payloadcount, iobref); @@ -214,7 +243,7 @@ ret: if (new_iobref) { iobref_unref (iobref); } - +out: return ret; } @@ -798,6 +827,8 @@ init (xlator_t *this) server_conf_t *conf = NULL; rpcsvc_listener_t *listener = NULL; char *statedump_path = NULL; + gf_barrier_t *barrier = NULL; + char *str = NULL; GF_VALIDATE_OR_GOTO ("init", this, out); if (this->children == NULL) { @@ -946,6 +977,37 @@ init (xlator_t *this) } } #endif + /* barrier related */ + barrier = GF_CALLOC (1, sizeof (*barrier),1); + if (!barrier) { + gf_log (this->name, GF_LOG_WARNING, + "WARNING: Failed to allocate barrier"); + ret = -1; + goto out; + } + + LOCK_INIT (&barrier->lock); + INIT_LIST_HEAD (&barrier->queue); + barrier->on = _gf_false; + + GF_OPTION_INIT ("barrier-queue-length", barrier->max_size, + int64, out); + GF_OPTION_INIT ("barrier-timeout", barrier->time_out, + uint64, out); + + ret = dict_get_str (this->options, "barrier-fops", &str); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "setting barrier fops to default value"); + } + ret = gf_barrier_fops_configure (this, barrier, str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "invalid barrier fops specified"); + goto out; + } + + conf->barrier = barrier; this->private = conf; ret = 0; @@ -999,12 +1061,48 @@ int notify (xlator_t *this, int32_t event, void *data, ...) { int ret = 0; + int32_t val = 0; + dict_t *dict = NULL; + dict_t *output = NULL; + va_list ap; + + dict = data; + va_start (ap, data); + output = va_arg (ap, dict_t*); + va_end (ap); + switch (event) { + case GF_EVENT_VOLUME_BARRIER_OP: + ret = dict_get_int32 (dict, "barrier", &val); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Wrong BARRIER event"); + goto out; + } + /* !val un-barrier, if val, barrier */ + if (val) { + ret = gf_barrier_start (this); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Barrier start failed"); + } else { + ret = gf_barrier_stop (this); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Barrier stop failed"); + } + ret = dict_set_int32 (output, "barrier-status", ret); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to set barrier-status in dict"); + break; + + /* todo: call default_notify to make other xlators handle it.*/ default: default_notify (this, event, data); break; } - +out: return ret; } @@ -1128,6 +1226,23 @@ struct volume_options options[] = { "overrides the auth.allow option. By default, all" " connections are allowed." }, - + {.key = {"barrier-timeout"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "60", + .min = 0, + .max = 360, + .description = "Barrier timeout in seconds", + }, + {.key = {"barrier-queue-length"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "4096", + .min = 0, + .max = 16384, + .description = "Barrier queue length", + }, + {.key = {"barrier-fops"}, + .type = GF_OPTION_TYPE_STR, + .description = "Allow a comma seperated fop lists", + }, { .key = {NULL} }, }; diff --git a/xlators/protocol/server/src/server.h b/xlators/protocol/server/src/server.h index 43e84921c8a..782327d775b 100644 --- a/xlators/protocol/server/src/server.h +++ b/xlators/protocol/server/src/server.h @@ -28,6 +28,34 @@ #define GF_MAX_SOCKET_WINDOW_SIZE (1 * GF_UNIT_MB) #define GF_MIN_SOCKET_WINDOW_SIZE (0) +struct _gf_barrier_payload { + rpcsvc_request_t *req; + struct iovec rsp; + call_frame_t *frame; + struct iovec *payload; + struct iobref *iobref; + struct iobuf *iob; + int payload_count; + gf_boolean_t free_iobref; + struct list_head list; +}; + +typedef struct _gf_barrier_payload gf_barrier_payload_t; + +struct _gf_barrier { + gf_lock_t lock; + gf_boolean_t on; + gf_boolean_t force; + size_t cur_size; + int64_t max_size; + uint64_t fops; + gf_timer_t *timer; + uint64_t time_out; + struct list_head queue; +}; + +typedef struct _gf_barrier gf_barrier_t; + typedef enum { INTERNAL_LOCKS = 1, POSIX_LOCKS = 2, @@ -56,7 +84,9 @@ struct server_conf { struct timespec grace_ts; dict_t *auth_modules; pthread_mutex_t mutex; + gf_barrier_t *barrier; struct list_head xprt_list; + pthread_t barrier_th; }; typedef struct server_conf server_conf_t; |