From 97ce783de326b51fcba65737f07db2c314d1e218 Mon Sep 17 00:00:00 2001
From: Avra Sengupta
Date: Thu, 6 Feb 2014 13:03:58 +0530
Subject: glusterd: Volume locks and transaction specific opinfos

With this patch we are replacing the existing cluster-wide lock taken on
glusterds across the cluster, with volume locks which are also taken on
glusterds across the cluster, but are volume specific. So with the volume
locks we are able to perform more than one gluster operation at the same
time, as long as the operations are being performed on different volumes.

We maintain a global list of volume-locks (using a dict for a list) where
the key is the volume name, and which saves the uuid of the originator
glusterd. These locks are held and released per volume transaction.

In order to achieve multiple gluster operations occurring at the same time,
we also separate opinfos in the op-state-machine, as a part of this patch.
To do so, we generate a unique transaction-id (uuid) per gluster
transaction. An opinfo is then associated with this transaction id, which
is used throughout the transaction. We maintain a run-time global list
(using a dict) of transaction-ids, and their respective opinfos to achieve
this.

Upstream Feature Page:
http://www.gluster.org/community/documentation/index.php/Features/glusterd-volume-locks

Change-Id: Iaad505a854bac8de8f83beec0357eb6cde3f7ea8
BUG: 1011470
Signed-off-by: Avra Sengupta
Reviewed-on: http://review.gluster.org/5994
Tested-by: Gluster Build System
Reviewed-by: Vijay Bellur
---
 libglusterfs/src/globals.h | 1 +
 libglusterfs/src/mem-types.h | 5 +-
 rpc/rpc-lib/src/protocol-common.h | 10 +
 rpc/xdr/src/glusterd1-xdr.c | 78 +++
 rpc/xdr/src/glusterd1-xdr.h | 53 ++
 rpc/xdr/src/glusterd1-xdr.x | 29 +
 tests/basic/volume-locks.t | 106 ++++
 tests/include.rc | 1 +
 xlators/mgmt/glusterd/src/Makefile.am | 5 +-
 xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 4 +-
 xlators/mgmt/glusterd/src/glusterd-handler.c | 491 ++++++++++++++--
 xlators/mgmt/glusterd/src/glusterd-handshake.c | 11 +
 xlators/mgmt/glusterd/src/glusterd-locks.c | 177 ++++++
 xlators/mgmt/glusterd/src/glusterd-locks.h | 38 ++
 xlators/mgmt/glusterd/src/glusterd-op-sm.c | 633 +++++++++++++++++----
 xlators/mgmt/glusterd/src/glusterd-op-sm.h | 21 +-
 xlators/mgmt/glusterd/src/glusterd-quota.c | 6 +-
 xlators/mgmt/glusterd/src/glusterd-rebalance.c | 2 +-
 xlators/mgmt/glusterd/src/glusterd-replace-brick.c | 16 +-
 xlators/mgmt/glusterd/src/glusterd-rpc-ops.c | 321 ++++++++++-
 xlators/mgmt/glusterd/src/glusterd-sm.h | 1 +
 xlators/mgmt/glusterd/src/glusterd-syncop.c | 477 ++++++++++++++--
 xlators/mgmt/glusterd/src/glusterd-utils.c | 63 +-
 xlators/mgmt/glusterd/src/glusterd-utils.h | 5 +-
 xlators/mgmt/glusterd/src/glusterd.c | 7 +
 xlators/mgmt/glusterd/src/glusterd.h | 18 +
 26 files changed, 2329 insertions(+), 250 deletions(-)
 create mode 100755 tests/basic/volume-locks.t
 create mode 100644 xlators/mgmt/glusterd/src/glusterd-locks.c
 create mode 100644 xlators/mgmt/glusterd/src/glusterd-locks.h

diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h index 0de03925d..16ab96268 100644 --- a/libglusterfs/src/globals.h +++ b/libglusterfs/src/globals.h @@ -32,6 +32,7 @@ #define GD_OP_VERSION_MAX 4 /* MAX VERSION is the maximum count in VME table, should keep changing with introduction of newer versions */ +#define GD_OP_VERSION_4 4 /* Op-Version 4 */ #include "xlator.h" diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h index 726d38eb6..26237fecb 100644 --- a/libglusterfs/src/mem-types.h +++
b/libglusterfs/src/mem-types.h @@ -119,6 +119,9 @@ enum gf_common_mem_types_ { gf_common_mt_syncopctx = 103, gf_common_mt_iobrefs = 104, gf_common_mt_gsync_status_t = 105, - gf_common_mt_end = 106 + gf_common_mt_uuid_t = 106, + gf_common_mt_vol_lock_obj_t = 107, + gf_common_mt_txn_opinfo_obj_t = 108, + gf_common_mt_end = 109 }; #endif diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index 5876a500b..6d28ed90e 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -236,6 +236,13 @@ struct gf_gsync_detailed_status_ { char total_files_skipped[NAME_MAX]; }; +enum glusterd_mgmt_v3_procnum { + GLUSTERD_MGMT_V3_NULL, /* 0 */ + GLUSTERD_MGMT_V3_VOLUME_LOCK, + GLUSTERD_MGMT_V3_VOLUME_UNLOCK, + GLUSTERD_MGMT_V3_MAXVALUE, +}; + typedef struct gf_gsync_detailed_status_ gf_gsync_status_t; #define GLUSTER_HNDSK_PROGRAM 14398633 /* Completely random */ @@ -268,6 +275,9 @@ typedef struct gf_gsync_detailed_status_ gf_gsync_status_t; #define GD_BRICK_PROGRAM 4867634 /*Completely random*/ #define GD_BRICK_VERSION 2 +/* Third version */ +#define GD_MGMT_V3_VERSION 3 + /* OP-VERSION handshake */ #define GD_MGMT_HNDSK_PROGRAM 1239873 /* Completely random */ #define GD_MGMT_HNDSK_VERSION 1 diff --git a/rpc/xdr/src/glusterd1-xdr.c b/rpc/xdr/src/glusterd1-xdr.c index 213b48bc6..6c6514c90 100644 --- a/rpc/xdr/src/glusterd1-xdr.c +++ b/rpc/xdr/src/glusterd1-xdr.c @@ -491,3 +491,81 @@ xdr_gd1_mgmt_brick_op_rsp (XDR *xdrs, gd1_mgmt_brick_op_rsp *objp) return FALSE; return TRUE; } + +bool_t +xdr_gd1_mgmt_volume_lock_req (XDR *xdrs, gd1_mgmt_volume_lock_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_vector (xdrs, (char *)objp->uuid, 16, + sizeof (u_char), (xdrproc_t) xdr_u_char)) + return FALSE; + if (!xdr_vector (xdrs, (char *)objp->txn_id, 16, + sizeof (u_char), (xdrproc_t) xdr_u_char)) + return FALSE; + if (!xdr_int (xdrs, &objp->op)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gd1_mgmt_volume_lock_rsp (XDR *xdrs, gd1_mgmt_volume_lock_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_vector (xdrs, (char *)objp->uuid, 16, + sizeof (u_char), (xdrproc_t) xdr_u_char)) + return FALSE; + if (!xdr_vector (xdrs, (char *)objp->txn_id, 16, + sizeof (u_char), (xdrproc_t) xdr_u_char)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gd1_mgmt_volume_unlock_req (XDR *xdrs, gd1_mgmt_volume_unlock_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_vector (xdrs, (char *)objp->uuid, 16, + sizeof (u_char), (xdrproc_t) xdr_u_char)) + return FALSE; + if (!xdr_vector (xdrs, (char *)objp->txn_id, 16, + sizeof (u_char), (xdrproc_t) xdr_u_char)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gd1_mgmt_volume_unlock_rsp (XDR *xdrs, gd1_mgmt_volume_unlock_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_vector (xdrs, (char *)objp->uuid, 16, + sizeof (u_char), (xdrproc_t) xdr_u_char)) + return FALSE; + if (!xdr_vector (xdrs, (char *)objp->txn_id, 16, + sizeof (u_char), (xdrproc_t) xdr_u_char)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) 
&objp->dict.dict_len, ~0)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + return TRUE; +} diff --git a/rpc/xdr/src/glusterd1-xdr.h b/rpc/xdr/src/glusterd1-xdr.h index c35930cad..4115ff7a8 100644 --- a/rpc/xdr/src/glusterd1-xdr.h +++ b/rpc/xdr/src/glusterd1-xdr.h @@ -202,6 +202,51 @@ struct gd1_mgmt_brick_op_rsp { }; typedef struct gd1_mgmt_brick_op_rsp gd1_mgmt_brick_op_rsp; +struct gd1_mgmt_volume_lock_req { + u_char uuid[16]; + u_char txn_id[16]; + int op; + struct { + u_int dict_len; + char *dict_val; + } dict; +}; +typedef struct gd1_mgmt_volume_lock_req gd1_mgmt_volume_lock_req; + +struct gd1_mgmt_volume_lock_rsp { + u_char uuid[16]; + u_char txn_id[16]; + struct { + u_int dict_len; + char *dict_val; + } dict; + int op_ret; + int op_errno; +}; +typedef struct gd1_mgmt_volume_lock_rsp gd1_mgmt_volume_lock_rsp; + +struct gd1_mgmt_volume_unlock_req { + u_char uuid[16]; + u_char txn_id[16]; + struct { + u_int dict_len; + char *dict_val; + } dict; +}; +typedef struct gd1_mgmt_volume_unlock_req gd1_mgmt_volume_unlock_req; + +struct gd1_mgmt_volume_unlock_rsp { + u_char uuid[16]; + u_char txn_id[16]; + struct { + u_int dict_len; + char *dict_val; + } dict; + int op_ret; + int op_errno; +}; +typedef struct gd1_mgmt_volume_unlock_rsp gd1_mgmt_volume_unlock_rsp; + /* the xdr functions */ #if defined(__STDC__) || defined(__cplusplus) @@ -224,6 +269,10 @@ extern bool_t xdr_gd1_mgmt_friend_update (XDR *, gd1_mgmt_friend_update*); extern bool_t xdr_gd1_mgmt_friend_update_rsp (XDR *, gd1_mgmt_friend_update_rsp*); extern bool_t xdr_gd1_mgmt_brick_op_req (XDR *, gd1_mgmt_brick_op_req*); extern bool_t xdr_gd1_mgmt_brick_op_rsp (XDR *, gd1_mgmt_brick_op_rsp*); +extern bool_t xdr_gd1_mgmt_volume_lock_req (XDR *, gd1_mgmt_volume_lock_req*); +extern bool_t xdr_gd1_mgmt_volume_lock_rsp (XDR *, gd1_mgmt_volume_lock_rsp*); +extern bool_t xdr_gd1_mgmt_volume_unlock_req (XDR *, gd1_mgmt_volume_unlock_req*); +extern bool_t xdr_gd1_mgmt_volume_unlock_rsp (XDR *, gd1_mgmt_volume_unlock_rsp*); #else /* K&R C */ extern bool_t xdr_glusterd_volume_status (); @@ -245,6 +294,10 @@ extern bool_t xdr_gd1_mgmt_friend_update (); extern bool_t xdr_gd1_mgmt_friend_update_rsp (); extern bool_t xdr_gd1_mgmt_brick_op_req (); extern bool_t xdr_gd1_mgmt_brick_op_rsp (); +extern bool_t xdr_gd1_mgmt_volume_lock_req (); +extern bool_t xdr_gd1_mgmt_volume_lock_rsp (); +extern bool_t xdr_gd1_mgmt_volume_unlock_req (); +extern bool_t xdr_gd1_mgmt_volume_unlock_rsp (); #endif /* K&R C */ diff --git a/rpc/xdr/src/glusterd1-xdr.x b/rpc/xdr/src/glusterd1-xdr.x index fc1bb58b4..f29a9d214 100644 --- a/rpc/xdr/src/glusterd1-xdr.x +++ b/rpc/xdr/src/glusterd1-xdr.x @@ -125,3 +125,32 @@ struct gd1_mgmt_brick_op_rsp { opaque output<>; string op_errstr<>; } ; + +struct gd1_mgmt_volume_lock_req { + unsigned char uuid[16]; + unsigned char txn_id[16]; + int op; + opaque dict<>; +} ; + +struct gd1_mgmt_volume_lock_rsp { + unsigned char uuid[16]; + unsigned char txn_id[16]; + opaque dict<>; + int op_ret; + int op_errno; +} ; + +struct gd1_mgmt_volume_unlock_req { + unsigned char uuid[16]; + unsigned char txn_id[16]; + opaque dict<>; +} ; + +struct gd1_mgmt_volume_unlock_rsp { + unsigned char uuid[16]; + unsigned char txn_id[16]; + opaque dict<>; + int op_ret; + int op_errno; +} ; diff --git a/tests/basic/volume-locks.t b/tests/basic/volume-locks.t new file mode 100755 index 000000000..b9e94b7e1 --- /dev/null +++ b/tests/basic/volume-locks.t @@ -0,0 +1,106 @@ +#!/bin/bash + 
+. $(dirname $0)/../include.rc +. $(dirname $0)/../cluster.rc + +function check_peers { + $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l +} + +function volume_count { + local cli=$1; + if [ $cli -eq '1' ] ; then + $CLI_1 volume info | grep 'Volume Name' | wc -l; + else + $CLI_2 volume info | grep 'Volume Name' | wc -l; + fi +} + +function volinfo_field() +{ + local vol=$1; + local field=$2; + + $CLI_1 volume info $vol | grep "^$field: " | sed 's/.*: //'; +} + +function two_diff_vols_create { + # Both volume creates should be successful + $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 $H3:$B3/$V0 & + $CLI_2 volume create $V1 $H1:$B1/$V1 $H2:$B2/$V1 $H3:$B3/$V1 +} + +function two_diff_vols_start { + # Both volume starts should be successful + $CLI_1 volume start $V0 & + $CLI_2 volume start $V1 +} + +function two_diff_vols_stop_force { + # Force stop, so that if rebalance from the + # remove bricks is in progress, stop can + # still go ahead. Both volume stops should + # be successful + $CLI_1 volume stop $V0 force & + $CLI_2 volume stop $V1 force +} + +function same_vol_remove_brick { + + # Running two same vol commands at the same time can result in + # two success', two failures, or one success and one failure, all + # of which are valid. The only thing that shouldn't happen is a + # glusterd crash. + + local vol=$1 + local brick=$2 + $CLI_1 volume remove-brick $1 $2 start & + $CLI_2 volume remove-brick $1 $2 start +} + +cleanup; + +TEST launch_cluster 3; +TEST $CLI_1 peer probe $H2; +TEST $CLI_1 peer probe $H3; + +EXPECT_WITHIN 20 2 check_peers + +two_diff_vols_create +EXPECT 'Created' volinfo_field $V0 'Status'; +EXPECT 'Created' volinfo_field $V1 'Status'; + +two_diff_vols_start +EXPECT 'Started' volinfo_field $V0 'Status'; +EXPECT 'Started' volinfo_field $V1 'Status'; + +same_vol_remove_brick $V0 $H2:$B2/$V0 +# Checking glusterd crashed or not after same volume remove brick +# on both nodes. +EXPECT_WITHIN 20 2 check_peers + +same_vol_remove_brick $V1 $H2:$B2/$V1 +# Checking glusterd crashed or not after same volume remove brick +# on both nodes. +EXPECT_WITHIN 20 2 check_peers + +$CLI_1 volume set $V0 diagnostics.client-log-level DEBUG & +$CLI_1 volume set $V1 diagnostics.client-log-level DEBUG +kill_glusterd 3 +$CLI_1 volume status $V0 +$CLI_2 volume status $V1 +$CLI_1 peer status +EXPECT_WITHIN 20 1 check_peers +EXPECT 'Started' volinfo_field $V0 'Status'; +EXPECT 'Started' volinfo_field $V1 'Status'; + +TEST $glusterd_3 +$CLI_1 volume status $V0 +$CLI_2 volume status $V1 +$CLI_1 peer status +#EXPECT_WITHIN 20 2 check_peers +#EXPECT 'Started' volinfo_field $V0 'Status'; +#EXPECT 'Started' volinfo_field $V1 'Status'; +#two_diff_vols_stop_force +#EXPECT_WITHIN 20 2 check_peers +cleanup; diff --git a/tests/include.rc b/tests/include.rc index 44259872a..250220efa 100644 --- a/tests/include.rc +++ b/tests/include.rc @@ -3,6 +3,7 @@ M1=${M1:=/mnt/glusterfs/1}; # 1st mount point for FUSE N0=${N0:=/mnt/nfs/0}; # 0th mount point for NFS N1=${N1:=/mnt/nfs/1}; # 1st mount point for NFS V0=${V0:=patchy}; # volume name to use in tests +V1=${V1:=patchy1}; # volume name to use in tests B0=${B0:=/d/backends}; # top level of brick directories H0=${H0:=`hostname --fqdn`}; # hostname DEBUG=${DEBUG:=0} # turn on debugging? 
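The test above drives the behaviour the commit message describes: operations on different volumes (patchy and patchy1) may proceed concurrently, while a second transaction on a volume whose lock is already held must be refused rather than crash glusterd. The following is a rough, self-contained sketch of that rule only; it is not part of this patch, and it uses a fixed array of plain strings where the real glusterd-locks.c further down keys a dict_t by volume name and stores the originator glusterd's uuid_t.

/* Simplified model of the per-volume lock table: one originator may hold
 * the lock for a given volume name at a time. The array and string owner
 * ids below are stand-ins for glusterd's dict_t and uuid_t. */
#include <stdio.h>
#include <string.h>

#define MAX_LOCKS 16

struct vol_lock {
        char volname[64];
        char owner[64];   /* stands in for the originator glusterd's uuid */
        int  in_use;
};

static struct vol_lock lock_table[MAX_LOCKS];

/* Acquire the lock for volname on behalf of owner; fail if already held. */
static int
volume_lock (const char *volname, const char *owner)
{
        int i, free_slot = -1;

        for (i = 0; i < MAX_LOCKS; i++) {
                if (lock_table[i].in_use &&
                    strcmp (lock_table[i].volname, volname) == 0)
                        return -1;              /* already held: reject */
                if (!lock_table[i].in_use && free_slot < 0)
                        free_slot = i;
        }
        if (free_slot < 0)
                return -1;                      /* table full */

        snprintf (lock_table[free_slot].volname,
                  sizeof (lock_table[free_slot].volname), "%s", volname);
        snprintf (lock_table[free_slot].owner,
                  sizeof (lock_table[free_slot].owner), "%s", owner);
        lock_table[free_slot].in_use = 1;
        return 0;
}

/* Release the lock; only the owner that acquired it may release it. */
static int
volume_unlock (const char *volname, const char *owner)
{
        int i;

        for (i = 0; i < MAX_LOCKS; i++) {
                if (lock_table[i].in_use &&
                    strcmp (lock_table[i].volname, volname) == 0) {
                        if (strcmp (lock_table[i].owner, owner) != 0)
                                return -1;      /* owner mismatch */
                        lock_table[i].in_use = 0;
                        return 0;
                }
        }
        return -1;                              /* lock not held */
}

int
main (void)
{
        /* Two transactions on different volumes proceed concurrently ... */
        printf ("lock patchy  by node-1: %d\n", volume_lock ("patchy",  "node-1"));
        printf ("lock patchy1 by node-2: %d\n", volume_lock ("patchy1", "node-2"));
        /* ... but a second transaction on the same volume is refused. */
        printf ("lock patchy  by node-2: %d\n", volume_lock ("patchy",  "node-2"));
        printf ("unlock patchy by node-1: %d\n", volume_unlock ("patchy", "node-1"));
        return 0;
}

Compiled standalone, the three lock attempts print 0, 0 and -1: the two volumes lock independently and the conflicting request on the already-locked volume is rejected, which is the invariant glusterd_volume_lock() in glusterd-locks.c enforces per volume transaction.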
diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am index a6f49ae01..b109e6dff 100644 --- a/xlators/mgmt/glusterd/src/Makefile.am +++ b/xlators/mgmt/glusterd/src/Makefile.am @@ -11,7 +11,8 @@ glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \ glusterd-volgen.c glusterd-rebalance.c glusterd-quota.c \ glusterd-geo-rep.c glusterd-replace-brick.c glusterd-log-ops.c \ glusterd-volume-ops.c glusterd-brick-ops.c glusterd-mountbroker.c \ - glusterd-syncop.c glusterd-hooks.c glusterd-volume-set.c + glusterd-syncop.c glusterd-hooks.c glusterd-volume-set.c \ + glusterd-locks.c glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ $(top_builddir)/rpc/xdr/src/libgfxdr.la \ @@ -21,7 +22,7 @@ glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \ glusterd-sm.h glusterd-store.h glusterd-mem-types.h \ glusterd-pmap.h glusterd-volgen.h glusterd-mountbroker.h \ - glusterd-syncop.h glusterd-hooks.h + glusterd-syncop.h glusterd-hooks.h glusterd-locks.h AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ -I$(rpclibdir) -I$(CONTRIBDIR)/rbtree \ diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index 26d608a2f..ced916ea1 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -1467,7 +1467,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) goto out; } - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { ret = glusterd_generate_and_set_task_id (dict, GF_REMOVE_BRICK_TID_KEY); if (ret) { @@ -1733,7 +1733,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) /* Set task-id, if available, in ctx dict for operations other than * start */ - if (is_origin_glusterd () && (cmd != GF_OP_CMD_START)) { + if (is_origin_glusterd (dict) && (cmd != GF_OP_CMD_START)) { if (!uuid_is_null (volinfo->rebal.rebalance_id)) { ret = glusterd_copy_uuid_to_dict (volinfo->rebal.rebalance_id, dict, diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index ab3fa94c0..797141dec 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -33,6 +33,7 @@ #include "glusterd-op-sm.h" #include "glusterd-utils.h" #include "glusterd-store.h" +#include "glusterd-locks.h" #include "glusterd1-xdr.h" #include "cli1-xdr.h" @@ -55,6 +56,7 @@ #endif extern glusterd_op_info_t opinfo; +extern uuid_t global_txn_id; int glusterd_big_locked_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, @@ -590,10 +592,16 @@ int32_t glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx, char *err_str, size_t err_len) { - int32_t ret = -1; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - int32_t locked = 0; + int32_t ret = -1; + dict_t *dict = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int32_t locked = 0; + char *tmp = NULL; + char *volname = NULL; + uuid_t *txn_id = NULL; + glusterd_op_info_t txn_op_info = {{0},}; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; GF_ASSERT (req); GF_ASSERT ((op > GD_OP_NONE) && (op < GD_OP_MAX)); @@ -604,33 +612,122 @@ glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx, priv = this->private; GF_ASSERT (priv); - ret = glusterd_lock (MY_UUID); + dict = ctx; + + /* Generate a transaction-id for this operation and + * save 
it in the dict. This transaction id distinguishes + * each transaction, and helps separate opinfos in the + * op state machine. */ + ret = glusterd_generate_txn_id (dict, &txn_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to generate transaction id"); + goto out; + + } + + /* Save the MY_UUID as the originator_uuid. This originator_uuid + * will be used by is_origin_glusterd() to determine if a node + * is the originator node for a command. */ + ret = glusterd_set_originator_uuid (dict); if (ret) { gf_log (this->name, GF_LOG_ERROR, - "Unable to acquire lock on localhost, ret: %d", ret); - snprintf (err_str, err_len, "Another transaction is in progress. " - "Please try again after sometime."); + "Failed to set originator_uuid."); goto out; } + /* Based on the op_version, acquire a cluster or volume lock */ + if (priv->op_version < GD_OP_VERSION_4) { + ret = glusterd_lock (MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock on localhost, ret: %d", + ret); + snprintf (err_str, err_len, + "Another transaction is in progress. " + "Please try again after sometime."); + goto out; + } + } else { + /* If no volname is given as a part of the command, locks will + * not be held */ + ret = dict_get_str (dict, "volname", &tmp); + if (ret) { + gf_log ("", GF_LOG_INFO, + "No Volume name present. " + "Locks not being held."); + goto local_locking_done; + } else { + /* Use a copy of volname, as cli response will be + * sent before the unlock, and the volname in the + * dict, might be removed */ + volname = gf_strdup (tmp); + if (!volname) + goto out; + } + + ret = glusterd_volume_lock (volname, MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock for %s", volname); + snprintf (err_str, err_len, + "Another transaction is in progress for %s. " + "Please try again after sometime.", volname); + goto out; + } + } + locked = 1; gf_log (this->name, GF_LOG_DEBUG, "Acquired lock on localhost"); - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_START_LOCK, NULL); +local_locking_done: + + /* If no volname is given as a part of the command, locks will + * not be held, hence sending stage event. 
*/ + if (volname) + event_type = GD_OP_EVENT_START_LOCK; + else { + txn_op_info.state.state = GD_OP_STATE_LOCK_SENT; + event_type = GD_OP_EVENT_ALL_ACC; + } + + /* Save opinfo for this transaction with the transaction id */ + glusterd_txn_opinfo_init (&txn_op_info, NULL, &op, ctx, req); + + ret = glusterd_set_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + if (ctx) + dict_unref (ctx); + goto out; + } + + ret = glusterd_op_sm_inject_event (event_type, txn_id, ctx); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to acquire cluster" " lock."); goto out; } - glusterd_op_set_op (op); - glusterd_op_set_ctx (ctx); - glusterd_op_set_req (req); - - out: - if (locked && ret) - glusterd_unlock (MY_UUID); + if (locked && ret) { + /* Based on the op-version, we release the + * cluster or volume lock */ + if (priv->op_version < GD_OP_VERSION_4) + glusterd_unlock (MY_UUID); + else { + ret = glusterd_volume_unlock (volname, MY_UUID); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to release lock for %s", + volname); + ret = -1; + } + } + + if (volname) + GF_FREE (volname); gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; @@ -639,11 +736,15 @@ out: int __glusterd_handle_cluster_lock (rpcsvc_request_t *req) { - gd1_mgmt_cluster_lock_req lock_req = {{0},}; - int32_t ret = -1; - glusterd_op_lock_ctx_t *ctx = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - xlator_t *this = NULL; + dict_t *op_ctx = NULL; + int32_t ret = -1; + gd1_mgmt_cluster_lock_req lock_req = {{0},}; + glusterd_op_lock_ctx_t *ctx = NULL; + glusterd_op_t op = GD_OP_EVENT_LOCK; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_op_info_t txn_op_info = {{0},}; + uuid_t *txn_id = &global_txn_id; + xlator_t *this = NULL; this = THIS; GF_ASSERT (this); @@ -678,8 +779,29 @@ __glusterd_handle_cluster_lock (rpcsvc_request_t *req) uuid_copy (ctx->uuid, lock_req.uuid); ctx->req = req; + ctx->dict = NULL; + + op_ctx = dict_new (); + if (!op_ctx) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set new dict"); + goto out; + } - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK, ctx); + glusterd_txn_opinfo_init (&txn_op_info, NULL, &op, op_ctx, req); + + ret = glusterd_set_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + dict_unref (txn_op_info.op_ctx); + goto out; + } + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK, txn_id, ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_LOCK"); out: gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -697,6 +819,186 @@ glusterd_handle_cluster_lock (rpcsvc_request_t *req) __glusterd_handle_cluster_lock); } +static int +glusterd_handle_volume_lock_fn (rpcsvc_request_t *req) +{ + gd1_mgmt_volume_lock_req lock_req = {{0},}; + int32_t ret = -1; + glusterd_op_lock_ctx_t *ctx = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + glusterd_op_info_t txn_op_info = {{0},}; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &lock_req, + (xdrproc_t)xdr_gd1_mgmt_volume_lock_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode lock " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "Received volume lock req " + "from uuid: %s txn_id: %s", uuid_utoa (lock_req.uuid), + uuid_utoa (lock_req.txn_id)); + + if 
(glusterd_friend_find_by_uuid (lock_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa (lock_req.uuid)); + ret = -1; + goto out; + } + + ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t); + if (!ctx) { + ret = -1; + goto out; + } + + uuid_copy (ctx->uuid, lock_req.uuid); + ctx->req = req; + + ctx->dict = dict_new (); + if (!ctx->dict) { + ret = -1; + goto out; + } + + ret = dict_unserialize (lock_req.dict.dict_val, + lock_req.dict.dict_len, &ctx->dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + glusterd_txn_opinfo_init (&txn_op_info, NULL, &lock_req.op, + ctx->dict, req); + + ret = glusterd_set_txn_opinfo (&lock_req.txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + goto out; + } + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK, + &lock_req.txn_id, ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_LOCK"); + +out: + if (ret) { + if (ctx->dict) + dict_destroy (ctx->dict); + if (ctx) + GF_FREE (ctx); + } + + glusterd_friend_sm (); + glusterd_op_sm (); + + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_handle_volume_lock (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_volume_lock_fn); +} + +static int +glusterd_handle_volume_unlock_fn (rpcsvc_request_t *req) +{ + gd1_mgmt_volume_unlock_req lock_req = {{0},}; + int32_t ret = -1; + glusterd_op_lock_ctx_t *ctx = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &lock_req, + (xdrproc_t)xdr_gd1_mgmt_volume_unlock_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode unlock " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "Received volume unlock req " + "from uuid: %s", uuid_utoa (lock_req.uuid)); + + if (glusterd_friend_find_by_uuid (lock_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. 
Ignoring request.", + uuid_utoa (lock_req.uuid)); + ret = -1; + goto out; + } + + ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t); + if (!ctx) { + ret = -1; + goto out; + } + + uuid_copy (ctx->uuid, lock_req.uuid); + ctx->req = req; + + ctx->dict = dict_new (); + if (!ctx->dict) { + ret = -1; + goto out; + } + + ret = dict_unserialize (lock_req.dict.dict_val, + lock_req.dict.dict_len, &ctx->dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK, + &lock_req.txn_id, ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_UNLOCK"); + +out: + if (ret) { + if (ctx->dict) + dict_destroy (ctx->dict); + if (ctx) + GF_FREE (ctx); + } + + glusterd_friend_sm (); + glusterd_op_sm (); + + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_handle_volume_unlock (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_volume_unlock_fn); +} + int glusterd_req_ctx_create (rpcsvc_request_t *rpc_req, glusterd_op_t op, uuid_t uuid, @@ -755,6 +1057,9 @@ __glusterd_handle_stage_op (rpcsvc_request_t *req) gd1_mgmt_stage_op_req op_req = {{0},}; glusterd_peerinfo_t *peerinfo = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; + glusterd_op_info_t txn_op_info = {{0},}; + glusterd_op_sm_state_info_t state = {0,}; this = THIS; GF_ASSERT (this); @@ -783,7 +1088,36 @@ __glusterd_handle_stage_op (rpcsvc_request_t *req) if (ret) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_OP, req_ctx); + ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + + /* In cases where there is no volname, the receivers won't have a + * transaction opinfo created, as for those operations, the locking + * phase where the transaction opinfos are created, won't be called. 
*/ + ret = glusterd_get_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "No transaction's opinfo set"); + + state.state = GD_OP_STATE_LOCKED; + glusterd_txn_opinfo_init (&txn_op_info, &state, + &op_req.op, req_ctx->dict, req); + + ret = glusterd_set_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + dict_unref (req_ctx->dict); + goto out; + } + } + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_OP, + txn_id, req_ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_STAGE_OP"); out: free (op_req.buf.buf_val);//malloced by xdr @@ -807,6 +1141,7 @@ __glusterd_handle_commit_op (rpcsvc_request_t *req) gd1_mgmt_commit_op_req op_req = {{0},}; glusterd_peerinfo_t *peerinfo = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; this = THIS; GF_ASSERT (this); @@ -837,11 +1172,12 @@ __glusterd_handle_commit_op (rpcsvc_request_t *req) if (ret) goto out; - ret = glusterd_op_init_ctx (op_req.op); - if (ret) - goto out; + ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_OP, req_ctx); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_OP, + txn_id, req_ctx); out: free (op_req.buf.buf_val);//malloced by xdr @@ -1891,6 +2227,56 @@ glusterd_op_unlock_send_resp (rpcsvc_request_t *req, int32_t status) return ret; } +int +glusterd_op_volume_lock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id, + int32_t status) +{ + + gd1_mgmt_volume_lock_rsp rsp = {{0},}; + int ret = -1; + + GF_ASSERT (req); + GF_ASSERT (txn_id); + glusterd_get_uuid (&rsp.uuid); + rsp.op_ret = status; + if (rsp.op_ret) + rsp.op_errno = errno; + uuid_copy (rsp.txn_id, *txn_id); + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_volume_lock_rsp); + + gf_log (THIS->name, GF_LOG_DEBUG, "Responded to volume lock, ret: %d", + ret); + + return ret; +} + +int +glusterd_op_volume_unlock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id, + int32_t status) +{ + + gd1_mgmt_volume_unlock_rsp rsp = {{0},}; + int ret = -1; + + GF_ASSERT (req); + GF_ASSERT (txn_id); + rsp.op_ret = status; + if (rsp.op_ret) + rsp.op_errno = errno; + glusterd_get_uuid (&rsp.uuid); + uuid_copy (rsp.txn_id, *txn_id); + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_volume_unlock_rsp); + + gf_log (THIS->name, GF_LOG_DEBUG, "Responded to volume unlock, ret: %d", + ret); + + return ret; +} + int __glusterd_handle_cluster_unlock (rpcsvc_request_t *req) { @@ -1899,6 +2285,7 @@ __glusterd_handle_cluster_unlock (rpcsvc_request_t *req) glusterd_op_lock_ctx_t *ctx = NULL; glusterd_peerinfo_t *peerinfo = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; this = THIS; GF_ASSERT (this); @@ -1933,8 +2320,9 @@ __glusterd_handle_cluster_unlock (rpcsvc_request_t *req) } uuid_copy (ctx->uuid, unlock_req.uuid); ctx->req = req; + ctx->dict = NULL; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK, ctx); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK, txn_id, ctx); out: glusterd_friend_sm (); @@ -3838,6 +4226,7 @@ __glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, glusterd_peerinfo_t *peerinfo = NULL; glusterd_peerctx_t *peerctx = NULL; gf_boolean_t quorum_action = _gf_false; + glusterd_volinfo_t *volinfo = NULL; uuid_t uuid; peerctx = mydata; @@ -3866,6 +4255,30 @@ 
__glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT %d", peerinfo->state.state); + if (peerinfo->connected) { + if (conf->op_version < GD_OP_VERSION_4) { + glusterd_get_lock_owner (&uuid); + if (!uuid_is_null (uuid) && + !uuid_compare (peerinfo->uuid, uuid)) + glusterd_unlock (peerinfo->uuid); + } else { + list_for_each_entry (volinfo, &conf->volumes, + vol_list) { + ret = glusterd_volume_unlock + (volinfo->volname, + peerinfo->uuid); + if (ret) + gf_log (this->name, + GF_LOG_TRACE, + "Lock not released " + "for %s", + volinfo->volname); + } + } + + ret = 0; + } + if ((peerinfo->quorum_contrib != QUORUM_DOWN) && (peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED)) { peerinfo->quorum_contrib = QUORUM_DOWN; @@ -3880,13 +4293,6 @@ __glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, glusterd_friend_remove_notify (peerctx); goto out; } - glusterd_get_lock_owner (&uuid); - if (!uuid_is_null (uuid) && - !uuid_compare (peerinfo->uuid, uuid)) { - glusterd_unlock (peerinfo->uuid); - if (opinfo.state.state != GD_OP_STATE_DEFAULT) - opinfo.state.state = GD_OP_STATE_DEFAULT; - } peerinfo->connected = 0; break; @@ -4021,3 +4427,18 @@ struct rpcsvc_program gd_svc_cli_prog_ro = { .actors = gd_svc_cli_actors_ro, .synctask = _gf_true, }; + +rpcsvc_actor_t gd_svc_mgmt_v3_actors[] = { + [GLUSTERD_MGMT_V3_NULL] = { "NULL", GLUSTERD_MGMT_V3_NULL, glusterd_null, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_VOLUME_LOCK] = { "VOL_LOCK", GLUSTERD_MGMT_V3_VOLUME_LOCK, glusterd_handle_volume_lock, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_VOLUME_UNLOCK] = { "VOL_UNLOCK", GLUSTERD_MGMT_V3_VOLUME_UNLOCK, glusterd_handle_volume_unlock, NULL, 0, DRC_NA}, +}; + +struct rpcsvc_program gd_svc_mgmt_v3_prog = { + .progname = "GlusterD svc mgmt v3", + .prognum = GD_MGMT_PROGRAM, + .progver = GD_MGMT_V3_VERSION, + .numactors = GLUSTERD_MGMT_V3_MAXVALUE, + .actors = gd_svc_mgmt_v3_actors, + .synctask = _gf_true, +}; diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c index 1420eb692..e0508faf6 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handshake.c +++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c @@ -30,6 +30,7 @@ extern struct rpc_clnt_program gd_peer_prog; extern struct rpc_clnt_program gd_mgmt_prog; +extern struct rpc_clnt_program gd_mgmt_v3_prog; #define TRUSTED_PREFIX "trusted-" @@ -812,6 +813,7 @@ __glusterd_mgmt_hndsk_version_ack_cbk (struct rpc_req *req, struct iovec *iov, */ peerinfo->mgmt = &gd_mgmt_prog; peerinfo->peer = &gd_peer_prog; + peerinfo->mgmt_v3 = &gd_mgmt_v3_prog; ret = default_notify (this, GF_EVENT_CHILD_UP, NULL); @@ -1038,6 +1040,15 @@ glusterd_set_clnt_mgmt_program (glusterd_peerinfo_t *peerinfo, peerinfo->peer->progname, peerinfo->peer->prognum, peerinfo->peer->progver); } + + if (peerinfo->mgmt_v3) { + gf_log ("", GF_LOG_INFO, + "Using Program %s, Num (%d), Version (%d)", + peerinfo->mgmt_v3->progname, + peerinfo->mgmt_v3->prognum, + peerinfo->mgmt_v3->progver); + } + ret = 0; out: return ret; diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.c b/xlators/mgmt/glusterd/src/glusterd-locks.c new file mode 100644 index 000000000..68c6d7426 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-locks.c @@ -0,0 +1,177 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "common-utils.h" +#include "cli1-xdr.h" +#include "xdr-generic.h" +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-store.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "glusterd-locks.h" +#include "run.h" +#include "syscall.h" + +#include + +static dict_t *vol_lock; + +/* Initialize the global vol-lock list(dict) when + * glusterd is spawned */ +int32_t +glusterd_vol_lock_init () +{ + int32_t ret = -1; + + vol_lock = dict_new (); + if (!vol_lock) + goto out; + + ret = 0; +out: + return ret; +} + +/* Destroy the global vol-lock list(dict) when + * glusterd cleanup is performed */ +void +glusterd_vol_lock_fini () +{ + if (vol_lock) + dict_unref (vol_lock); +} + +int32_t +glusterd_get_vol_lock_owner (char *volname, uuid_t *uuid) +{ + int32_t ret = -1; + vol_lock_obj *lock_obj = NULL; + uuid_t no_owner = {0,}; + + if (!volname || !uuid) { + gf_log ("", GF_LOG_ERROR, "volname or uuid is null."); + ret = -1; + goto out; + } + + ret = dict_get_bin (vol_lock, volname, (void **) &lock_obj); + if (!ret) + uuid_copy (*uuid, lock_obj->lock_owner); + else + uuid_copy (*uuid, no_owner); + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_volume_lock (char *volname, uuid_t uuid) +{ + int32_t ret = -1; + vol_lock_obj *lock_obj = NULL; + uuid_t owner = {0}; + + if (!volname) { + gf_log ("", GF_LOG_ERROR, "volname is null."); + ret = -1; + goto out; + } + + ret = glusterd_get_vol_lock_owner (volname, &owner); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Unable to get volume lock owner"); + goto out; + } + + /* If the lock has already been held for the given volume + * we fail */ + if (!uuid_is_null (owner)) { + gf_log ("", GF_LOG_ERROR, "Unable to acquire lock. " + "Lock for %s held by %s", volname, + uuid_utoa (owner)); + ret = -1; + goto out; + } + + lock_obj = GF_CALLOC (1, sizeof(vol_lock_obj), + gf_common_mt_vol_lock_obj_t); + if (!lock_obj) { + ret = -1; + goto out; + } + + uuid_copy (lock_obj->lock_owner, uuid); + + ret = dict_set_bin (vol_lock, volname, lock_obj, sizeof(vol_lock_obj)); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to set lock owner " + "in volume lock"); + if (lock_obj) + GF_FREE (lock_obj); + goto out; + } + + gf_log ("", GF_LOG_DEBUG, "Lock for %s successfully held by %s", + volname, uuid_utoa (uuid)); + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_volume_unlock (char *volname, uuid_t uuid) +{ + int32_t ret = -1; + uuid_t owner = {0}; + + if (!volname) { + gf_log ("", GF_LOG_ERROR, "volname is null."); + ret = -1; + goto out; + } + + ret = glusterd_get_vol_lock_owner (volname, &owner); + if (ret) + goto out; + + if (uuid_is_null (owner)) { + gf_log ("", GF_LOG_ERROR, "Lock for %s not held", volname); + ret = -1; + goto out; + } + + ret = uuid_compare (uuid, owner); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Lock owner mismatch. 
" + "Lock for %s held by %s", + volname, uuid_utoa (owner)); + goto out; + } + + /* Removing the volume lock from the global list */ + dict_del (vol_lock, volname); + + gf_log ("", GF_LOG_DEBUG, "Lock for %s successfully released", + volname); + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.h b/xlators/mgmt/glusterd/src/glusterd-locks.h new file mode 100644 index 000000000..2a8cc20ed --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-locks.h @@ -0,0 +1,38 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _GLUSTERD_LOCKS_H_ +#define _GLUSTERD_LOCKS_H_ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +struct volume_lock_object_ { + uuid_t lock_owner; +}; +typedef struct volume_lock_object_ vol_lock_obj; + +int32_t +glusterd_vol_lock_init (); + +void +glusterd_vol_lock_fini (); + +int32_t +glusterd_get_vol_lock_owner (char *volname, uuid_t *uuid); + +int32_t +glusterd_volume_lock (char *volname, uuid_t uuid); + +int32_t +glusterd_volume_unlock (char *volname, uuid_t uuid); + +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 5758e217d..83c91a52d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -37,6 +37,7 @@ #include "glusterd-store.h" #include "glusterd-hooks.h" #include "glusterd-volgen.h" +#include "glusterd-locks.h" #include "syscall.h" #include "cli1-xdr.h" #include "common-utils.h" @@ -67,6 +68,195 @@ static struct list_head gd_op_sm_queue; pthread_mutex_t gd_op_sm_lock; glusterd_op_info_t opinfo = {{0},}; +uuid_t global_txn_id = {0}; /* To be used in + * heterogeneous + * cluster with no + * transaction ids */ + +static dict_t *txn_opinfo; + +struct glusterd_txn_opinfo_object_ { + glusterd_op_info_t opinfo; +}; +typedef struct glusterd_txn_opinfo_object_ glusterd_txn_opinfo_obj; + +int32_t +glusterd_txn_opinfo_dict_init () +{ + int32_t ret = -1; + + txn_opinfo = dict_new (); + if (!txn_opinfo) { + ret = -1; + goto out; + } + + ret = 0; +out: + return ret; +} + +void +glusterd_txn_opinfo_dict_fini () +{ + if (txn_opinfo) + dict_destroy (txn_opinfo); +} + +void +glusterd_txn_opinfo_init (glusterd_op_info_t *opinfo, + glusterd_op_sm_state_info_t *state, + glusterd_op_t *op, + dict_t *op_ctx, + rpcsvc_request_t *req) +{ + GF_ASSERT (opinfo); + + if (state) + opinfo->state = *state; + + if (op) + opinfo->op = *op; + + opinfo->op_ctx = dict_ref(op_ctx); + + if (req) + opinfo->req = req; + + return; +} + +int32_t +glusterd_generate_txn_id (dict_t *dict, uuid_t **txn_id) +{ + int32_t ret = -1; + + GF_ASSERT (dict); + + *txn_id = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); + if (!*txn_id) + goto out; + + uuid_generate (**txn_id); + + ret = dict_set_bin (dict, "transaction_id", + *txn_id, sizeof (uuid_t)); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Failed to set transaction id."); + goto out; + } + + gf_log ("", GF_LOG_DEBUG, + "Transaction_id = %s", uuid_utoa (**txn_id)); +out: + if (ret && *txn_id) + GF_FREE (*txn_id); + + return ret; +} + +int32_t +glusterd_get_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo) +{ + int32_t ret = -1; + 
glusterd_txn_opinfo_obj *opinfo_obj = NULL; + + if (!txn_id || !opinfo) { + gf_log ("", GF_LOG_ERROR, + "Empty transaction id or opinfo received."); + ret = -1; + goto out; + } + + ret = dict_get_bin(txn_opinfo, uuid_utoa (*txn_id), + (void **) &opinfo_obj); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get transaction opinfo"); + goto out; + } + + (*opinfo) = opinfo_obj->opinfo; + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_set_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo) +{ + int32_t ret = -1; + glusterd_txn_opinfo_obj *opinfo_obj = NULL; + + if (!txn_id) { + gf_log ("", GF_LOG_ERROR, "Empty transaction id received."); + ret = -1; + goto out; + } + + ret = dict_get_bin(txn_opinfo, uuid_utoa (*txn_id), + (void **) &opinfo_obj); + if (ret) { + opinfo_obj = GF_CALLOC (1, sizeof(glusterd_txn_opinfo_obj), + gf_common_mt_txn_opinfo_obj_t); + if (!opinfo_obj) { + ret = -1; + goto out; + } + + ret = dict_set_bin(txn_opinfo, uuid_utoa (*txn_id), opinfo_obj, + sizeof(glusterd_txn_opinfo_obj)); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set opinfo for transaction ID : %s", + uuid_utoa (*txn_id)); + goto out; + } + } + + opinfo_obj->opinfo = (*opinfo); + + ret = 0; +out: + if (ret) + if (opinfo_obj) + GF_FREE (opinfo_obj); + + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_clear_txn_opinfo (uuid_t *txn_id) +{ + int32_t ret = -1; + glusterd_op_info_t txn_op_info = {{0},}; + + if (!txn_id) { + gf_log ("", GF_LOG_ERROR, "Empty transaction id received."); + ret = -1; + goto out; + } + + ret = glusterd_get_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Transaction opinfo not found"); + goto out; + } + + dict_unref (txn_op_info.op_ctx); + + dict_del(txn_opinfo, uuid_utoa (*txn_id)); + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + static int glusterfs_port = GLUSTERD_DEFAULT_PORT; static char *glusterd_op_sm_state_names[] = { "Default", @@ -147,10 +337,10 @@ glusterd_is_volume_started (glusterd_volinfo_t *volinfo) } static int -glusterd_op_sm_inject_all_acc () +glusterd_op_sm_inject_all_acc (uuid_t *txn_id) { int32_t ret = -1; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, txn_id, NULL); gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } @@ -445,7 +635,7 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) * This check is not done on the originator glusterd. The originator * glusterd sets this value. 
*/ - origin_glusterd = is_origin_glusterd (); + origin_glusterd = is_origin_glusterd (dict); if (!origin_glusterd) { /* Check for v3.3.x origin glusterd */ @@ -2215,7 +2405,7 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, GF_ASSERT (dict); - origin_glusterd = is_origin_glusterd (); + origin_glusterd = is_origin_glusterd (dict); ret = dict_get_uint32 (dict, "cmd", &cmd); if (ret) @@ -2420,6 +2610,7 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) xlator_t *this = NULL; glusterd_peerinfo_t *peerinfo = NULL; uint32_t pending_count = 0; + dict_t *dict = NULL; this = THIS; priv = this->private; @@ -2434,27 +2625,61 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) continue; - proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_LOCK]; - if (proc->fn) { - ret = proc->fn (NULL, this, peerinfo); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, "Failed to " - "send lock request for operation " - "'Volume %s' to peer %s", - gd_op_list[opinfo.op], - peerinfo->hostname); - continue; + /* Based on the op_version, acquire a cluster or volume lock */ + if (priv->op_version < GD_OP_VERSION_4) { + proc = &peerinfo->mgmt->proctable + [GLUSTERD_MGMT_CLUSTER_LOCK]; + if (proc->fn) { + ret = proc->fn (NULL, this, peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to send lock request " + "for operation 'Volume %s' to " + "peer %s", + gd_op_list[opinfo.op], + peerinfo->hostname); + continue; + } + pending_count++; + } + } else { + dict = glusterd_op_get_ctx (); + dict_ref (dict); + + proc = &peerinfo->mgmt_v3->proctable + [GLUSTERD_MGMT_V3_VOLUME_LOCK]; + if (proc->fn) { + ret = dict_set_static_ptr (dict, "peerinfo", + peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set peerinfo"); + dict_unref (dict); + goto out; + } + + ret = proc->fn (NULL, this, dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to send volume lock " + "request for operation " + "'Volume %s' to peer %s", + gd_op_list[opinfo.op], + peerinfo->hostname); + dict_unref (dict); + continue; + } + pending_count++; } - pending_count++; } } opinfo.pending_count = pending_count; if (!opinfo.pending_count) - ret = glusterd_op_sm_inject_all_acc (); + ret = glusterd_op_sm_inject_all_acc (&event->txn_id); +out: gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); - return ret; } @@ -2467,17 +2692,12 @@ glusterd_op_ac_send_unlock (glusterd_op_sm_event_t *event, void *ctx) xlator_t *this = NULL; glusterd_peerinfo_t *peerinfo = NULL; uint32_t pending_count = 0; + dict_t *dict = NULL; this = THIS; priv = this->private; GF_ASSERT (priv); - /*ret = glusterd_unlock (MY_UUID); - - if (ret) - goto out; - */ - list_for_each_entry (peerinfo, &priv->peers, uuid_list) { GF_ASSERT (peerinfo); @@ -2487,29 +2707,63 @@ glusterd_op_ac_send_unlock (glusterd_op_sm_event_t *event, void *ctx) (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) continue; - proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_UNLOCK]; - if (proc->fn) { - ret = proc->fn (NULL, this, peerinfo); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, "Failed to " - "send unlock request for operation " - "'Volume %s' to peer %s", - gd_op_list[opinfo.op], - peerinfo->hostname); - continue; + /* Based on the op_version, + * release the cluster or volume lock */ + if (priv->op_version < GD_OP_VERSION_4) { + proc = &peerinfo->mgmt->proctable + [GLUSTERD_MGMT_CLUSTER_UNLOCK]; + if (proc->fn) { + ret = proc->fn (NULL, this, peerinfo); + 
if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to send unlock request " + "for operation 'Volume %s' to " + "peer %s", + gd_op_list[opinfo.op], + peerinfo->hostname); + continue; + } + pending_count++; + } + } else { + dict = glusterd_op_get_ctx (); + dict_ref (dict); + + proc = &peerinfo->mgmt_v3->proctable + [GLUSTERD_MGMT_V3_VOLUME_UNLOCK]; + if (proc->fn) { + ret = dict_set_static_ptr (dict, "peerinfo", + peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set peerinfo"); + dict_unref (dict); + goto out; + } + + ret = proc->fn (NULL, this, dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to send volume unlock " + "request for operation " + "'Volume %s' to peer %s", + gd_op_list[opinfo.op], + peerinfo->hostname); + dict_unref (dict); + continue; + } + pending_count++; } - pending_count++; } } opinfo.pending_count = pending_count; if (!opinfo.pending_count) - ret = glusterd_op_sm_inject_all_acc (); + ret = glusterd_op_sm_inject_all_acc (&event->txn_id); +out: gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); - return ret; - } static int @@ -2521,7 +2775,8 @@ glusterd_op_ac_ack_drain (glusterd_op_sm_event_t *event, void *ctx) opinfo.pending_count--; if (!opinfo.pending_count) - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, NULL); gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret); @@ -2537,43 +2792,93 @@ glusterd_op_ac_send_unlock_drain (glusterd_op_sm_event_t *event, void *ctx) static int glusterd_op_ac_lock (glusterd_op_sm_event_t *event, void *ctx) { - glusterd_op_lock_ctx_t *lock_ctx = NULL; - int32_t ret = 0; + int32_t ret = 0; + char *volname = NULL; + glusterd_op_lock_ctx_t *lock_ctx = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; GF_ASSERT (event); GF_ASSERT (ctx); + this = THIS; + priv = this->private; + lock_ctx = (glusterd_op_lock_ctx_t *)ctx; - ret = glusterd_lock (lock_ctx->uuid); + /* If the req came from a node running on older op_version + * the dict won't be present. Based on it acquiring a cluster + * or volume lock */ + if (lock_ctx->dict == NULL) { + ret = glusterd_lock (lock_ctx->uuid); + glusterd_op_lock_send_resp (lock_ctx->req, ret); + } else { + ret = dict_get_str (lock_ctx->dict, "volname", &volname); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire volname"); + else { + ret = glusterd_volume_lock (volname, lock_ctx->uuid); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock for %s", + volname); + } - gf_log (THIS->name, GF_LOG_DEBUG, "Lock Returned %d", ret); + glusterd_op_volume_lock_send_resp (lock_ctx->req, + &event->txn_id, ret); - glusterd_op_lock_send_resp (lock_ctx->req, ret); + dict_unref (lock_ctx->dict); + } + gf_log (THIS->name, GF_LOG_DEBUG, "Lock Returned %d", ret); return ret; } static int glusterd_op_ac_unlock (glusterd_op_sm_event_t *event, void *ctx) { - int ret = 0; - glusterd_op_lock_ctx_t *lock_ctx = NULL; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; + int32_t ret = 0; + char *volname = NULL; + glusterd_op_lock_ctx_t *lock_ctx = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + GF_ASSERT (event); GF_ASSERT (ctx); this = THIS; priv = this->private; + lock_ctx = (glusterd_op_lock_ctx_t *)ctx; - ret = glusterd_unlock (lock_ctx->uuid); + /* If the req came from a node running on older op_version + * the dict won't be present. 
Based on it releasing the cluster + * or volume lock */ + if (lock_ctx->dict == NULL) { + ret = glusterd_unlock (lock_ctx->uuid); + glusterd_op_unlock_send_resp (lock_ctx->req, ret); + } else { + ret = dict_get_str (lock_ctx->dict, "volname", &volname); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire volname"); + else { + ret = glusterd_volume_unlock (volname, lock_ctx->uuid); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to release lock for %s", + volname); + } + + glusterd_op_volume_unlock_send_resp (lock_ctx->req, + &event->txn_id, ret); - gf_log (this->name, GF_LOG_DEBUG, "Unlock Returned %d", ret); + dict_unref (lock_ctx->dict); + } - glusterd_op_unlock_send_resp (lock_ctx->req, ret); + gf_log (this->name, GF_LOG_DEBUG, "Unlock Returned %d", ret); if (priv->pending_quorum_action) glusterd_do_quorum_action (); @@ -2611,7 +2916,8 @@ glusterd_op_ac_rcvd_lock_acc (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, + &event->txn_id, NULL); gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -3043,7 +3349,8 @@ out: if (dict) dict_unref (dict); if (ret) { - glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL); + glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + &event->txn_id, NULL); opinfo.op_ret = ret; } @@ -3052,7 +3359,7 @@ out: opinfo.pending_count); if (!opinfo.pending_count) - ret = glusterd_op_sm_inject_all_acc (); + ret = glusterd_op_sm_inject_all_acc (&event->txn_id); gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); @@ -3061,7 +3368,7 @@ out: } static int32_t -glusterd_op_start_rb_timer (dict_t *dict) +glusterd_op_start_rb_timer (dict_t *dict, uuid_t *txn_id) { int32_t op = 0; struct timespec timeout = {0, }; @@ -3080,7 +3387,7 @@ glusterd_op_start_rb_timer (dict_t *dict) } if (op != GF_REPLACE_OP_START) { - ret = glusterd_op_sm_inject_all_acc (); + ret = glusterd_op_sm_inject_all_acc (txn_id); goto out; } @@ -3095,6 +3402,17 @@ glusterd_op_start_rb_timer (dict_t *dict) ret = -1; goto out; } + + ret = dict_set_bin (rb_ctx, "transaction_id", + txn_id, sizeof (uuid_t)); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Failed to set transaction id."); + goto out; + } else + gf_log ("", GF_LOG_DEBUG, + "transaction_id = %s", uuid_utoa (*txn_id)); + priv->timer = gf_timer_call_after (THIS->ctx, timeout, glusterd_do_replace_brick, (void *) rb_ctx); @@ -3584,17 +3902,19 @@ out: if (dict) dict_unref (dict); if (ret) { - glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL); + glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + &event->txn_id, NULL); opinfo.op_ret = ret; } if (!opinfo.pending_count) { if (op == GD_OP_REPLACE_BRICK) { - ret = glusterd_op_start_rb_timer (op_dict); + ret = glusterd_op_start_rb_timer (op_dict, + &event->txn_id); } else { glusterd_op_modify_op_ctx (op, NULL); - ret = glusterd_op_sm_inject_all_acc (); + ret = glusterd_op_sm_inject_all_acc (&event->txn_id); } goto err; } @@ -3619,7 +3939,8 @@ glusterd_op_ac_rcvd_stage_op_acc (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_ACC, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_ACC, + &event->txn_id, NULL); out: gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -3640,7 +3961,8 @@ glusterd_op_ac_stage_op_failed (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.pending_count > 0) goto out; - ret = 
glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, NULL); out: gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -3661,7 +3983,8 @@ glusterd_op_ac_commit_op_failed (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, NULL); out: gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -3704,7 +4027,8 @@ glusterd_op_ac_brick_op_failed (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.brick_pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, ev_ctx->commit_ctx); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, ev_ctx->commit_ctx); out: if (ev_ctx->rsp_dict) @@ -3746,7 +4070,7 @@ glusterd_op_ac_rcvd_commit_op_acc (glusterd_op_sm_event_t *event, void *ctx) goto out; } - ret = glusterd_op_start_rb_timer (op_ctx); + ret = glusterd_op_start_rb_timer (op_ctx, &event->txn_id); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Couldn't start " "replace-brick operation."); @@ -3761,10 +4085,14 @@ glusterd_op_ac_rcvd_commit_op_acc (glusterd_op_sm_event_t *event, void *ctx) out: if (commit_ack_inject) { if (ret) - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + &event->txn_id, + NULL); else if (!opinfo.pending_count) { glusterd_op_modify_op_ctx (op, NULL); - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC, NULL); + ret = glusterd_op_sm_inject_event + (GD_OP_EVENT_COMMIT_ACC, + &event->txn_id, NULL); } /*else do nothing*/ } @@ -3785,7 +4113,8 @@ glusterd_op_ac_rcvd_unlock_acc (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, + &event->txn_id, NULL); gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -3819,7 +4148,7 @@ glusterd_op_reset_ctx () } int32_t -glusterd_op_txn_complete () +glusterd_op_txn_complete (uuid_t *txn_id) { int32_t ret = -1; glusterd_conf_t *priv = NULL; @@ -3829,6 +4158,7 @@ glusterd_op_txn_complete () rpcsvc_request_t *req = NULL; void *ctx = NULL; char *op_errstr = NULL; + char *volname = NULL; xlator_t *this = NULL; this = THIS; @@ -3851,14 +4181,29 @@ glusterd_op_txn_complete () glusterd_op_reset_ctx (); glusterd_op_clear_errstr (); - ret = glusterd_unlock (MY_UUID); - - /* unlock cant/shouldnt fail here!! */ - if (ret) { - gf_log (this->name, GF_LOG_CRITICAL, - "Unable to clear local lock, ret: %d", ret); + /* Based on the op-version, we release the cluster or volume lock */ + if (priv->op_version < GD_OP_VERSION_4) { + ret = glusterd_unlock (MY_UUID); + /* unlock cant/shouldnt fail here!! */ + if (ret) + gf_log (this->name, GF_LOG_CRITICAL, + "Unable to clear local lock, ret: %d", ret); + else + gf_log (this->name, GF_LOG_DEBUG, "Cleared local lock"); } else { - gf_log (this->name, GF_LOG_DEBUG, "Cleared local lock"); + ret = dict_get_str (ctx, "volname", &volname); + if (ret) + gf_log ("", GF_LOG_INFO, + "No Volume name present. 
" + "Locks have not been held."); + + if (volname) { + ret = glusterd_volume_unlock (volname, MY_UUID); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to release lock for %s", + volname); + } } ret = glusterd_op_send_cli_response (op, op_ret, @@ -3877,6 +4222,13 @@ glusterd_op_txn_complete () if (priv->pending_quorum_action) glusterd_do_quorum_action (); + + /* Clearing the transaction opinfo */ + ret = glusterd_clear_txn_opinfo (txn_id); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to clear transaction's opinfo"); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } @@ -3888,7 +4240,7 @@ glusterd_op_ac_unlocked_all (glusterd_op_sm_event_t *event, void *ctx) GF_ASSERT (event); - ret = glusterd_op_txn_complete (); + ret = glusterd_op_txn_complete (&event->txn_id); gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -3905,6 +4257,7 @@ glusterd_op_ac_stage_op (glusterd_op_sm_event_t *event, void *ctx) char *op_errstr = NULL; dict_t *dict = NULL; xlator_t *this = NULL; + uuid_t *txn_id = NULL; this = THIS; GF_ASSERT (this); @@ -3930,9 +4283,27 @@ glusterd_op_ac_stage_op (glusterd_op_sm_event_t *event, void *ctx) status); } + txn_id = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); + + if (txn_id) + uuid_copy (*txn_id, event->txn_id); + else { + ret = -1; + goto out; + } + + ret = dict_set_bin (rsp_dict, "transaction_id", + txn_id, sizeof(uuid_t *)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set transaction id."); + goto out; + } + ret = glusterd_op_stage_send_resp (req_ctx->req, req_ctx->op, status, op_errstr, rsp_dict); +out: if (op_errstr && (strcmp (op_errstr, ""))) GF_FREE (op_errstr); @@ -3994,6 +4365,7 @@ glusterd_op_ac_commit_op (glusterd_op_sm_event_t *event, void *ctx) dict_t *dict = NULL; dict_t *rsp_dict = NULL; xlator_t *this = NULL; + uuid_t *txn_id = NULL; this = THIS; GF_ASSERT (this); @@ -4023,10 +4395,27 @@ glusterd_op_ac_commit_op (glusterd_op_sm_event_t *event, void *ctx) "'Volume %s' failed: %d", gd_op_list[req_ctx->op], status); + txn_id = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); + + if (txn_id) + uuid_copy (*txn_id, event->txn_id); + else { + ret = -1; + goto out; + } + + ret = dict_set_bin (rsp_dict, "transaction_id", + txn_id, sizeof(uuid_t)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set transaction id."); + goto out; + } + ret = glusterd_op_commit_send_resp (req_ctx->req, req_ctx->op, status, op_errstr, rsp_dict); - glusterd_op_fini_ctx (); +out: if (op_errstr && (strcmp (op_errstr, ""))) GF_FREE (op_errstr); @@ -4055,7 +4444,6 @@ glusterd_op_ac_send_commit_failed (glusterd_op_sm_event_t *event, void *ctx) opinfo.op_ret, opinfo.op_errstr, op_ctx); - glusterd_op_fini_ctx (); if (opinfo.op_errstr && (strcmp (opinfo.op_errstr, ""))) { GF_FREE (opinfo.op_errstr); opinfo.op_errstr = NULL; @@ -5237,7 +5625,8 @@ glusterd_op_ac_send_brick_op (glusterd_op_sm_event_t *event, void *ctx) if (!opinfo.pending_count && !opinfo.brick_pending_count) { glusterd_clear_pending_nodes (&opinfo.pending_bricks); - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, req_ctx); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, req_ctx); } out: @@ -5291,7 +5680,8 @@ glusterd_op_ac_rcvd_brick_op_acc (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.brick_pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, ev_ctx->commit_ctx); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, &event->txn_id, + ev_ctx->commit_ctx); out: 
if (ev_ctx->rsp_dict) @@ -5665,7 +6055,7 @@ glusterd_op_sm_new_event (glusterd_op_sm_event_type_t event_type, int glusterd_op_sm_inject_event (glusterd_op_sm_event_type_t event_type, - void *ctx) + uuid_t *txn_id, void *ctx) { int32_t ret = -1; glusterd_op_sm_event_t *event = NULL; @@ -5680,6 +6070,9 @@ glusterd_op_sm_inject_event (glusterd_op_sm_event_type_t event_type, event->ctx = ctx; + if (txn_id) + uuid_copy (event->txn_id, *txn_id); + gf_log (THIS->name, GF_LOG_DEBUG, "Enqueue event: '%s'", glusterd_op_sm_event_name_get (event->event)); list_add_tail (&event->list, &gd_op_sm_queue); @@ -5740,6 +6133,7 @@ glusterd_op_sm () glusterd_op_sm_t *state = NULL; glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; xlator_t *this = NULL; + glusterd_op_info_t txn_op_info; this = THIS; GF_ASSERT (this); @@ -5760,6 +6154,20 @@ glusterd_op_sm () "type: '%s'", glusterd_op_sm_event_name_get(event_type)); + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", + uuid_utoa (event->txn_id)); + + ret = glusterd_get_txn_opinfo (&event->txn_id, + &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get transaction's opinfo"); + glusterd_destroy_op_event_ctx (event); + GF_FREE (event); + continue; + } else + opinfo = txn_op_info; + state = glusterd_op_state_table[opinfo.state.state]; GF_ASSERT (state); @@ -5790,8 +6198,27 @@ glusterd_op_sm () return ret; } + if ((state[event_type].next_state == + GD_OP_STATE_DEFAULT) && + (event_type == GD_OP_EVENT_UNLOCK)) { + /* Clearing the transaction opinfo */ + ret = glusterd_clear_txn_opinfo(&event->txn_id); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to clear " + "transaction's opinfo"); + } else { + ret = glusterd_set_txn_opinfo (&event->txn_id, + &opinfo); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to set " + "transaction's opinfo"); + } + glusterd_destroy_op_event_ctx (event); GF_FREE (event); + } } @@ -5844,52 +6271,6 @@ glusterd_op_clear_op (glusterd_op_t op) } -int32_t -glusterd_op_init_ctx (glusterd_op_t op) -{ - int ret = 0; - dict_t *dict = NULL; - xlator_t *this = NULL; - - this = THIS; - GF_ASSERT (this); - GF_ASSERT (GD_OP_NONE < op && op < GD_OP_MAX); - - if (_gf_false == glusterd_need_brick_op (op)) { - gf_log (this->name, GF_LOG_DEBUG, "Received op: %s, returning", - gd_op_list[op]); - goto out; - } - dict = dict_new (); - if (dict == NULL) { - ret = -1; - goto out; - } - ret = glusterd_op_set_ctx (dict); - if (ret) - goto out; -out: - gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} - - - -int32_t -glusterd_op_fini_ctx () -{ - dict_t *dict = NULL; - - dict = glusterd_op_get_ctx (); - if (dict) - dict_unref (dict); - - glusterd_op_reset_ctx (); - return 0; -} - - - int32_t glusterd_op_free_ctx (glusterd_op_t op, void *ctx) { diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h index 1125368ce..53d4e2ff4 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h @@ -77,6 +77,7 @@ struct glusterd_op_sm_event_ { struct list_head list; void *ctx; glusterd_op_sm_event_type_t event; + uuid_t txn_id; }; typedef struct glusterd_op_sm_event_ glusterd_op_sm_event_t; @@ -119,6 +120,7 @@ typedef struct glusterd_op_log_filename_ctx_ glusterd_op_log_filename_ctx_t; struct glusterd_op_lock_ctx_ { uuid_t uuid; + dict_t *dict; rpcsvc_request_t *req; }; @@ -180,7 +182,7 @@ glusterd_op_sm_new_event (glusterd_op_sm_event_type_t event_type, glusterd_op_sm_event_t **new_event); int 
glusterd_op_sm_inject_event (glusterd_op_sm_event_type_t event_type, - void *ctx); + uuid_t *txn_id, void *ctx); int glusterd_op_sm_init (); @@ -264,10 +266,7 @@ glusterd_op_init_commit_rsp_dict (glusterd_op_t op); void glusterd_op_modify_op_ctx (glusterd_op_t op, void *op_ctx); -int32_t -glusterd_op_init_ctx (glusterd_op_t op); -int32_t -glusterd_op_fini_ctx (); + int32_t glusterd_volume_stats_read_perf (char *brick_path, int32_t blk_size, int32_t blk_count, double *throughput, double *time); @@ -296,4 +295,16 @@ int glusterd_is_valid_vg (glusterd_brickinfo_t *brick, int check_tag, char *msg); #endif +int32_t +glusterd_get_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo); + +int32_t +glusterd_set_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo); + +int32_t +glusterd_clear_txn_opinfo (uuid_t *txn_id); + +int32_t +glusterd_generate_txn_id (dict_t *dict, uuid_t **txn_id); + #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-quota.c b/xlators/mgmt/glusterd/src/glusterd-quota.c index d13533aa6..cf23b6404 100644 --- a/xlators/mgmt/glusterd/src/glusterd-quota.c +++ b/xlators/mgmt/glusterd/src/glusterd-quota.c @@ -824,7 +824,7 @@ glusterd_quota_limit_usage (glusterd_volinfo_t *volinfo, dict_t *dict, } } - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { ret = glusterd_set_quota_limit (volinfo->volname, path, hard_limit, soft_limit, op_errstr); @@ -919,7 +919,7 @@ glusterd_quota_remove_limits (glusterd_volinfo_t *volinfo, dict_t *dict, if (ret) goto out; - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { ret = glusterd_remove_quota_limit (volinfo->volname, path, op_errstr); if (ret) @@ -1385,7 +1385,7 @@ glusterd_op_stage_quota (dict_t *dict, char **op_errstr, dict_t *rsp_dict) case GF_QUOTA_OPTION_TYPE_ENABLE: case GF_QUOTA_OPTION_TYPE_LIST: /* Fuse mount req. 
only for enable & list-usage options*/ - if (is_origin_glusterd () && + if (is_origin_glusterd (dict) && !glusterd_is_fuse_available ()) { *op_errstr = gf_strdup ("Fuse unavailable"); ret = -1; diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index b28056135..b274e3367 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -545,7 +545,7 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) case GF_DEFRAG_CMD_START: case GF_DEFRAG_CMD_START_LAYOUT_FIX: case GF_DEFRAG_CMD_START_FORCE: - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { op_ctx = glusterd_op_get_ctx (); if (!op_ctx) { ret = -1; diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c index 5c3fc2d82..f26108f89 100644 --- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c @@ -31,6 +31,7 @@ DEFAULT_VAR_RUN_DIRECTORY"/%s-"RB_CLIENT_MOUNTPOINT, \ volinfo->volname); +extern uuid_t global_txn_id; int glusterd_get_replace_op_str (gf1_cli_replace_op op, char *op_str) @@ -325,7 +326,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, ret = -1; goto out; } - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { if (!ctx) { ret = -1; gf_log (this->name, GF_LOG_ERROR, @@ -1631,7 +1632,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) /* Set task-id, if available, in op_ctx dict for operations * other than start */ - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { ctx = glusterd_op_get_ctx(); if (!ctx) { gf_log (this->name, GF_LOG_ERROR, "Failed to " @@ -1894,6 +1895,7 @@ glusterd_do_replace_brick (void *data) glusterd_brickinfo_t *src_brickinfo = NULL; glusterd_brickinfo_t *dst_brickinfo = NULL; glusterd_conf_t *priv = NULL; + uuid_t *txn_id = &global_txn_id; int ret = 0; @@ -1913,6 +1915,10 @@ glusterd_do_replace_brick (void *data) gf_log ("", GF_LOG_DEBUG, "Replace brick operation detected"); + ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + ret = dict_get_int32 (dict, "operation", &op); if (ret) { gf_log ("", GF_LOG_DEBUG, @@ -2008,9 +2014,11 @@ glusterd_do_replace_brick (void *data) out: if (ret) - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + txn_id, NULL); else - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC, + txn_id, NULL); synclock_lock (&priv->big_lock); { diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c index cd81383e9..9af26cfab 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c @@ -33,6 +33,7 @@ extern glusterd_op_info_t opinfo; +extern uuid_t global_txn_id; int32_t glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret, @@ -574,6 +575,7 @@ __glusterd_cluster_lock_cbk (struct rpc_req *req, struct iovec *iov, glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; glusterd_peerinfo_t *peerinfo = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; this = THIS; GF_ASSERT (this); @@ -618,7 +620,7 @@ out: event_type = GD_OP_EVENT_RCVD_ACC; } - ret = glusterd_op_sm_inject_event (event_type, NULL); + ret = glusterd_op_sm_inject_event 
(event_type, txn_id, NULL); if (!ret) { glusterd_friend_sm (); @@ -637,6 +639,168 @@ glusterd_cluster_lock_cbk (struct rpc_req *req, struct iovec *iov, __glusterd_cluster_lock_cbk); } +static int32_t +glusterd_vol_lock_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + gd1_mgmt_volume_lock_rsp rsp = {{0},}; + int ret = -1; + int32_t op_ret = -1; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_volume_lock_rsp); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to decode volume lock " + "response received from peer"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + op_ret = rsp.op_ret; + + txn_id = &rsp.txn_id; + + gf_log (this->name, (op_ret) ? GF_LOG_ERROR : GF_LOG_DEBUG, + "Received volume lock %s from uuid: %s", + (op_ret) ? "RJT" : "ACC", uuid_utoa (rsp.uuid)); + + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "Volume lock response received " + "from unknown peer: %s. Ignoring response", + uuid_utoa (rsp.uuid)); + goto out; + } + + if (op_ret) { + event_type = GD_OP_EVENT_RCVD_RJT; + opinfo.op_ret = op_ret; + opinfo.op_errstr = gf_strdup ("Another transaction could be in " + "progress. Please try again after" + " sometime."); + } else { + event_type = GD_OP_EVENT_RCVD_ACC; + } + + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); + + if (!ret) { + glusterd_friend_sm (); + glusterd_op_sm (); + } + +out: + GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); + return ret; +} + +int32_t +glusterd_vol_lock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + glusterd_vol_lock_cbk_fn); +} + +static int32_t +glusterd_vol_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + gd1_mgmt_volume_unlock_rsp rsp = {{0},}; + int ret = -1; + int32_t op_ret = -1; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_volume_unlock_rsp); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to decode volume unlock " + "response received from peer"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + op_ret = rsp.op_ret; + + txn_id = &rsp.txn_id; + + gf_log (this->name, (op_ret) ? GF_LOG_ERROR : GF_LOG_DEBUG, + "Received volume unlock %s from uuid: %s", + (op_ret) ? "RJT" : "ACC", + uuid_utoa (rsp.uuid)); + + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); + + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "Volume unlock response received " + "from unknown peer: %s. Ignoring response", + uuid_utoa (rsp.uuid)); + goto out; + } + + if (op_ret) { + event_type = GD_OP_EVENT_RCVD_RJT; + opinfo.op_ret = op_ret; + opinfo.op_errstr = gf_strdup ("Another transaction could be in " + "progress. 
Please try again after" + " sometime."); + } else { + event_type = GD_OP_EVENT_RCVD_ACC; + } + + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); + + if (!ret) { + glusterd_friend_sm (); + glusterd_op_sm (); + } + +out: + GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); + return ret; +} + +int32_t +glusterd_vol_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + glusterd_vol_unlock_cbk_fn); +} + int32_t __glusterd_cluster_unlock_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) @@ -647,6 +811,7 @@ __glusterd_cluster_unlock_cbk (struct rpc_req *req, struct iovec *iov, glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; glusterd_peerinfo_t *peerinfo = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; this = THIS; GF_ASSERT (this); @@ -688,7 +853,7 @@ out: event_type = GD_OP_EVENT_RCVD_ACC; } - ret = glusterd_op_sm_inject_event (event_type, NULL); + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); if (!ret) { glusterd_friend_sm (); @@ -720,6 +885,7 @@ __glusterd_stage_op_cbk (struct rpc_req *req, struct iovec *iov, char err_str[2048] = {0}; char *peer_str = NULL; xlator_t *this = NULL; + uuid_t *txn_id = NULL; this = THIS; GF_ASSERT (this); @@ -772,11 +938,17 @@ out: "Received stage %s from uuid: %s", (op_ret) ? "RJT" : "ACC", uuid_utoa (rsp.uuid)); + ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); if (ret) { gf_log (this->name, GF_LOG_CRITICAL, "Stage response received " - "from unknown peer: %s", uuid_utoa (rsp.uuid)); + "from unknown peer: %s. 
Ignoring response.", + uuid_utoa (rsp.uuid)); + goto out; } if (op_ret) { @@ -807,7 +979,7 @@ out: break; } - ret = glusterd_op_sm_inject_event (event_type, NULL); + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); if (!ret) { glusterd_friend_sm (); @@ -847,6 +1019,8 @@ __glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov, char err_str[2048] = {0}; char *peer_str = NULL; xlator_t *this = NULL; + uuid_t *txn_id = NULL; + this = THIS; GF_ASSERT (this); @@ -900,6 +1074,10 @@ __glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov, "Received commit %s from uuid: %s", (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid)); + ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); if (ret) { @@ -979,7 +1157,7 @@ __glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov, } out: - ret = glusterd_op_sm_inject_event (event_type, NULL); + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); if (!ret) { glusterd_friend_sm (); @@ -1215,6 +1393,107 @@ out: return ret; } +int32_t +glusterd_vol_lock (call_frame_t *frame, xlator_t *this, + void *data) +{ + gd1_mgmt_volume_lock_req req = {{0},}; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + call_frame_t *dummy_frame = NULL; + dict_t *dict = NULL; + + if (!this) + goto out; + + dict = data; + + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo)); + if (ret) + goto out; + + //peerinfo should not be in payload + dict_del (dict, "peerinfo"); + + glusterd_get_uuid (&req.uuid); + + ret = dict_allocate_and_serialize (dict, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to serialize dict " + "to request buffer"); + goto out; + } + + dummy_frame = create_frame (this, this->ctx->pool); + if (!dummy_frame) + goto out; + + ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, + peerinfo->mgmt_v3, + GLUSTERD_MGMT_V3_VOLUME_LOCK, NULL, + this, glusterd_vol_lock_cbk, + (xdrproc_t)xdr_gd1_mgmt_volume_lock_req); +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_vol_unlock (call_frame_t *frame, xlator_t *this, + void *data) +{ + gd1_mgmt_volume_unlock_req req = {{0},}; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + call_frame_t *dummy_frame = NULL; + dict_t *dict = NULL; + + if (!this) + goto out; + + dict = data; + + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo)); + if (ret) + goto out; + + //peerinfo should not be in payload + dict_del (dict, "peerinfo"); + + glusterd_get_uuid (&req.uuid); + + ret = dict_allocate_and_serialize (dict, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to serialize dict " + "to request buffer"); + goto out; + } + + dummy_frame = create_frame (this, this->ctx->pool); + if (!dummy_frame) + goto out; + + ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, + peerinfo->mgmt_v3, + GLUSTERD_MGMT_V3_VOLUME_UNLOCK, NULL, + this, glusterd_vol_unlock_cbk, + (xdrproc_t) + xdr_gd1_mgmt_volume_unlock_req); +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + int32_t glusterd_cluster_unlock (call_frame_t *frame, xlator_t *this, void *data) @@ -1378,6 +1657,7 @@ __glusterd_brick_op_cbk 
(struct rpc_req *req, struct iovec *iov, glusterd_req_ctx_t *req_ctx = NULL; glusterd_pending_node_t *node = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; this = THIS; GF_ASSERT (this); @@ -1440,6 +1720,11 @@ __glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov, } } out: + + ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + ev_ctx = GF_CALLOC (1, sizeof (*ev_ctx), gf_gld_mt_brick_rsp_ctx_t); GF_ASSERT (ev_ctx); if (op_ret) { @@ -1452,7 +1737,7 @@ out: ev_ctx->pending_node = frame->cookie; ev_ctx->rsp_dict = dict; ev_ctx->commit_ctx = frame->local; - ret = glusterd_op_sm_inject_event (event_type, ev_ctx); + ret = glusterd_op_sm_inject_event (event_type, txn_id, ev_ctx); if (!ret) { glusterd_friend_sm (); glusterd_op_sm (); @@ -1477,6 +1762,7 @@ int32_t glusterd_brick_op (call_frame_t *frame, xlator_t *this, void *data) { + gd1_mgmt_brick_op_req *req = NULL; int ret = 0; glusterd_conf_t *priv = NULL; @@ -1487,6 +1773,7 @@ glusterd_brick_op (call_frame_t *frame, xlator_t *this, glusterd_req_ctx_t *req_ctx = NULL; struct rpc_clnt *rpc = NULL; dict_t *op_ctx = NULL; + uuid_t *txn_id = &global_txn_id; if (!this) { ret = -1; @@ -1509,6 +1796,11 @@ glusterd_brick_op (call_frame_t *frame, xlator_t *this, goto out; } + + ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + list_for_each_entry (pending_node, &opinfo.pending_bricks, list) { dummy_frame = create_frame (this, this->ctx->pool); if (!dummy_frame) @@ -1587,7 +1879,8 @@ glusterd_brick_op (call_frame_t *frame, xlator_t *this, out: if (ret) { - glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, data); + glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + txn_id, data); opinfo.op_ret = ret; } gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -1615,6 +1908,12 @@ struct rpc_clnt_procedure gd_mgmt_actors[GLUSTERD_MGMT_MAXVALUE] = { [GLUSTERD_MGMT_COMMIT_OP] = {"COMMIT_OP", glusterd_commit_op}, }; +struct rpc_clnt_procedure gd_mgmt_v3_actors[GLUSTERD_MGMT_V3_MAXVALUE] = { + [GLUSTERD_MGMT_V3_NULL] = {"NULL", NULL }, + [GLUSTERD_MGMT_V3_VOLUME_LOCK] = {"VOLUME_LOCK", glusterd_vol_lock}, + [GLUSTERD_MGMT_V3_VOLUME_UNLOCK] = {"VOLUME_UNLOCK", glusterd_vol_unlock}, +}; + struct rpc_clnt_program gd_mgmt_prog = { .progname = "glusterd mgmt", .prognum = GD_MGMT_PROGRAM, @@ -1639,4 +1938,10 @@ struct rpc_clnt_program gd_peer_prog = { .numproc = GLUSTERD_FRIEND_MAXVALUE, }; - +struct rpc_clnt_program gd_mgmt_v3_prog = { + .progname = "glusterd mgmt v3", + .prognum = GD_MGMT_PROGRAM, + .progver = GD_MGMT_V3_VERSION, + .proctable = gd_mgmt_v3_actors, + .numproc = GLUSTERD_MGMT_V3_MAXVALUE, +}; diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.h b/xlators/mgmt/glusterd/src/glusterd-sm.h index e29bb7277..b9bedbe69 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-sm.h @@ -97,6 +97,7 @@ struct glusterd_peerinfo_ { struct rpc_clnt *rpc; rpc_clnt_prog_t *mgmt; rpc_clnt_prog_t *peer; + rpc_clnt_prog_t *mgmt_v3; int connected; gf_store_handle_t *shandle; glusterd_sm_tr_log_t sm_log; diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c index c9a1b5a75..5eb5e9f38 100644 --- a/xlators/mgmt/glusterd/src/glusterd-syncop.c +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c @@ -17,6 +17,9 @@ #include "glusterd.h" #include "glusterd-op-sm.h" #include 
"glusterd-utils.h" +#include "glusterd-locks.h" + +extern glusterd_op_info_t opinfo; static inline void gd_synctask_barrier_wait (struct syncargs *args, int count) @@ -30,6 +33,62 @@ gd_synctask_barrier_wait (struct syncargs *args, int count) syncbarrier_destroy (&args->barrier); } +static void +gd_mgmt_v3_collate_errors (struct syncargs *args, int op_ret, int op_errno, + char *op_errstr, int op_code, + glusterd_peerinfo_t *peerinfo, u_char *uuid) +{ + char err_str[PATH_MAX] = "Please check log file for details."; + char op_err[PATH_MAX] = ""; + char *peer_str = NULL; + + if (op_ret) { + args->op_ret = op_ret; + args->op_errno = op_errno; + + if (peerinfo) + peer_str = peerinfo->hostname; + else + peer_str = uuid_utoa (uuid); + + if (op_errstr && strcmp (op_errstr, "")) + snprintf (err_str, sizeof(err_str) - 1, + "Error: %s", op_errstr); + + switch (op_code) { + case GLUSTERD_MGMT_V3_VOLUME_LOCK: + { + snprintf (op_err, sizeof(op_err) - 1, + "Locking volume failed " + "on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_VOLUME_UNLOCK: + { + snprintf (op_err, sizeof(op_err) - 1, + "Unlocking volume failed " + "on %s. %s", peer_str, err_str); + break; + } + } + + if (args->errstr) { + snprintf (err_str, sizeof(err_str) - 1, + "%s\n%s", args->errstr, + op_err); + GF_FREE (args->errstr); + args->errstr = NULL; + } else + snprintf (err_str, sizeof(err_str) - 1, + "%s", op_err); + + gf_log ("", GF_LOG_ERROR, "%s", op_err); + args->errstr = gf_strdup (err_str); + } + + return; +} + static void gd_collate_errors (struct syncargs *args, int op_ret, int op_errno, char *op_errstr, int op_code, @@ -207,6 +266,7 @@ out: /* Defined in glusterd-rpc-ops.c */ extern struct rpc_clnt_program gd_mgmt_prog; extern struct rpc_clnt_program gd_brick_prog; +extern struct rpc_clnt_program gd_mgmt_v3_prog; static int glusterd_syncop_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp) @@ -286,6 +346,185 @@ out: return ret; } +int32_t +_gd_syncop_mgmt_volume_lock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_volume_lock_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int op_ret = -1; + int op_errno = -1; + + GF_ASSERT(req); + GF_ASSERT(iov); + GF_ASSERT(myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_volume_lock_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_VOLUME_LOCK, + peerinfo, rsp.uuid); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_syncop_mgmt_volume_lock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + _gd_syncop_mgmt_volume_lock_cbk); +} + +int +gd_syncop_mgmt_volume_lock (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid, uuid_t txn_id) +{ + int ret = -1; + gd1_mgmt_volume_lock_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + + GF_ASSERT(op_ctx); + GF_ASSERT(peerinfo); + GF_ASSERT(args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + 
&req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + uuid_copy (req.txn_id, txn_id); + req.op = op; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_VOLUME_LOCK, + gd_syncop_mgmt_volume_lock_cbk, + (xdrproc_t) + xdr_gd1_mgmt_volume_lock_req); + synclock_lock (&conf->big_lock); +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +_gd_syncop_mgmt_volume_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_volume_unlock_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int op_ret = -1; + int op_errno = -1; + + GF_ASSERT(req); + GF_ASSERT(iov); + GF_ASSERT(myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_volume_unlock_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + /* Set peer as locked, so we unlock only the locked peers */ + if (rsp.op_ret == 0) + peerinfo->locked = _gf_true; + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_VOLUME_UNLOCK, + peerinfo, rsp.uuid); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_syncop_mgmt_volume_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + _gd_syncop_mgmt_volume_unlock_cbk); +} + +int +gd_syncop_mgmt_volume_unlock (dict_t *op_ctx, glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid, uuid_t txn_id) +{ + int ret = -1; + gd1_mgmt_volume_unlock_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + + GF_ASSERT(op_ctx); + GF_ASSERT(peerinfo); + GF_ASSERT(args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + uuid_copy (req.txn_id, txn_id); + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_VOLUME_UNLOCK, + gd_syncop_mgmt_volume_unlock_cbk, + (xdrproc_t) + xdr_gd1_mgmt_volume_unlock_req); + synclock_lock (&conf->big_lock); +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + int32_t _gd_syncop_mgmt_lock_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) @@ -830,8 +1069,8 @@ gd_build_peers_list (struct list_head *peers, struct list_head *xact_peers, } int -gd_lock_op_phase (struct list_head *peers, glusterd_op_t op, dict_t *op_ctx, - char **op_errstr, int npeers) +gd_lock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, dict_t *op_ctx, + char **op_errstr, int npeers, uuid_t txn_id) { int ret = -1; int peer_cnt = 0; @@ -839,6 +1078,9 @@ gd_lock_op_phase (struct list_head *peers, glusterd_op_t op, dict_t *op_ctx, xlator_t *this = NULL; glusterd_peerinfo_t *peerinfo = NULL; struct syncargs args = {0}; + struct list_head *peers = NULL; + + peers = &conf->xaction_peers; if (!npeers) { ret = 0; @@ -849,22 +1091,38 @@ gd_lock_op_phase (struct list_head *peers, glusterd_op_t op, dict_t *op_ctx, synctask_barrier_init((&args)); peer_cnt = 0; 
list_for_each_entry (peerinfo, peers, op_peers_list) { - /* Reset lock status */ - peerinfo->locked = _gf_false; - gd_syncop_mgmt_lock (peerinfo, &args, MY_UUID, peer_uuid); + if (conf->op_version < GD_OP_VERSION_4) { + /* Reset lock status */ + peerinfo->locked = _gf_false; + gd_syncop_mgmt_lock (peerinfo, &args, + MY_UUID, peer_uuid); + } else + gd_syncop_mgmt_volume_lock (op, op_ctx, peerinfo, &args, + MY_UUID, peer_uuid, txn_id); peer_cnt++; } gd_synctask_barrier_wait((&args), peer_cnt); - ret = args.op_ret; - if (ret) { - gf_asprintf (op_errstr, "Another transaction could be " - "in progress. Please try again after " - "sometime."); - gf_log (this->name, GF_LOG_ERROR, "Failed to acquire lock"); - goto out; + + if (args.op_ret) { + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + else { + ret = gf_asprintf (op_errstr, "Another transaction " + "could be in progress. Please try " + "again after sometime."); + if (ret == -1) + *op_errstr = NULL; + + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire lock"); + + } } - ret = 0; + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent lock op req for 'Volume %s' " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); out: return ret; } @@ -1055,9 +1313,10 @@ out: } int -gd_unlock_op_phase (struct list_head *peers, glusterd_op_t op, int op_ret, +gd_unlock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, int op_ret, rpcsvc_request_t *req, dict_t *op_ctx, char *op_errstr, - int npeers, gf_boolean_t is_locked) + int npeers, char *volname, gf_boolean_t is_acquired, + uuid_t txn_id) { glusterd_peerinfo_t *peerinfo = NULL; glusterd_peerinfo_t *tmp = NULL; @@ -1066,6 +1325,9 @@ gd_unlock_op_phase (struct list_head *peers, glusterd_op_t op, int op_ret, int ret = -1; xlator_t *this = NULL; struct syncargs args = {0}; + struct list_head *peers = NULL; + + peers = &conf->xaction_peers; if (!npeers) { ret = 0; @@ -1074,24 +1336,40 @@ gd_unlock_op_phase (struct list_head *peers, glusterd_op_t op, int op_ret, /* If the lock has not been held during this * transaction, do not send unlock requests */ - if (!is_locked) + if (!is_acquired) goto out; this = THIS; synctask_barrier_init((&args)); peer_cnt = 0; - list_for_each_entry_safe (peerinfo, tmp, peers, op_peers_list) { - /* Only unlock peers that were locked */ - if (peerinfo->locked) { - gd_syncop_mgmt_unlock (peerinfo, &args, MY_UUID, - tmp_uuid); - peer_cnt++; + if (conf->op_version < GD_OP_VERSION_4) { + list_for_each_entry_safe (peerinfo, tmp, peers, op_peers_list) { + /* Only unlock peers that were locked */ + if (peerinfo->locked) { + gd_syncop_mgmt_unlock (peerinfo, &args, + MY_UUID, tmp_uuid); + peer_cnt++; + list_del_init (&peerinfo->op_peers_list); + } + } + } else { + if (volname) { + list_for_each_entry_safe (peerinfo, tmp, + peers, op_peers_list) { + gd_syncop_mgmt_volume_unlock (op_ctx, peerinfo, + &args, MY_UUID, + tmp_uuid, txn_id); + peer_cnt++; + list_del_init (&peerinfo->op_peers_list); + } } - - list_del_init (&peerinfo->op_peers_list); } gd_synctask_barrier_wait((&args), peer_cnt); + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent unlock op req for 'Volume %s' " + "to %d peers. 
Returning %d", gd_op_list[op], peer_cnt, ret); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to unlock " "on some peer(s)"); @@ -1099,10 +1377,24 @@ gd_unlock_op_phase (struct list_head *peers, glusterd_op_t op, int op_ret, out: glusterd_op_send_cli_response (op, op_ret, 0, req, op_ctx, op_errstr); - /* Unlock and reset opinfo.op ONLY if lock ever succeeded! */ - if (is_locked) { + + if (is_acquired) { + /* Based on the op-version, + * we release the cluster or volume lock + * and clear the op */ + glusterd_op_clear_op (op); - glusterd_unlock (MY_UUID); + if (conf->op_version < GD_OP_VERSION_4) + glusterd_unlock (MY_UUID); + else { + if (volname) { + ret = glusterd_volume_unlock (volname, MY_UUID); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to release lock for %s", + volname); + } + } } return 0; @@ -1120,7 +1412,8 @@ gd_get_brick_count (struct list_head *bricks) } int -gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, char **op_errstr) +gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char **op_errstr) { glusterd_pending_node_t *pending_node = NULL; struct list_head selected = {0,}; @@ -1192,21 +1485,43 @@ out: void gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req) { - int ret = -1; - int npeers = 0; - dict_t *req_dict = NULL; - glusterd_conf_t *conf = NULL; - glusterd_op_t op = 0; - int32_t tmp_op = 0; - char *op_errstr = NULL; - xlator_t *this = NULL; - gf_boolean_t is_locked = _gf_false; + int ret = -1; + int npeers = 0; + dict_t *req_dict = NULL; + glusterd_conf_t *conf = NULL; + glusterd_op_t op = 0; + int32_t tmp_op = 0; + char *op_errstr = NULL; + char *tmp = NULL; + char *volname = NULL; + xlator_t *this = NULL; + gf_boolean_t is_acquired = _gf_false; + uuid_t *txn_id = NULL; + glusterd_op_info_t txn_opinfo; this = THIS; GF_ASSERT (this); conf = this->private; GF_ASSERT (conf); + /* Generate a transaction-id for this operation and + * save it in the dict */ + ret = glusterd_generate_txn_id (op_ctx, &txn_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to generate transaction id"); + goto out; + + } + + /* Save the MY_UUID as the originator_uuid */ + ret = glusterd_set_originator_uuid (op_ctx); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set originator_uuid."); + goto out; + } + ret = dict_get_int32 (op_ctx, GD_SYNC_OPCODE_KEY, &tmp_op); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to get volume " @@ -1215,28 +1530,73 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req) } op = tmp_op; - ret = glusterd_lock (MY_UUID); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Unable to acquire lock"); - gf_asprintf (&op_errstr, "Another transaction is in progress. " - "Please try again after sometime."); - goto out; + + /* Based on the op_version, acquire a cluster or volume lock */ + if (conf->op_version < GD_OP_VERSION_4) { + ret = glusterd_lock (MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock"); + gf_asprintf (&op_errstr, + "Another transaction is in progress. 
" + "Please try again after sometime."); + goto out; + } + } else { + + /* If no volname is given as a part of the command, locks will + * not be held */ + ret = dict_get_str (op_ctx, "volname", &tmp); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Failed to get volume " + "name"); + goto local_locking_done; + } else { + /* Use a copy of volname, as cli response will be + * sent before the unlock, and the volname in the + * dict, might be removed */ + volname = gf_strdup (tmp); + if (!volname) + goto out; + } + + ret = glusterd_volume_lock (volname, MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock for %s", volname); + gf_asprintf (&op_errstr, + "Another transaction is in progress " + "for %s. Please try again after sometime.", + volname); + goto out; + } } - is_locked = _gf_true; + is_acquired = _gf_true; + +local_locking_done: + + /* Save opinfo for this transaction with the transaction id */ + glusterd_txn_opinfo_init (&txn_opinfo, NULL, &op, NULL, NULL); + ret = glusterd_set_txn_opinfo (txn_id, &txn_opinfo); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + + opinfo = txn_opinfo; - /* storing op globally to access in synctask code paths - * This is still acceptable, as we are performing this under - * the 'cluster' lock*/ - glusterd_op_set_op (op); INIT_LIST_HEAD (&conf->xaction_peers); npeers = gd_build_peers_list (&conf->peers, &conf->xaction_peers, op); - ret = gd_lock_op_phase (&conf->xaction_peers, op, op_ctx, &op_errstr, - npeers); - if (ret) - goto out; + /* If no volname is given as a part of the command, locks will + * not be held */ + if (volname) { + ret = gd_lock_op_phase (conf, op, op_ctx, &op_errstr, + npeers, *txn_id); + if (ret) + goto out; + } ret = glusterd_op_build_payload (&req_dict, &op_errstr, op_ctx); if (ret) { @@ -1263,8 +1623,17 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req) ret = 0; out: - (void) gd_unlock_op_phase (&conf->xaction_peers, op, ret, req, - op_ctx, op_errstr, npeers, is_locked); + (void) gd_unlock_op_phase (conf, op, ret, req, op_ctx, op_errstr, + npeers, volname, is_acquired, *txn_id); + + /* Clearing the transaction opinfo */ + ret = glusterd_clear_txn_opinfo (txn_id); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to clear transaction's opinfo"); + + if (volname) + GF_FREE (volname); if (req_dict) dict_unref (req_dict); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 78593d14e..ea26c4e47 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -37,6 +37,7 @@ #include "glusterd-pmap.h" #include "glusterfs-acl.h" #include "glusterd-syncop.h" +#include "glusterd-locks.h" #include "xdr-generic.h" #include @@ -7897,7 +7898,7 @@ glusterd_volume_status_copy_to_op_ctx_dict (dict_t *aggr, dict_t *rsp_dict) if (ret) goto out; - if (cmd & GF_CLI_STATUS_ALL && is_origin_glusterd ()) { + if (cmd & GF_CLI_STATUS_ALL && is_origin_glusterd (ctx_dict)) { ret = dict_get_int32 (rsp_dict, "vol_count", &vol_count); if (ret == 0) { ret = dict_set_int32 (ctx_dict, "vol_count", @@ -8788,20 +8789,66 @@ glusterd_handle_node_rsp (dict_t *req_dict, void *pending_entry, return ret; } +int32_t +glusterd_set_originator_uuid (dict_t *dict) +{ + int ret = -1; + uuid_t *originator_uuid = NULL; + + GF_ASSERT (dict); + + originator_uuid = GF_CALLOC (1, sizeof(uuid_t), + gf_common_mt_uuid_t); + if (!originator_uuid) { + ret = -1; + goto out; + } + + uuid_copy (*originator_uuid, 
MY_UUID); + ret = dict_set_bin (dict, "originator_uuid", + originator_uuid, sizeof (uuid_t)); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Failed to set originator_uuid."); + goto out; + } + +out: + if (ret && originator_uuid) + GF_FREE (originator_uuid); + + return ret; +} + /* Should be used only when an operation is in progress, as that is the only * time a lock_owner is set */ gf_boolean_t -is_origin_glusterd () +is_origin_glusterd (dict_t *dict) { - int ret = 0; - uuid_t lock_owner = {0,}; + gf_boolean_t ret = _gf_false; + uuid_t lock_owner = {0,}; + uuid_t *originator_uuid = NULL; - ret = glusterd_get_lock_owner (&lock_owner); - if (ret) - return _gf_false; + GF_ASSERT (dict); - return (uuid_compare (MY_UUID, lock_owner) == 0); + ret = dict_get_bin (dict, "originator_uuid", + (void **) &originator_uuid); + if (ret) { + /* If not originator_uuid has been set, then the command + * has been originated from a glusterd running on older version + * Hence fetching the lock owner */ + ret = glusterd_get_lock_owner (&lock_owner); + if (ret) { + ret = _gf_false; + goto out; + } + ret = !uuid_compare (MY_UUID, lock_owner); + } else + ret = !uuid_compare (MY_UUID, *originator_uuid); + +out: + return ret; } int diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 6b0d77b9f..cd22b2960 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -529,11 +529,14 @@ glusterd_are_vol_all_peers_up (glusterd_volinfo_t *volinfo, struct list_head *peers, char **down_peerstr); +int32_t +glusterd_set_originator_uuid (dict_t *dict); + /* Should be used only when an operation is in progress, as that is the only * time a lock_owner is set */ gf_boolean_t -is_origin_glusterd (); +is_origin_glusterd (dict_t *dict); gf_boolean_t glusterd_is_quorum_changed (dict_t *options, char *option, char *value); diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index d59aaa44a..9da8a2990 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -36,6 +36,7 @@ #include "glusterd-store.h" #include "glusterd-hooks.h" #include "glusterd-utils.h" +#include "glusterd-locks.h" #include "common-utils.h" #include "run.h" @@ -48,6 +49,7 @@ extern struct rpcsvc_program gluster_cli_getspec_prog; extern struct rpcsvc_program gluster_pmap_prog; extern glusterd_op_info_t opinfo; extern struct rpcsvc_program gd_svc_mgmt_prog; +extern struct rpcsvc_program gd_svc_mgmt_v3_prog; extern struct rpcsvc_program gd_svc_peer_prog; extern struct rpcsvc_program gd_svc_cli_prog; extern struct rpcsvc_program gd_svc_cli_prog_ro; @@ -64,6 +66,7 @@ struct rpcsvc_program *gd_inet_programs[] = { &gd_svc_peer_prog, &gd_svc_cli_prog_ro, &gd_svc_mgmt_prog, + &gd_svc_mgmt_v3_prog, &gluster_pmap_prog, &gluster_handshake_prog, &glusterd_mgmt_hndsk_prog, @@ -1303,6 +1306,8 @@ init (xlator_t *this) glusterd_friend_sm_init (); glusterd_op_sm_init (); glusterd_opinfo_init (); + glusterd_vol_lock_init (); + glusterd_txn_opinfo_dict_init (); ret = glusterd_sm_tr_log_init (&conf->op_sm_log, glusterd_op_sm_state_name_get, glusterd_op_sm_event_name_get, @@ -1422,6 +1427,8 @@ fini (xlator_t *this) if (conf->handle) gf_store_handle_destroy (conf->handle); glusterd_sm_tr_log_delete (&conf->op_sm_log); + glusterd_vol_lock_fini (); + glusterd_txn_opinfo_dict_fini (); GF_FREE (conf); this->private = NULL; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 
9b6e2fb33..e8035c7c6 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -524,6 +524,14 @@ glusterd_op_lock_send_resp (rpcsvc_request_t *req, int32_t status); int glusterd_op_unlock_send_resp (rpcsvc_request_t *req, int32_t status); +int +glusterd_op_volume_lock_send_resp (rpcsvc_request_t *req, + uuid_t *txn_id, int32_t status); + +int +glusterd_op_volume_unlock_send_resp (rpcsvc_request_t *req, + uuid_t *txn_id, int32_t status); + int glusterd_op_stage_send_resp (rpcsvc_request_t *req, int32_t op, int32_t status, @@ -797,4 +805,14 @@ int32_t glusterd_op_begin_synctask (rpcsvc_request_t *req, glusterd_op_t op, void *dict); int32_t glusterd_defrag_event_notify_handle (dict_t *dict); + +int32_t +glusterd_txn_opinfo_dict_init (); + +void +glusterd_txn_opinfo_dict_fini (); + +void +glusterd_txn_opinfo_init (); + #endif -- cgit
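
Appendix (editor's sketch, not part of the patch): throughout the hunks above, the syncop and op-state-machine paths acquire and drop the new per-volume lock through glusterd_volume_lock (volname, MY_UUID) and glusterd_volume_unlock (volname, MY_UUID) (see gd_sync_task_begin, gd_unlock_op_phase and glusterd_op_txn_complete). The fragment below is a minimal, hedged illustration of how such a lock table could be built on the dict, uuid and GF_CALLOC helpers the patch itself uses; vol_lock_dict, example_volume_lock and example_volume_unlock are illustrative names only and are not the actual glusterd-locks.c implementation. The transaction-opinfo table (glusterd_set_txn_opinfo / glusterd_get_txn_opinfo / glusterd_clear_txn_opinfo) can be pictured the same way, with the transaction id as the dict key and the opinfo as the stored value.

    /* Illustrative sketch only. Assumes glusterd's usual headers; in the
     * real tree dict_t, uuid_t, GF_CALLOC and gf_common_mt_uuid_t are all
     * pulled in via glusterd.h. */
    #include "glusterd.h"

    /* Hypothetical global lock table: key = volume name,
     * value = uuid of the glusterd that holds the lock. */
    static dict_t *vol_lock_dict;

    int32_t
    example_volume_lock (char *volname, uuid_t owner)
    {
            int32_t  ret        = -1;
            uuid_t  *cur_owner  = NULL;
            uuid_t  *owner_copy = NULL;

            /* Refuse a second lock on the same volume */
            ret = dict_get_bin (vol_lock_dict, volname, (void **) &cur_owner);
            if (!ret)
                    return -1;      /* already locked by cur_owner */

            owner_copy = GF_CALLOC (1, sizeof (uuid_t), gf_common_mt_uuid_t);
            if (!owner_copy)
                    return -1;

            uuid_copy (*owner_copy, owner);

            /* dict_set_bin takes ownership of owner_copy on success */
            return dict_set_bin (vol_lock_dict, volname, owner_copy,
                                 sizeof (uuid_t));
    }

    int32_t
    example_volume_unlock (char *volname, uuid_t owner)
    {
            int32_t  ret       = -1;
            uuid_t  *cur_owner = NULL;

            ret = dict_get_bin (vol_lock_dict, volname, (void **) &cur_owner);
            if (ret)
                    return -1;      /* no lock held on this volume */

            /* Only the glusterd that took the lock may release it */
            if (uuid_compare (*cur_owner, owner))
                    return -1;

            dict_del (vol_lock_dict, volname);
            return 0;
    }

Because the table is keyed by volume name rather than being a single cluster-wide flag, two transactions on different volumes can both succeed in taking their locks, which is the concurrency the patch is after; a second lock attempt on the same volume fails and surfaces as the "Another transaction could be in progress" error seen in the lock callbacks above.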