summaryrefslogtreecommitdiffstats
path: root/xlators/mgmt/glusterd/src/glusterd-utils.c
diff options
context:
space:
mode:
authorVarun Shastry <vshastry@redhat.com>2013-04-19 12:34:51 +0530
committerKrishnan Parthasarathi <kparthas@redhat.com>2013-08-12 00:48:14 +0530
commit184e88bfc8f9c6b180c56adfd029e2aaece1297f (patch)
tree154de47dd6fe5b3e91201ffde27e7e732381aaa9 /xlators/mgmt/glusterd/src/glusterd-utils.c
parenta1fe3d040a8c9b032cbcb5e831383628cddfa39a (diff)
features/quota: Improvements to quota
Old implementation * Client side implementation of quota - Not secure - Increased traffic in updating the ctx New Implementation * Quota is implemented in 2 stages: soft and hard quota. Upon reaching the soft quota limit on the directory it logs/alerts in the quota daemon log (i.e. DEFAULT_LOG_DIR/quotad.log), and no more writes are allowed after the hard quota limit. After reaching the soft-limit the daemon alerts the user/admin repeatedly every 'alert-time', which is configurable. * Quota is moved to server-side. There will be 2 quota xlators: i. Quota Server It takes care of enforcing the quota and maintains the context specific to the brick. Since this doesn't have the complete picture of the cluster, cluster-wide usage is updated from the quota daemon. This updated context is saved and used for the enforcement. It updates its context by searching for the QUOTA_UPDATE_KEY in the dict in the setxattr call, and is updated from nowhere else. The quota is always loaded in the server graph and is bypassed if the feature is not enabled. Options specific to quota-server: server-quota - Specifies whether the feature is on/off. It is used to bypass the quota if turned off. deem-statfs - If set to on, it takes quota limits into consideration while estimating fs size (df command). ii. Quota Daemon This is the new xlator introduced with this patch. It's the *gluster client* process with no mount point, started upon enabling quota or restarting the volume. This is a single process for all the volumes in the cluster. Its volfile is stored in GLUSTERD_DEFAULT_WORK_DIR/quotad/quotad.vol. It periodically queries the sizes on all the bricks, aggregates them and sends back the updated size. The timeout between successive updates is configurable and by default is longer for below-soft-quota usage and shorter for above-soft-quota usage. It maintains the timeout inside the limit structure based on the usage: below soft limit and above soft limit.
There will be a thread running per volume which iterates through the list and decides whether the size is to be queried in the current iteration based on its timeout. It picks the next iteration time by taking the least of the timeouts in the list of entries. It maintains a separate inode table for each volume in the quotad. In the first iteration it builds the table for quota-dirs (dirs on which limit is set) and its components. Options specific to quotad: hard-timeout - Timeout for updating the usage to the quota-server when the usage has crossed the soft-limit. soft-timeout - Timeout for updating the usage to the quota-server when the usage is below the soft-limit. alert-time - Frequency of logging after the usage has reached the soft limit. Options common to both: default-soft-limit - This is used when individual paths are not configured with a soft-limit; the default value of this option is 90% of the hard-limit. limit-set - String containing all the limits. Thus in the current implementation we'll have 2 quota xlators: one in the server graph and one in the trusted client (quota daemon), whose sole purpose is to aggregate the quota size xattrs from all the bricks and send the same to the server quota xlator. * Changes in glusterd and CLI A single volfile is created for all the volumes, similar to the nfs volfile. All files related to the quota client (volfile, pid etc) are stored in GLUSTERD_DEFAULT_WORK_DIR/quotad/.
The new pattern of the quota limit is stored as limit-set = <single-dir-limit>[,<single-dir-limit>] single-dir-limit = <abs-path>:<hard-limit>[:<soft-limit-in-percent>] It also introduces new options: volume quota <VOLNAME> {enable|disable|list [<path> ...]|remove <path>| default-soft-limit <percent>} | volume quota <VOLNAME> {limit-usage <path> <size> |soft-limit <path> <percent>} | volume quota <VOLNAME> {alert-time|soft-timeout|hard-timeout} {<time>} Credit: Raghavendra Bhat <rabhat@redhat.com> Varun Shastry <vshastry@redhat.com> Shishir Gowda <sgowda@redhat.com> Kruthika Dhananjay <kdhananj@redhat.com> Brian Foster <bfoster@redhat.com> Krishnan Parthasarathi <kparthas@redhat.com> Change-Id: I16ec5be0c2faaf42b14034b9ccaf17796adef082 BUG: 969461 Signed-off-by: Varun Shastry <vshastry@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-utils.c')
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c216
1 files changed, 206 insertions, 10 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index bde5b9b5..4a2b9454 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -3037,6 +3037,7 @@ glusterd_compare_friend_data (dict_t *vols, int32_t *status, char *hostname)
gf_boolean_t update = _gf_false;
gf_boolean_t stale_nfs = _gf_false;
gf_boolean_t stale_shd = _gf_false;
+ gf_boolean_t stale_qd = _gf_false;
GF_ASSERT (vols);
GF_ASSERT (status);
@@ -3066,6 +3067,8 @@ glusterd_compare_friend_data (dict_t *vols, int32_t *status, char *hostname)
stale_nfs = _gf_true;
if (glusterd_is_nodesvc_running ("glustershd"))
stale_shd = _gf_true;
+ if (glusterd_is_nodesvc_running ("quotad"))
+ stale_qd = _gf_true;
ret = glusterd_import_global_opts (vols);
if (ret)
goto out;
@@ -3079,6 +3082,8 @@ glusterd_compare_friend_data (dict_t *vols, int32_t *status, char *hostname)
glusterd_nfs_server_stop ();
if (stale_shd)
glusterd_shd_stop ();
+ if (stale_qd)
+ glusterd_quotad_stop ();
}
}
@@ -3161,7 +3166,10 @@ glusterd_get_nodesvc_volfile (char *server, char *workdir,
GF_ASSERT (len == PATH_MAX);
glusterd_get_nodesvc_dir (server, workdir, dir, sizeof (dir));
- snprintf (volfile, len, "%s/%s-server.vol", dir, server);
+ if (strcmp ("quotad", server) != 0)
+ snprintf (volfile, len, "%s/%s-server.vol", dir, server);
+ else
+ snprintf (volfile, len, "%s/%s.vol", dir, server);
}
void
@@ -3174,11 +3182,14 @@ glusterd_nodesvc_set_online_status (char *server, gf_boolean_t status)
GF_ASSERT (priv);
GF_ASSERT (priv->shd);
GF_ASSERT (priv->nfs);
+ GF_ASSERT (priv->quotad);
if (!strcmp("glustershd", server))
priv->shd->online = status;
else if (!strcmp ("nfs", server))
priv->nfs->online = status;
+ else if (!strcmp ("quotad", server))
+ priv->quotad->online = status;
}
gf_boolean_t
@@ -3192,11 +3203,14 @@ glusterd_is_nodesvc_online (char *server)
GF_ASSERT (conf);
GF_ASSERT (conf->shd);
GF_ASSERT (conf->nfs);
+ GF_ASSERT (conf->quotad);
if (!strcmp (server, "glustershd"))
online = conf->shd->online;
else if (!strcmp (server, "nfs"))
online = conf->nfs->online;
+ else if (!strcmp (server, "quotad"))
+ online = conf->quotad->online;
return online;
}
@@ -3262,11 +3276,14 @@ glusterd_nodesvc_get_rpc (char *server)
GF_ASSERT (priv);
GF_ASSERT (priv->shd);
GF_ASSERT (priv->nfs);
+ GF_ASSERT (priv->quotad);
if (!strcmp (server, "glustershd"))
rpc = priv->shd->rpc;
else if (!strcmp (server, "nfs"))
rpc = priv->nfs->rpc;
+ else if (!strcmp (server, "quotad"))
+ rpc = priv->quotad->rpc;
return rpc;
}
@@ -3284,11 +3301,14 @@ glusterd_nodesvc_set_rpc (char *server, struct rpc_clnt *rpc)
GF_ASSERT (priv);
GF_ASSERT (priv->shd);
GF_ASSERT (priv->nfs);
+ GF_ASSERT (priv->quotad);
if (!strcmp ("glustershd", server))
priv->shd->rpc = rpc;
else if (!strcmp ("nfs", server))
priv->nfs->rpc = rpc;
+ else if (!strcmp ("quotad", server))
+ priv->quotad->rpc = rpc;
return ret;
}
@@ -3415,6 +3435,14 @@ glusterd_nodesvc_start (char *server)
runner_add_args (&runner, "--xlator-option",
glusterd_uuid_option, NULL);
}
+ if (!strcmp (server, "quotad")) {
+ runner_add_args (&runner, "--xlator-option",
+ "*replicate*.data-self-heal=off",
+ "--xlator-option",
+ "*replicate*.metadata-self-heal=off",
+ "--xlator-option",
+ "*replicate*.entry-self-heal=off", NULL);
+ }
runner_log (&runner, "", GF_LOG_DEBUG,
"Starting the nfs/glustershd services");
@@ -3438,6 +3466,12 @@ glusterd_shd_start ()
return glusterd_nodesvc_start ("glustershd");
}
+/* Start the quota daemon by handing the "quotad" service name to the
+ * generic node-service starter; its result is returned unchanged. */
+int
+glusterd_quotad_start ()
+{
+        int ret = glusterd_nodesvc_start ("quotad");
+
+        return ret;
+}
+
gf_boolean_t
glusterd_is_nodesvc_running (char *server)
{
@@ -3556,6 +3590,12 @@ glusterd_shd_stop ()
}
int
+glusterd_quotad_stop ()
+{
+ return glusterd_nodesvc_stop ("quotad", SIGTERM);
+}
+
+int
glusterd_add_node_to_dict (char *server, dict_t *dict, int count,
dict_t *vol_opts)
{
@@ -3705,6 +3745,12 @@ glusterd_reconfigure_shd ()
}
int
+glusterd_reconfigure_quotad ()
+{
+ return glusterd_reconfigure_nodesvc (glusterd_create_quotad_volfile);
+}
+
+int
glusterd_reconfigure_nfs ()
{
int ret = -1;
@@ -3751,21 +3797,54 @@ glusterd_check_generate_start_shd ()
}
int
-glusterd_nodesvcs_batch_op (glusterd_volinfo_t *volinfo,
- int (*nfs_op) (), int (*shd_op) ())
+glusterd_check_generate_start_quotad ()
{
+ int ret = 0;
+
+ ret = glusterd_check_generate_start_service (glusterd_create_quotad_volfile,
+ glusterd_quotad_stop,
+ glusterd_quotad_start);
+ if (ret == -EINVAL)
+ ret = 0;
+ return ret;
+}
+
+int
+glusterd_nodesvcs_batch_op (glusterd_volinfo_t *volinfo, int (*nfs_op) (),
+ int (*shd_op) (), int (*qd_op) ())
+ {
int ret = 0;
+ xlator_t *this = THIS;
+ glusterd_conf_t *conf = NULL;
+
+ GF_ASSERT (this);
+ conf = this->private;
+ GF_ASSERT (conf);
ret = nfs_op ();
if (ret)
goto out;
if (volinfo && !glusterd_is_volume_replicate (volinfo))
- goto out;
+ goto quotad_op;
ret = shd_op ();
if (ret)
goto out;
+
+quotad_op:
+
+ if (conf->op_version == GD_OP_VERSION_MIN)
+ goto out;
+
+ if (is_origin_glusterd ()) {
+ if (volinfo && !glusterd_is_volume_quota_enabled (volinfo))
+ goto out;
+ ret = qd_op ();
+ if (ret)
+ goto out;
+ }
+
out:
return ret;
}
@@ -3775,7 +3854,8 @@ glusterd_nodesvcs_start (glusterd_volinfo_t *volinfo)
{
return glusterd_nodesvcs_batch_op (volinfo,
glusterd_nfs_server_start,
- glusterd_shd_start);
+ glusterd_shd_start,
+ glusterd_quotad_start);
}
int
@@ -3783,7 +3863,8 @@ glusterd_nodesvcs_stop (glusterd_volinfo_t *volinfo)
{
return glusterd_nodesvcs_batch_op (volinfo,
glusterd_nfs_server_stop,
- glusterd_shd_stop);
+ glusterd_shd_stop,
+ glusterd_quotad_stop);
}
gf_boolean_t
@@ -3829,21 +3910,53 @@ glusterd_all_replicate_volumes_stopped ()
return _gf_true;
}
+/* Walk the volume list held in the glusterd private data and report
+ * whether every quota-enabled volume is stopped.
+ *
+ * Returns _gf_false as soon as a volume is found that has quota enabled
+ * and is in GLUSTERD_STATUS_STARTED state, _gf_true otherwise (including
+ * when no volume has quota enabled).
+ */
+gf_boolean_t
+glusterd_all_volumes_with_quota_stopped ()
+{
+        xlator_t           *this    = THIS;
+        glusterd_conf_t    *conf    = NULL;
+        glusterd_volinfo_t *volinfo = NULL;
+
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        list_for_each_entry (volinfo, &conf->volumes, vol_list) {
+                /* Only a started volume with quota enabled keeps quotad
+                 * needed; the quota check is evaluated first. */
+                if (glusterd_is_volume_quota_enabled (volinfo) &&
+                    (volinfo->status == GLUSTERD_STATUS_STARTED))
+                        return _gf_false;
+        }
+
+        return _gf_true;
+}
+
+
int
glusterd_nodesvcs_handle_graph_change (glusterd_volinfo_t *volinfo)
{
int (*shd_op) () = NULL;
int (*nfs_op) () = NULL;
+ int (*qd_op) () = NULL;
shd_op = glusterd_check_generate_start_shd;
nfs_op = glusterd_check_generate_start_nfs;
+ qd_op = glusterd_check_generate_start_quotad;
if (glusterd_are_all_volumes_stopped ()) {
shd_op = glusterd_shd_stop;
nfs_op = glusterd_nfs_server_stop;
- } else if (glusterd_all_replicate_volumes_stopped()) {
- shd_op = glusterd_shd_stop;
+ qd_op = glusterd_quotad_stop;
+ } else {
+ if (glusterd_all_replicate_volumes_stopped()) {
+ shd_op = glusterd_shd_stop;
+ }
+ if (glusterd_all_volumes_with_quota_stopped ()) {
+ qd_op = glusterd_quotad_stop;
+ }
}
- return glusterd_nodesvcs_batch_op (volinfo, nfs_op, shd_op);
+
+ return glusterd_nodesvcs_batch_op (volinfo, nfs_op, shd_op, qd_op);
}
int
@@ -3851,7 +3964,8 @@ glusterd_nodesvcs_handle_reconfigure (glusterd_volinfo_t *volinfo)
{
return glusterd_nodesvcs_batch_op (volinfo,
glusterd_reconfigure_nfs,
- glusterd_reconfigure_shd);
+ glusterd_reconfigure_shd,
+ glusterd_reconfigure_quotad);
}
int
@@ -5878,6 +5992,82 @@ out:
return ret;
}
+/*
+ * Ask the running quota daemon (quotad) to write a statedump.
+ *
+ * options    - space-separated option string; its first token must be
+ *              "quotad".
+ * option_cnt - passed through unchanged to glusterd_set_dump_options().
+ * op_errstr  - when non-NULL, receives a gf_strdup()'d message if the
+ *              option string does not start with "quotad".
+ *
+ * The daemon's pid is read from the quotad pidfile, the dump options are
+ * written to DEFAULT_VAR_RUN_DIRECTORY/glusterdump.<pid>.options and
+ * SIGUSR1 is sent to that pid.
+ *
+ * Returns 0 once the signal has been sent, -1 on any failure.
+ */
+int
+glusterd_quotad_statedump (char *options, int option_cnt, char **op_errstr)
+{
+        int              ret                        = -1;
+        xlator_t        *this                       = NULL;
+        glusterd_conf_t *conf                       = NULL;
+        char             pidfile_path[PATH_MAX]     = {0,};
+        char             path[PATH_MAX]             = {0,};
+        FILE            *pidfile                    = NULL;
+        pid_t            pid                        = -1;
+        char             dumpoptions_path[PATH_MAX] = {0,};
+        char            *option                     = NULL;
+        char            *tmpptr                     = NULL;
+        char            *dup_options                = NULL;
+        char             msg[256]                   = {0,};
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        if (!options) {
+                gf_log (this->name, GF_LOG_ERROR, "No statedump options "
+                        "given for quotad");
+                goto out;
+        }
+
+        /* strtok_r() modifies its input, so tokenise a private copy. */
+        dup_options = gf_strdup (options);
+        if (!dup_options) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to duplicate "
+                        "statedump options");
+                goto out;
+        }
+
+        /* strtok_r() returns NULL for an empty or all-space string; that
+         * must be rejected before it reaches strcmp(). */
+        option = strtok_r (dup_options, " ", &tmpptr);
+        if (!option || strcmp (option, "quotad")) {
+                snprintf (msg, sizeof (msg), "for quotad statedump, options "
+                          "should be after the key 'quotad'");
+                if (op_errstr)
+                        *op_errstr = gf_strdup (msg);
+                ret = -1;
+                goto out;
+        }
+
+        GLUSTERD_GET_QUOTAD_DIR (path, conf);
+        GLUSTERD_GET_QUOTAD_PIDFILE (pidfile_path, path);
+
+        pidfile = fopen (pidfile_path, "r");
+        if (!pidfile) {
+                gf_log (this->name, GF_LOG_ERROR, "Unable to open pidfile: %s",
+                        pidfile_path);
+                ret = -1;
+                goto out;
+        }
+
+        /* A pid of 0 or a negative pid would make kill() signal a whole
+         * process group (or every process), so treat it as unreadable. */
+        ret = fscanf (pidfile, "%d", &pid);
+        if (ret <= 0 || pid <= 0) {
+                gf_log (this->name, GF_LOG_ERROR, "Unable to get pid of quotad "
+                        "process");
+                ret = -1;
+                goto out;
+        }
+
+        snprintf (dumpoptions_path, sizeof (dumpoptions_path),
+                  DEFAULT_VAR_RUN_DIRECTORY"/glusterdump.%d.options", pid);
+        ret = glusterd_set_dump_options (dumpoptions_path, options, option_cnt);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "error while parsing "
+                        "statedump options");
+                ret = -1;
+                goto out;
+        }
+
+        gf_log (this->name, GF_LOG_INFO, "Performing statedump on quotad with "
+                "pid %d", pid);
+
+        if (kill (pid, SIGUSR1) != 0) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to signal quotad "
+                        "with pid %d for statedump", pid);
+                ret = -1;
+                goto out;
+        }
+
+        /* NOTE(review): presumably gives quotad time to read the options
+         * file before it is removed below -- confirm. */
+        sleep (1);
+
+        ret = 0;
+out:
+        if (pidfile)
+                fclose (pidfile);
+        /* The options path is only filled in once the pid is known. */
+        if (dumpoptions_path[0] != '\0')
+                unlink (dumpoptions_path);
+        GF_FREE (dup_options);
+        return ret;
+}
+
/* Checks if the given peer contains all the bricks belonging to the
* given volume. Returns true if it does else returns false
*/
@@ -7619,3 +7809,9 @@ out:
gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
+
+/* Look up the VKEY_FEATURES_QUOTA boolean option on the given volume and
+ * return the result of that lookup as-is. */
+int
+glusterd_is_volume_quota_enabled (glusterd_volinfo_t *volinfo)
+{
+        int quota_on = glusterd_volinfo_get_boolean (volinfo,
+                                                     VKEY_FEATURES_QUOTA);
+
+        return quota_on;
+}