summaryrefslogtreecommitdiffstats
path: root/extras/ocf/volume.in
blob: de05373b9e34ae1ce7939e71173d43c1ecf9de35 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
#!/bin/sh
#
# glusterd
#
# Description:  Manages a glusterd server as a (typically cloned)
#               HA resource
#
# Authors:      Florian Haas (hastexo Professional Services GmbH)
#
# License:      GNU General Public License (GPL)

#######################################################################
# Initialization:

: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs

# Convenience variables
# When sysconfdir and localstatedir aren't passed in as
# configure flags, they're defined in terms of prefix
prefix=@prefix@
SHORTHOSTNAME=`hostname -s`
#######################################################################

OCF_RESKEY_binary_default="gluster"

: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}

volume_meta_data() {
    cat <<EOF
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="volume" version="0.1">
  <version>0.1</version>
  <longdesc lang="en">
Manages a GlusterFS volume and monitors its bricks. When a resource of
this type is configured as a clone (as is commonly the case), then it
must have clone ordering enabled.
  </longdesc>
  <shortdesc lang="en">Manages a GlusterFS volume</shortdesc>
  <parameters>
    <parameter name="volname" required="1">
      <longdesc lang="en">
      The name of the volume to manage.
      </longdesc>
      <shortdesc lang="en">volume name</shortdesc>
      <content type="string"/>
    </parameter>
    <parameter name="binary">
      <longdesc lang="en">
      Name of the gluster executable. Specify a full absolute
      path if the binary is not in your \$PATH.
      </longdesc>
      <shortdesc lang="en">gluster executable</shortdesc>
      <content type="string" default="$OCF_RESKEY_binary_default"/>
    </parameter>
  </parameters>
  <actions>
    <action name="start"        timeout="20" />
    <action name="stop"         timeout="20" />
    <action name="monitor"      timeout="20" interval="10" />
    <action name="reload"       timeout="20" />
    <action name="meta-data"    timeout="5" />
    <action name="validate-all"   timeout="20" />
  </actions>
</resource-agent>
EOF

}

volume_getdir() {
    local voldir
    voldir="@sysconfdir@/glusterd/vols/${OCF_RESKEY_volname}"

    [ -d ${voldir} ] || return 1

    echo "${voldir}"
    return 0
}

volume_getpid_dir() {
    local volpid_dir
    volpid_dir="/var/run/gluster/vols/${OCF_RESKEY_volname}"

    [ -d ${volpid_dir} ] || return 1

    echo "${volpid_dir}"
    return 0
}

volume_getbricks() {
    local infofile
    local voldir
    voldir=`volume_getdir`
    infofile="${voldir}/info"

    [ -e ${infofile} ] || return 1

    echo "`sed -n -e "s/^brick-.\+=${SHORTHOSTNAME}://p" < ${infofile}`"
    return 0
}

volume_getpids() {
    local bricks
    local pidfile
    local infofile
    local volpid_dir

    volpid_dir=`volume_getpid_dir`
    bricks=`volume_getbricks`

    for brick in ${bricks}; do
	pidfile="${volpid_dir}/${SHORTHOSTNAME}${brick}.pid"
	[ -e $pidfile ] || return 1
	cat $pidfile
    done

    return 0
}

volume_start() {
    local volume_options

    # exit immediately if configuration is not valid
    volume_validate_all || exit $?

    # if resource is already running, bail out early
    if volume_monitor; then
        ocf_log info "Resource is already running"
        return $OCF_SUCCESS
    fi

    # actually start up the resource here
    ocf_run "$OCF_RESKEY_binary" \
	volume start "$OCF_RESKEY_volname" force || exit $OCF_ERR_GENERIC

    # After the resource has been started, check whether it started up
    # correctly. If the resource starts asynchronously, the agent may
    # spin on the monitor function here -- if the resource does not
    # start up within the defined timeout, the cluster manager will
    # consider the start action failed
    while ! volume_monitor; do
        ocf_log debug "Resource has not started yet, waiting"
        sleep 1
    done

    # only return $OCF_SUCCESS if _everything_ succeeded as expected
    return $OCF_SUCCESS
}

volume_stop() {
    local rc
    local pid

    # exit immediately if configuration is not valid
    volume_validate_all || exit $?

    volume_monitor
    rc=$?
    case "$rc" in
        "$OCF_SUCCESS")
            # Currently running. Normal, expected behavior.
            ocf_log debug "Resource is currently running"
            ;;
        "$OCF_NOT_RUNNING")
            # Currently not running. Nothing to do.
            ocf_log info "Resource is already stopped"
            return $OCF_SUCCESS
            ;;
    esac

    # actually shut down the resource here (make sure to immediately
    # exit with an $OCF_ERR_ error code if anything goes seriously
    # wrong)
    pids=`volume_getpids`
    for pid in $pids; do
	ocf_run kill -s TERM $pid
    done

    # After the resource has been stopped, check whether it shut down
    # correctly. If the resource stops asynchronously, the agent may
    # spin on the monitor function here -- if the resource does not
    # shut down within the defined timeout, the cluster manager will
    # consider the stop action failed
    while volume_monitor; do
        ocf_log debug "Resource has not stopped yet, waiting"
        sleep 1
    done

    # only return $OCF_SUCCESS if _everything_ succeeded as expected
    return $OCF_SUCCESS

}

volume_monitor() {
    local pid

    pids=`volume_getpids` || return $OCF_NOT_RUNNING

    for pid in $pids; do
	ocf_run kill -s 0 $pid || return $OCF_NOT_RUNNING
    done

    ocf_log debug "Local bricks for volume ${OCF_RESKEY_volname} running with PIDs $pids"
    return $OCF_SUCCESS
}

volume_validate_all() {
    # Test for configuration errors first
    if [ -z "${OCF_RESKEY_volname}" ]; then
	ocf_log err 'Missing required parameter "volname"'
	return $OCF_ERR_CONFIGURED
    fi

    # Test for required binaries
    check_binary $OCF_RESKEY_binary

    return $OCF_SUCCESS
}



# Make sure meta-data and usage always succeed
case $__OCF_ACTION in
meta-data)      volume_meta_data
                exit $OCF_SUCCESS
                ;;
usage|help)     volume_usage
                exit $OCF_SUCCESS
                ;;
esac

# Anything other than meta-data and usage must pass validation
volume_validate_all || exit $?

# Translate each action into the appropriate function call
case $__OCF_ACTION in
start)          volume_start;;
stop)           volume_stop;;
status|monitor) volume_monitor;;
reload)         ocf_log info "Reloading..."
                volume_start
                ;;
validate-all)   ;;
notify)		exit $OCF_SUCCESS;;
*)              volume_usage
                exit $OCF_ERR_UNIMPLEMENTED
                ;;
esac
rc=$?

# The resource agent may optionally log a debug message
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION returned $rc"
exit $rc